26:- module(pPEG,[ 27 peg_compile/2, 28 peg_compile/3, 29 peg_parse/3, 30 peg_parse/5, 31 peg_grammar/1, 32 peg_lookup_previous/3, 33 pPEG/4 34 ]). 35
36:- use_module(library(strings),[string/4]). 37:- use_module(library(debug)). 38:- use_module(library(option),[option/3]). 39:- use_module(library(pcre),[re_matchsub/4]). 40:- use_module(library(quasi_quotations), [ 41 quasi_quotation_syntax/1,
42 with_quasi_quotation_input/3
43]). 44
45%
46% the "standard" pPEG grammar source for bootstrapping and reference, e.g.,
47% ?- peg_grammar(S), write_term(S,[]).
48%
49peg_grammar({|string||
50 Peg = _ rule+ _
51 rule = id _ '=' _ alt
52
53 alt = seq ('/'_ seq)*
54 seq = rep*
55 rep = pre sfx? _
56 pre = pfx? term
57 term = call / sq / chs / group / extn
58
59 group = '('_ alt ')'
60 pfx = [&!~]
61 sfx = [+?] / '*' range?
62 range = num (dots num?)?
63 num = [0-9]+
64 dots = '..'
65
66 call = id _ !'='
67 id = [a-zA-Z_] [a-zA-Z0-9_-]*
68 sq = ['] ~[']* ['] 'i'?
69 chs = '[' ~']'* ']'
70 extn = '<' ~'>'* '>'
71 _ = ('#' ~[\n\r]* / [ \t\n\r]+)*
72
73|}).
74
78boot_grammar_def('Peg'([
79 rule([id("Peg"), seq([id("_"), rep([id("rule"), sfx("+")]), id("_")])]),
80 rule([id("rule"), seq([id("id"), id("_"), sq("'='"), id("_"), id("alt")])]),
81
82 rule([id("alt"), seq([id("seq"), rep([seq([sq("'/'"), id("_"), id("seq")]), sfx("*")])])]),
83 rule([id("seq"), rep([id("rep"), sfx("*")])]),
84 rule([id("rep"), seq([id("pre"), rep([id("sfx"), sfx("?")]), id("_")])]),
85 rule([id("pre"), seq([rep([id("pfx"), sfx("?")]), id("term")])]),
86 rule([id("term"), alt([id("call"), id("sq"), id("chs"), id("group"), id("extn")])]),
87
88 rule([id("group"), seq([sq("'('"), id("_"), id("alt"), sq("')'")])]),
89 rule([id("pfx"), chs("[&!~]")]),
90 rule([id("sfx"), alt([chs("[+?]"), seq([sq("'*'"), rep([id("range"), sfx("?")])])])]),
91 rule([id("range"), seq([id("num"), rep([seq([id("dots"), rep([id("num"), sfx("?")])]), sfx("?")])])]),
92 rule([id("num"), rep([chs("[0-9]"), sfx("+")])]),
93 rule([id("dots"), sq("'..'")]),
94
95 rule([id("call"), seq([id("id"), id("_"), pre([pfx("!"), sq("'='")])])]),
96 rule([id("id"), seq([chs("[a-zA-Z_]"), rep([chs("[a-zA-Z0-9_-]"), sfx("*")])])]),
97 rule([id("sq"), seq([chs("[']"), rep([pre([pfx("~"), chs("[']")]), sfx("*")]), chs("[']"), rep([sq("'i'"), sfx("?")])])]),
98 rule([id("chs"), seq([sq("'['"), rep([pre([pfx("~"), sq("']'")]), sfx("*")]), sq("']'")])]),
99 rule([id("extn"), seq([sq("'<'"), rep([pre([pfx("~"), sq("'>'")]), sfx("*")]), sq("'>'")])]),
100 rule([id("_"), rep([alt([seq([sq("'#'"), rep([pre([pfx("~"), chs("[\\n\\r]")]), sfx("*")])]), rep([chs("[ \\t\\n\\r]"), sfx("+")])]), sfx("*")])])
101], _)).
102
106:-set_prolog_flag(optimise,false). 107
109debug_peg_trace(FString,Args) :- debug(pPEG(trace),FString,Args).
110
111:-set_prolog_flag(optimise,true). 112
114init_peg :-
115 foreach((nb_current(Key,_), atom_concat('pPEG:',_,Key)), nb_delete(Key)), 116 nodebug(pPEG(trace)), 117 bootstrap_grammar. 118
119user:exception(undefined_global_variable,'pPEG:$pPEG',retry) :-
120 bootstrap_grammar. 121
122bootstrap_grammar :-
123 boot_grammar_def(BootPeg), 124 nb_setval('pPEG:$pPEG',BootPeg),
125 peg_grammar(PegSrc),
126 peg_compile(PegSrc,pPEG,[optimise(true)]). 127
131:- quasi_quotation_syntax(pPEG). 132
133pPEG(Content, Args, _Binding, Grammar) :-
134 with_quasi_quotation_input(Content, Stream, read_string(Stream, _, String)),
135 peg_compile(String,Grammar,Args). 136
140peg_compile(Src, GrammarSpec) :- 141 peg_compile(Src, GrammarSpec, []).
142
143peg_compile(Src, GrammarSpec, OptionList) :- 144 peg_parse(pPEG, Src, Ptree, _, OptionList),
145 option_value(optimise(Opt),OptionList,true),
146 make_grammar(Opt,Ptree,Grammar),
147 (Grammar = GrammarSpec
148 -> true 149 ; (atom(GrammarSpec) 150 -> atomic_concat('pPEG:$',GrammarSpec,GKey),
151 nb_setval(GKey,Grammar)
152 ; current_prolog_flag(verbose,GVrbse),
153 option_value(verbose(Vrbse),OptionList,GVrbse), 154 peg_fail_msg(peg(argError('GrammarSpec',GrammarSpec)),Vrbse)
155 )
156 ).
157
158make_grammar(true,Ptree,Grammar) :- !, 159 optimize_peg(Ptree,Grammar).
160make_grammar(_,'Peg'(Rules),'Peg'(Rules,_)). 161
166peg_parse(GrammarSpec, Input, Result) :-
167 peg_parse(GrammarSpec, Input, Result, _Residue, []).
168
169peg_parse(GrammarSpec, Input, Result, Residue, OptionList) :-
170 171 option_value(incomplete(Incomplete),OptionList,false), 172 option_value(tracing(TRules),OptionList,[]), 173 current_prolog_flag(verbose,GVrbse),
174 option_value(verbose(Vrbse),OptionList,GVrbse), 175 peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,GName,Env,Eval), 176 (eval_(Eval, Env, Input, 0, PosOut, Result0) 177 -> (Result0 = [] -> sub_string(Input,0,PosOut,_,Result) ; Result = Result0) 178 ; (persistent_env_(Env,(@(Name,Inst,Pos),_)) 179 -> peg_fail_msg(peg(errorinfo(GName,Name,Inst,Pos,Input)),Vrbse) 180 ; fail 181 )
182 ),
183 (string_length(Input,PosOut) 184 -> Residue="" 185 ; (Incomplete = true 186 -> sub_string(Input,PosOut,_,0,Residue) 187 ; (persistent_env_(Env,(@(Name,Inst,Pos),_)), Inst \== [] 188 -> peg_fail_msg(peg(errorinfo(GName,Name,Inst,Pos,Input)),Vrbse) 189 ; peg_fail_msg(peg(incompleteParse(GName,Input,PosOut)),Vrbse) 190 )
191 )
192 ).
193
194option_value(Option, Options, Default) :-
195 (Options = []
196 -> arg(1,Option,Default) 197 ; option(Option, Options, Default) 198 ).
199
202persistent_env_(Env,PEnv) :- arg(5,Env,PEnv). 203
204peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,GName,@(Grammar,GName,0,([],[]),PEnv),Eval) :-
205 (string(Input)
206 -> true
207 ; peg_fail_msg(peg(argError('Input',Input)),Vrbse)
208 ),
209 (copy_term(GrammarSpec,'Peg'(Grammar0,Grammar0)) 210 -> true
211 ; 212 (atom(GrammarSpec), atomic_concat('pPEG:$',GrammarSpec,GKey), nb_current(GKey,'Peg'(Grammar0,Grammar0))
213 -> true
214 ; peg_fail_msg(peg(argError('Grammar',GrammarSpec)),Vrbse)
215 )
216 ),
217 peg_add_tracing(TRules,Grammar0,Grammar), 218 (Vrbse = normal 219 -> PEnv = (@(GName,[],0), "") 220 ; PEnv = (@(), "")
221 ),
222 Grammar = [FirstRule|_], 223 (FirstRule = rule([Eval|_]) 224 -> Eval = id(GName) 225 ; Eval = call_O(FirstRule), 226 FirstRule = rule(GName,_)
227 ).
228
229peg_fail_msg(Msg, normal) :- 230 print_message(informational, Msg),
231 fail.
232
233:- multifile prolog:message/1. 234
235prolog:message(peg(argError(Arg,Value))) --> 236 [ "pPEG Error: invalid argument, ~w = ~w" - [Arg,Value] ].
237
238prolog:message(peg(errorinfo(GName,Rule,Inst,Pos,Input))) --> 239 {rule_elements(Rule,GName,Elems),
240 atomics_to_string(Elems,".",RName),
241 string_length(Input,InputLen), 242 StartPos is min(Pos,InputLen-1),
243 peg_line_pos(Input,StartPos,0,1,Text,EPos,ELineNum), 244 CPos is EPos+1, 245 (vm_instruction(Inst,Exp) -> true ; Exp = []),
246 rule_elements(Exp,GName,FElems),
247 atomics_to_string(FElems,".",FExp),
248 (FExp = "" -> Expct = "" ; Expct = ", expected ")
249 },
250 251 [ 'pPEG Error: ~w failed~w~w at line ~w.~w:\n% ~|~` t~d~3+ | ~w\n% ~|~` t~3+ ~|~` t~*+^'
252 - [RName,Expct,FExp,ELineNum,CPos,ELineNum,Text,EPos]
253 ].
254
255prolog:message(peg(incompleteParse(GName,Input,PosOut))) --> 256 {peg_line_pos(Input,PosOut,0,1,Text,EPos,ELineNum), 257 CPos is EPos+1 258 },
259 260 [ 'pPEG Error: ~w fell short at line ~w.~w:\n% ~|~` t~d~3+ | ~w\n% ~|~` t~3+ ~|~` t~*+^'
261 - [GName,ELineNum,CPos,ELineNum,Text,EPos]
262 ].
263
264prolog:message(peg(undefined(RuleName))) --> 265 [ 'pPEG: ~w undefined' - [RuleName] ]. 266
267rule_elements([],GName,[GName,GName]) :- !. 268rule_elements(Rule,GName,[GName,Rule]) :-
269 sub_atom(Rule,0,1,_,RType), 270 char_type(RType,alpha),
271 !.
272rule_elements(Rule,_GName,[Rule]). 273
277peg_lookup_previous(Name,Env,Match) :-
278 arg(4,Env,Ctxt), 279 (var(Name)
280 -> lookup_match_(Ctxt,RName,Match), 281 atom_string(RName,Name)
282 ; atom_string(RName,Name), 283 lookup_match_(Ctxt,RName,Match)
284 ).
285
286lookup_match_((Matches,Parent),Name,Match) :-
287 (memberchk((Name,slice(Input,PosIn,PosOut)),Matches)
288 -> Len is PosOut-PosIn, 289 sub_string(Input,PosIn,Len,_,Match)
290 ; lookup_match_(Parent,Name,Match) 291 ).
292
296eval_(id(Name), Env, Input, PosIn, PosOut, R) :- 297 atom_string(PName,Name), 298 arg(1,Env,Grammar), 299 (memberchk(rule([id(Name), Exp]), Grammar) 300 -> eval_(call_O(rule(PName,Exp)), Env, Input, PosIn, PosOut, R) 301 ; print_message(warning, peg(undefined(PName))), 302 fail
303 ).
304
305eval_(alt(Ss), Env, Input, PosIn, PosOut, R) :- 306 alt_eval(Ss, Env, Input, PosIn, PosOut, R).
307
308eval_(seq(Ss), Env, Input, PosIn, PosOut, R) :- 309 seq_eval(Ss, PosIn, Env, Input, PosIn, PosOut, R).
310
311eval_(rep([Exp, ROp]), Env, Input, PosIn, PosOut, R) :- 312 rep_counts(ROp,Min,Max), !, 313 repeat_eval(0, Min, Max, Exp, Env, Input, PosIn, PosOut, R).
314
315eval_(pre([pfx(POp), Exp]), Env, Input, PosIn, PosOut, []) :- 316 317 arg(5,Env,PEnv), 318 arg(1,PEnv,ErrorInfo), 319 nb_linkarg(1,PEnv,@()), 320 (eval_(Exp, Env, Input, PosIn, _PosOut, _R)
321 -> nb_linkarg(1,PEnv,ErrorInfo), 322 323 (POp = "&" -> PosOut = PosIn) 324 ; nb_linkarg(1,PEnv,ErrorInfo), 325 326 (POp = "!" -> PosOut = PosIn
327 ;POp = "~" -> (string_length(Input,PosIn) -> fail ; PosOut is PosIn+1) 328 )
329 ).
330
331eval_(sq(S), _Env, Input, PosIn, PosOut, []) :- 332 (sub_string(S,_,1,0,"i") 333 -> sub_string(S,0,_,1,S1), 334 literal_match_(S1,SMatch), 335 string_upper(SMatch,UMatch),
336 string_length(SMatch,Len),
337 sub_string(Input,PosIn,Len,_,Match),
338 string_upper(Match,UMatch) 339 ; literal_match_(S,Match), 340 sub_string(Input,PosIn,Len,_,Match) 341 ),
342 PosOut is PosIn+Len.
343
344eval_(chs(MatchSet), _Env, Input, PosIn, PosOut, []) :- 345 sub_atom(Input, PosIn, 1, _, R), 346 match_chars(MatchSet,MChars), 347 chars_in_match(MChars,R,in), 348 PosOut is PosIn+1. 349
350eval_(extn(S), Env, Input, PosIn, PosOut, R) :- 351 (string(S) -> extn_pred(S,T) ; T = S), 352 extn_call(T,Env,Input,PosIn,PosOut,R).
353
355eval_(call_O(rule(Name, Exp)), @(Grammar,_RName,Dep,Ctxt,PEnv), Input, PosIn, PosOut, R) :- 356 357 nonvar(Exp), 358 Dep1 is Dep+1, 359 360 (Dep1 >= 64 361 -> recursive_loop_check(eval_(call_O(rule(Name,_)),_,_,P,_,_),P,PosIn,Name)
362 ; true
363 ),
364 eval_(Exp, @(Grammar,Name,Dep1,([],Ctxt),PEnv), Input, PosIn, PosOut, Res), 365 (Exp = trace(_)
366 -> R = Res 367 ; Match = slice(Input,PosIn,PosOut), 368 369 arg(1,Ctxt,Matches), setarg(1,Ctxt,[(Name,Match)|Matches]),
370 sub_atom(Name,0,1,_,RType), 371 (RType == '_'
372 -> R = [] 373 ; 374 flatten_(Res,[],RRs), 375 build_ptree(RRs,RType,Match,Name,R) 376 )
377 ).
378
379eval_(rep_O(Exp, Min, Max), Env, Input, PosIn, PosOut, R) :- 380 repeat_eval(0, Min, Max, Exp, Env, Input, PosIn, PosOut, R).
381
382eval_(sq_O(Case,Match), _Env, Input, PosIn, PosOut, []) :- 383 (Case = exact
384 -> sub_string(Input,PosIn,Len,_,Match) 385 ; 386 string_length(Match,Len),
387 sub_string(Input,PosIn,Len,_,S),
388 string_upper(S,Match)
389 ),
390 PosOut is PosIn+Len.
391
392eval_(chs_O(In,MChars), _Env, Input, PosIn, PosOut, []) :- 393 sub_atom(Input, PosIn, 1, _, R), 394 chars_in_match(MChars,R,In), 395 PosOut is PosIn+1. 396
397eval_(trace(Rule), Env, Input, PosIn, PosOut, R) :- 398 399 (debugging(pPEG(trace),true)
400 -> eval_(call_O(Rule),Env,Input,PosIn,PosOut,R) 401 ; current_prolog_flag(debug,DF), 402 peg_trace, 403 persistent_env_(Env,PEnv),
404 nb_linkarg(2,PEnv," "), 405 (eval_(call_O(Rule),Env,Input,PosIn,PosOut,R) 406 -> peg_notrace, 407 set_prolog_flag(debug,DF) 408 ; peg_notrace, 409 set_prolog_flag(debug,DF), 410 fail
411 )
412 ).
413
417
419alt_eval([S|Ss], Env, Input, PosIn, PosOut, R) :-
420 eval_(S, Env, Input, PosIn, PosOut, R) 421 -> true 422 ; alt_eval(Ss, Env, Input, PosIn, PosOut, R). 423
424
427seq_eval([], _Start, _Env, _Input, PosIn, PosIn, []).
428seq_eval([S|Ss], Start, Env, Input, PosIn, PosOut, R) :-
429 (eval_(S, Env, Input, PosIn, PosNxt, Re) 430 -> (Re == []
431 -> seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, R) 432 ; R = [Re|Rs], 433 seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, Rs) 434 )
435 ; PosIn > Start, 436 arg(5,Env,PEnv), 437 arg(1,PEnv,ErrInfo), 438 arg(3,ErrInfo,HWM),
439 PosIn > HWM, 440 arg(2,Env,FName), 441 nb_linkarg(1,PEnv,@(FName,S,PosIn)),
442 fail
443 ).
444
445
448rep_counts(sfx("?"),0, 1).
449rep_counts(sfx("+"),1,-1).
450rep_counts(sfx("*"),0,-1). 451rep_counts(num(StrN),N,N) :- 452 number_string(N,StrN).
453rep_counts(range([num(StrN),_]),N,-1) :- 454 number_string(N,StrN).
455rep_counts(range([num(StrM),_,num(StrN)]),M,N) :- 456 number_string(M,StrM),
457 number_string(N,StrN).
458
460repeat_eval(Max, _Min, Max, _Exp, _Env, _Input, PosIn, PosIn, []) :- !. 461repeat_eval(C, Min, Max, Exp, Env, Input, PosIn, PosOut, R) :-
462 eval_(Exp, Env, Input, PosIn, PosN, Re),
463 PosN > PosIn, 464 !,
465 C1 is C+1, 466 (Re == [] 467 -> repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, R)
468 ; R = [Re|Rs],
469 repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, Rs)
470 ).
471repeat_eval(C, Min,_Max, _Exp, _Env, _Input, PosIn, PosIn, []) :- 472 C >= Min. 473
474
477literal_match_(S,Match) :-
478 match_chars(S,Chars), 479 string_chars(Match,Chars). 480
481
484match_chars(MatchSet, MChars) :-
485 sub_string(MatchSet,1,_,1,Str), 486 string_chars(Str,Chars),
487 unescape_(Chars,MChars).
488
489unescape_([],[]).
490unescape_(['\\',u,C1,C2,C3,C4|NxtChars],[Esc|MChars]) :-
491 hex_value(C1,V1), hex_value(C2,V2), hex_value(C3,V3), hex_value(C4,V4), !,
492 VEsc is ((V1*16+V2)*16+V3)*16+V4,
493 char_code(Esc,VEsc),
494 unescape_(NxtChars,MChars).
495unescape_(['\\','U',C1,C2,C3,C4,C5,C6,C7,C8|NxtChars],[Esc|MChars]) :-
496 hex_value(C1,V1), hex_value(C2,V2), hex_value(C3,V3), hex_value(C4,V4),
497 hex_value(C5,V5), hex_value(C6,V6), hex_value(C7,V7), hex_value(C8,V8), !,
498 VEsc is ((((((V1*16+V2)*16+V3)*16+V4)*16*V5)*16+V6)*16+V7)*16+V8,
499 char_code(Esc,VEsc),
500 unescape_(NxtChars,MChars).
501unescape_(['\\',CEsc|Chars],[Esc|MChars]) :-
502 std_escape_(CEsc,Esc), !,
503 unescape_(Chars,MChars).
504unescape_([Char|Chars],[Char|MChars]) :-
505 unescape_(Chars,MChars).
506
507std_escape_('n','\n').
508std_escape_('r','\r').
509std_escape_('t','\t').
510
511hex_value(C,V) :- char_type(C,digit(V)) -> true ; char_type(C,xdigit(V)).
512
514chars_in_match([],_Ch,In) :- In == notin. 515chars_in_match([Cl,'-',Cu|MChars],Ch,In) :- !, 516 (Cl@=<Ch,Ch@=<Cu -> In == in ; chars_in_match(MChars,Ch,In)).
517chars_in_match([Cl|MChars],Ch,In) :- 518 (Cl==Ch -> In == in ; chars_in_match(MChars,Ch,In)).
519
520
523recursive_loop_check(Goal,Last,Pos,Name) :-
524 prolog_current_frame(F), 525 prolog_frame_attribute(F,parent,IPF), 526 prolog_frame_attribute(IPF,parent,GPF), 527 (once(prolog_frame_attribute(GPF,parent_goal,Goal)), Last=Pos
528 -> 529 peg_notrace,
530 format(string(Message),"pPEG infinite recursion applying ~w",[Name]),
531 throw(error(resource_error(Message),_))
532 ; true
533 ).
534
536flatten_([], Tl, Tl) :-
537 !.
538flatten_([Hd|Tl], Tail, List) :-
539 !,
540 flatten_(Hd, FlatHeadTail, List),
541 flatten_(Tl, Tail, FlatHeadTail).
542flatten_(NonList, Tl, [NonList|Tl]).
543
545build_ptree([],RType,Match,PName,R) :- !, 546 (char_type(RType,lower)
547 -> Match = slice(Input,PosIn,PosOut), 548 Len is PosOut-PosIn,
549 sub_string(Input,PosIn,Len,_,Arg),
550 R =.. [PName,Arg]
551 ; R =.. [PName,[]] 552 ).
553build_ptree([Arg],RType,_Match,_PName,Arg) :- 554 compound(Arg),
555 char_type(RType,lower), 556 !.
557build_ptree(Arg,_RType,_Match,PName,R) :- 558 R =.. [PName,Arg].
559
560
563extn_pred(S,T) :-
564 (sub_string(S,Pos,1,_," ") 565 -> FLen is Pos-1, 566 sub_atom(S,1,FLen,_,Pred), 567 APos is Pos+1, 568 sub_string(S,APos,_,1,S1), 569 split_string(S1,""," ",[StringArg]) 570 ; sub_atom(S,1,_,1,Pred), 571 StringArg = ""
572 ),
573 (split_string(Pred,':','',[SM,SF]) 574 -> atom_string(M,SM), atom_string(F,SF),
575 P =.. [F,StringArg],
576 T = M:P
577 ; T =.. [Pred,StringArg]
578 ).
579
581extn_call(T,Env,Input,PosIn,PosOut,R) :-
582 catch(call(T,Env,Input,PosIn,PosOut,R),
583 Err, extn_error(Err,T,Env,Input,PosIn,PosOut,R)
584 ).
585
586extn_error(error(existence_error(procedure,_),_),T,_Env,Input,PosIn,PosIn,[]) :- !,
587 sub_string(Input,PosIn,_,0,Rem),
588 print_message(information, peg(extension(T,Rem))).
589extn_error(Err,_T,_Env,_Input,_PosIn,_PosOut,_R) :-
590 throw(Err).
591
592prolog:message(peg(extension(T,Rem))) --> 593 [ "Extension ~p parsing: ~p\n" - [T,Rem] ].
594
598peg_add_tracing([],Grammar,Grammar) :- !. 599peg_add_tracing(TRules,Grammar,GrammarT) :-
600 (Grammar = [rule(_,_)|_]
601 -> duplicate_term(Grammar,GrammarC) 602 ; GrammarC = Grammar 603 ),
604 add_tracing(TRules,GrammarC,GrammarT).
605
606add_tracing([],Grammar,Grammar) :- !.
607add_tracing([Name|Names],Grammar,GrammarT) :- !,
608 add_tracing(Name,Grammar,NxtGrammar),
609 add_tracing(Names,NxtGrammar,GrammarT).
610add_tracing(Name,Grammar,GrammarT) :-
611 add_trace(Grammar,Name,GrammarT).
612
613add_trace([],_SName,[]).
614add_trace([rule([id(SName), Exp])|Rules], Name,
615 [rule([id(SName), trace(rule(AName,Exp))])|Rules]) :-
616 nonvar(Exp), 617 atom_string(AName,SName), 618 atom_string(AName,Name),
619 !.
620add_trace([Rule|Rules], Name, [Rule|Rules]) :-
621 Rule = rule(AName, Exp), 622 nonvar(Exp), 623 atom_string(AName,Name), 624 !,
625 626 setarg(2,Rule,trace(rule(AName,Exp))).
627add_trace([Rule|Rules], Name, [Rule|RulesT]) :-
628 add_trace(Rules, Name, RulesT).
629
633peg_trace :-
634 debug(pPEG(trace)),
635 trace_control_(spy(pPEG:eval_)).
636
637peg_notrace :-
638 (debugging(pPEG(trace),true)
639 -> trace_control_(nospy(pPEG:eval_)),
640 nodebug(pPEG(trace))
641 ; true
642 ).
643
644trace_control_(G) :- 645 current_prolog_flag(verbose,V),
646 set_prolog_flag(verbose,silent),
647 call(G),
648 set_prolog_flag(verbose,V).
649
651:- multifile user:prolog_trace_interception/4. 652
653user:prolog_trace_interception(Port,Frame,_Choice,continue) :-
654 debugging(pPEG(trace),true), 655 prolog_frame_attribute(Frame,goal,Goal),
656 peg_trace_port(Port,Goal),
657 !. 658
659peg_trace_port(Port,pPEG:eval_(Inst, Env, Input, PosIn, PosOut, R)) :- 660 peg_inst_type(Inst,Type),
661 vm_instruction(Inst,TInst),
662 persistent_env_(Env,PEnv),
663 peg_trace_port_(Type, Port, TInst, PEnv, Input, PosIn, PosOut, R),
664 !.
665
666peg_trace_port_(call, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
667 peg_cursor_pos(Input,PosIn,Cursor),
668 peg_trace_msg(postInc, PEnv, "~w~w~w", [Cursor,TInst]). 669peg_trace_port_(call, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
670 peg_cursor_pos(Input,PosIn,Cursor),
671 peg_trace_input(Input,PosIn,Str),
672 peg_trace_msg(preDec, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). 673peg_trace_port_(call, exit, TInst, PEnv, Input, PosIn, PosOut, R) :- !,
674 peg_cursor_pos(Input,PosOut,Cursor),
675 (R = [] 676 -> Len is PosOut-PosIn,
677 sub_string(Input,PosIn,Len,_,RT)
678 ; RT = R
679 ),
680 (string(RT) -> MatchOp = "==" ; MatchOp = "=>"),
681 peg_trace_msg(preDec, PEnv, "~w~w~w ~w \t~p", [Cursor,TInst,MatchOp,RT]). 682peg_trace_port_(meta, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
683 peg_cursor_pos(Input,PosIn,Cursor),
684 peg_trace_msg(indent, PEnv, "~w~w~w", [Cursor,TInst]). 685peg_trace_port_(terminal, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
686 peg_cursor_pos(Input,PosIn,Cursor),
687 peg_trace_input(Input,PosIn,Str),
688 peg_trace_msg(indent, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). 689peg_trace_port_(terminal, exit, TInst, PEnv, Input, PosIn, PosOut, _R) :- !,
690 peg_cursor_pos(Input,PosOut,Cursor),
691 Len is PosOut-PosIn,
692 sub_string(Input,PosIn,Len,_,RT),
693 peg_trace_msg(indent, PEnv, "~w~w~w == \t~p", [Cursor,TInst,RT]). 694peg_trace_port_(_Other, _, _, _, _, _, _, _). 695
696peg_inst_type(alt(_),meta).
697peg_inst_type(seq(_),meta).
698peg_inst_type(pre(_),call).
699peg_inst_type(rep(_),meta).
700peg_inst_type(rep_O(_,_,_),meta).
701peg_inst_type(sq(_),terminal).
702peg_inst_type(sq_O(_,_),terminal).
703peg_inst_type(chs(_),terminal).
704peg_inst_type(chs_O(_,_),terminal).
705peg_inst_type(extn(_),terminal).
706peg_inst_type(id(_),notrace). 707peg_inst_type(call_O(rule(_,Exp)),Type) :- 708 Exp = trace(_) -> Type = notrace ; Type = call.
709peg_inst_type(trace(_),notrace). 710
711peg_cursor_pos(Input,Pos,Cursor) :-
712 string_length(Input,InputLen), 713 StartPos is min(Pos,InputLen-1),
714 peg_line_pos(Input,StartPos,0,1,_Text,LinePos,LineNo), 715 CPos is LinePos +1, 716 format(string(Cursor),"~` t~d~4+.~d~4+",[LineNo,CPos]). 717
718peg_line_pos("",_Pos,_LinePos,LineNum,"",0,LineNum) :- !. 719peg_line_pos(Input,Pos,LinePos,LineNum,Text,EPos,ELineNum) :- 720 721 re_matchsub("[^\n\r]*(\n|\r\n?)?",Input,Match,[start(LinePos)]), 722 string_length(Match.0,Len),
723 NxtLinePos is LinePos+Len,
724 ((LinePos =< Pos,Pos < NxtLinePos) 725 -> string_concat(Text,Match.get(1,""),Match.0), 726 EPos is Pos-LinePos,
727 ELineNum = LineNum
728 ; NxtLineNum is LineNum+1, 729 peg_line_pos(Input,Pos,NxtLinePos,NxtLineNum,Text,EPos,ELineNum)
730 ).
731
732peg_trace_input(Input,PosIn,Str) :-
733 sub_string(Input,PosIn,L,0,SStr), 734 (L =< 32
735 -> Str = SStr
736 ; sub_string(SStr,0,32,_,SStr1),
737 string_concat(SStr1," ... ",Str)
738 ).
739
740peg_trace_msg(postInc, PEnv, Msg, [Cursor|Args]) :-
741 arg(2,PEnv,Indent),
742 debug_peg_trace(Msg,[Cursor,Indent|Args]),
743 string_concat(Indent,"| ",NxtIndent), 744 nb_linkarg(2,PEnv,NxtIndent).
745peg_trace_msg(preDec, PEnv, Msg, [Cursor|Args]) :-
746 arg(2,PEnv,Indent),
747 sub_string(Indent,0,_,3,NxtIndent), 748 debug_peg_trace(Msg,[Cursor,NxtIndent|Args]),
749 nb_linkarg(2,PEnv,NxtIndent).
750peg_trace_msg(indent, PEnv, Msg, [Cursor|Args]) :-
751 arg(2,PEnv,Indent),
752 debug_peg_trace(Msg,[Cursor,Indent|Args]).
753
757vm_instruction(id(Name), Name).
758vm_instruction(call_O(rule(Name,_Exp)), Name).
759vm_instruction(seq(Exps), Is) :-
760 vm_instruction_list(Exps,LIs),
761 atomics_to_string(LIs," ",Is0),
762 atomics_to_string(["(",Is0,")"],Is).
763vm_instruction(alt(Exps), Is) :-
764 vm_instruction_list(Exps,LIs),
765 atomics_to_string(LIs," / ",Is0),
766 atomics_to_string(["(",Is0,")"],Is).
767vm_instruction(rep([Exp, Sfx]), Is) :-
768 vm_rep_sfx(Sfx,ROp), !,
769 vm_instruction(Exp,I),
770 string_concat(I,ROp,Is).
771vm_instruction(rep_O(Exp, Min, Max), Is) :-
772 rep_counts(Sfx, Min, Max), !,
773 vm_instruction(rep([Exp, Sfx]), Is).
774vm_instruction(pre([pfx(Chs),Exp]), Is) :-
775 vm_instruction(Exp,I),
776 string_concat(Chs,I,Is).
777vm_instruction(sq(Match), Is) :-
778 unescape_std(Match,Is).
779vm_instruction(sq_O(Case,Match), Is) :-
780 (Case = exact -> Sens = "" ; Sens = "i"),
781 unescape_std(Match,S1),
782 unescape_string(S1,"'","\\u0027",S),
783 atomics_to_string(["'",S,"'",Sens],Is).
784vm_instruction(chs(Match), Is) :-
785 unescape_std(Match,Is).
786vm_instruction(chs_O(In,MChars), Is) :-
787 (In = notin -> Pfx = '~' ; Pfx = ''),
788 string_chars(MStr,MChars),
789 unescape_std(MStr,S),
790 unescape_string(S,"]","\\u005d",S1),
791 atomics_to_string([Pfx,"[",S1,"]"],Is).
792vm_instruction(extn(Ext), Is) :-
793 (string(Ext)
794 -> Is = Ext 795 ; (Ext = Mod:Pred
796 -> Pred =.. [Func,StringArg], 797 atomics_to_string(['<',Mod,':',Func,' ',StringArg,'>'],Is)
798 ; Ext =.. [Func,StringArg], 799 atomics_to_string(['<',Func,' ',StringArg,'>'],Is)
800 )
801 ).
802vm_instruction(trace(Rule), Is) :-
803 vm_instruction(call_O(Rule), Is).
804
805vm_instruction_list([],[]).
806vm_instruction_list([Exp|Exps],[Is|LIs]) :-
807 vm_instruction(Exp,Is),
808 vm_instruction_list(Exps,LIs).
809
810vm_rep_sfx(sfx(ROp), ROp).
811vm_rep_sfx(num(StrN), ROp) :- atomics_to_string(["*",StrN],ROp).
812vm_rep_sfx(range([num(StrN),_]), ROp) :- atomics_to_string(["*",StrN,".."],ROp).
813vm_rep_sfx(range([num(StrM),_,num(StrN)]), ROp) :- atomics_to_string(["*",StrM,"..",StrN],ROp).
814
815unescape_string(Sin,Esc,Usc,Sout) :-
816 split_string(Sin,Esc,"",L),
817 atomics_to_string(L,Usc,Sout).
818
819unescape_std(Sin,Sout) :-
820 string_chars(Sin,CharsIn),
821 escape_chars(CharsIn,CharsOut),
822 string_chars(Sout,CharsOut).
823
824escape_chars([],[]).
825escape_chars([C|CharsIn],[C|CharsOut]) :-
826 char_code(C,CS), between(32,126,CS), !, 827 escape_chars(CharsIn,CharsOut).
828escape_chars([ECh|CharsIn],['\\',Ch|CharsOut]) :-
829 std_escape_(Ch,ECh),!, 830 escape_chars(CharsIn,CharsOut).
831escape_chars([C|CharsIn],['\\','u',X1,X2,X3,X4|CharsOut]) :-
832 char_code(C,CS), 833 divmod(CS,16,Q4,R4),
834 divmod(Q4,16,Q3,R3),
835 divmod(Q3,16,R1,R2),
836 hex_value(X1,R1), hex_value(X2,R2), hex_value(X3,R3), hex_value(X4,R4),
837 escape_chars(CharsIn,CharsOut).
838
844optimize_peg('Peg'(Rules),'Peg'(RulesO,RRefs)) :-
845 (optimize_rules(Rules,RDefs,RulesO)
846 -> once(length(RDefs,_)), 847 chk_RDefs(RulesO,RDefs,RRefs) 848 ; (Rules = [rule([id(GName),_])|_Rules] -> true ; GName = "?unknown?"),
849 print_message(warning,peg(optimize_fail(GName))), 850 fail
851 ).
852
853chk_RDefs([],RDefs,[]) :-
854 forall(member(Name:_,RDefs), print_message(warning, peg(undefined(Name)))).
855chk_RDefs([rule(PName,_)|Rules],RDefs,[_|RRefs]) :-
856 memberchk(rule(PName,_),Rules), !, 857 print_message(warning,peg(duplicate(PName))), 858 chk_RDefs(Rules,RDefs,RRefs).
859chk_RDefs([rule(PName,_)|Rules],RDefs,[RRef|RRefs]) :-
860 atom_string(PName,Name),
861 remove_def(RDefs,Name,RRef,NxtRDefs),
862 chk_RDefs(Rules,NxtRDefs,RRefs).
863
864remove_def([],_Name,_RRef,[]).
866remove_def([Name:RRef|RDefs],Name,RRef,RDefs) :- !.
867remove_def([RDef|RDefs],Name,RRef,[RDef|NxtRDefs]) :-
868 remove_def(RDefs,Name,RRef,NxtRDefs).
869
870prolog:message(peg(duplicate(Name))) --> 871 [ "pPEG: duplicate rule ~w, last definition will apply" - [Name] ].
872
873prolog:message(peg(optimize_fail(GName))) --> 874 [ "pPEG: grammar ~w optimization failed" - [GName] ].
875
876optimize_rules([],_RDefs,[]).
877optimize_rules([Rule|Rules],RDefs,[RuleO|RulesO]) :-
878 optimize_rule(Rule,RDefs,RuleO),
879 optimize_rules(Rules,RDefs,RulesO).
880
881optimize_rule(rule([id(Name),Exp]), RDefs, rule(PName,ExpO)) :- !, 882 atom_string(PName,Name), 883 optimize_exp(Exp, RDefs, ExpO).
884optimize_rule(rule(Name,Exp), _RDefs, rule(Name,Exp)). 885
886optimize_exp(id(Name), RDefs, call_O(Rule)) :- 887 memberchk(Name:Rule, RDefs).
888
889optimize_exp(seq(Ins), RDefs, seq(Opt)) :-
890 optimize_exp_list(Ins,RDefs,Opt).
891
892optimize_exp(alt(Ins), RDefs, alt(Opt)) :-
893 optimize_exp_list(Ins,RDefs,Opt).
894
895optimize_exp(rep([Exp, ROp]), RDefs, rep_O(ExpO, Min, Max)) :-
896 rep_counts(ROp,Min,Max), !,
897 optimize_exp(Exp,RDefs,ExpO).
898
899optimize_exp(pre([pfx("~"), chs(MatchSet)]), RDefs, chs_O(notin,MChars)) :- !,
900 optimize_exp(chs(MatchSet), RDefs, chs_O(_,MChars)).
901optimize_exp(pre([pfx(POp), Exp]), RDefs, pre([pfx(POp), ExpO])) :-
902 optimize_exp(Exp,RDefs,ExpO).
903
904optimize_exp(chs(MatchSet), _RDefs, chs_O(in,MChars)) :-
905 match_chars(MatchSet, MChars).
906
907optimize_exp(sq(QS), _RDefs, sq_O(Case,Match)) :-
908 (sub_string(QS,_,1,0,"i") 909 -> Case = upper,
910 sub_string(QS,0,_,1,S), 911 literal_match_(S,AMatch), 912 string_upper(AMatch,Match)
913 ; Case = exact,
914 literal_match_(QS,Match) 915 ).
916
917optimize_exp(extn(S), _RDefs, extn(T)) :- 918 (string(S) -> extn_pred(S,T) ; T = S).
919
920optimize_exp(call_O(Rule), _RDefs, call_O(Rule)). 921optimize_exp(rep_O(Exp, Min, Max), _RDefs, rep_O(Exp, Min, Max)). 922optimize_exp(sq_O(C,M), _RDefs, sq_O(C,M)). 923optimize_exp(chs_O(M), _RDefs, chs_O(M)). 925
926optimize_exp_list([],_RDefs,[]).
927optimize_exp_list([Exp|Exps],RDefs,[ExpO|ExpOs]) :-
928 optimize_exp(Exp,RDefs,ExpO),
929 optimize_exp_list(Exps,RDefs,ExpOs).
930
934:- initialization(init_peg,now).