2% DCTG grammar for stochastic regular expressions
3% Feb/99
4%
5% Grammar: a | E:F | [a:E1(N1)+b:E2(N2)] | E*(Pr) | E+(Pr)
6%
7% epsilon e: epsilon (not allowed in constructs; equiv. to [] generation)
8% action: a
9% concatanation: E:F Pr(E:F) = Pr(E)*Pr(F)
10% choice: [E1(N1)+b:E2(N2)] Pr(Ei(Ni)) = Ni / N1+N2
11% Kleene star: E*(P) Pr(E*) = [epsilon (1-P), (E:E*)(P)]
12% Kleene plus: E+(P) Pr(E+) = Pr(E:(E*(P)))
13%
14% where: Ni = integers >= 0, P is probability (0 <= P < 1)
15%
16% Also: (a) no directly nested iteration allowed (star, plus)
17% (b) choice must have at least 2 terms
18%
19% Semantic rules:
20% construct(E): makes a Prolog structure equivalent of expression
21% generate(S,SL1,SL2,P): randomly interprets an expression, giving output
22% string S (as list) with associated probability P.
23% Note that repetition (star, plus) are executed according to
24% probability, as well as for a max. generated string length.
25% When length met, then no repetitions possible (max_string_length_P/1)
26% Resulting string may unavoidably exceed this max length.
27% raw_generate(S,SL1,SL2): like generate, but no probability computed.
28% recognize(S,P): finds way of recognizing a string with the
29% expression, resulting in probability P. Will work with backtracking,
30% so that all possibile derivations can be found (no probabilistic
31% execution as with generate/2.
32
33
34expr ::= iter_expr^^A
35<:>
36(construct(E) ::- A^^construct(E)),
37(raw_generate(S, SL1, SL2) ::- A^^raw_generate(S, SL1, SL2)),
38(recognize(S, S2, PrSoFar, Pr) ::-
39 check_prob(PrSoFar),
40 A^^recognize(S, S2, PrSoFar, Pr)).
41
42expr ::= noniter_expr^^A
43<:>
44(construct(E) ::- A^^construct(E)),
45(raw_generate(S, SL1, SL2) ::- A^^raw_generate(S, SL1, SL2)),
46(recognize(S, S2, PrSoFar, Pr) ::-
47 check_prob(PrSoFar),
48 A^^recognize(S, S2, PrSoFar, Pr)).
49
50% -------------------------------------
51
52noniter_expr ::= [a] % action a
53<:>
54construct(a),
55(raw_generate([a], SL1, SL2) ::- SL2 is SL1+1),
56(recognize([a|T], T, PrSoFar, PrSoFar) ::- check_prob(PrSoFar)).
57
58noniter_expr ::= [b] % action b
59<:>
60construct(b),
61(raw_generate([b], SL1, SL2) ::- SL2 is SL1+1),
62(recognize([b|T], T, PrSoFar, PrSoFar) ::- check_prob(PrSoFar)).
63
64% - - - -
65
66noniter_expr ::= guardedexpr_a^^A1, intval^^B1, guardedexpr_b^^A2, intval^^B2
67<:>
68(construct([(E1,N1),(E2,N2)]) ::-
69 A1^^construct(E1),
70 B1^^construct(N1),
71 A2^^construct(E2),
72 B2^^construct(N2)),
73(raw_generate(S, SL1, SL2) ::-
74 B1^^construct(N1), B2^^construct(N2),
75 (raw_select_term([N1,N2], 1) ->
76 A1^^raw_generate(S, SL1, SL2)
77 ;
78 A2^^raw_generate(S, SL1, SL2))),
79(recognize(S, S2, PrSoFar, Pr) ::-
80 B1^^construct(Val1), B2^^construct(Val2),
81 Pr2 is PrSoFar*(Val1/(Val1+Val2)),
82 check_prob(Pr2),
83 A1^^recognize(S, S2, Pr2, Pr)),
84(recognize(S, S2, PrSoFar, Pr) ::-
85 B1^^construct(Val1), B2^^construct(Val2),
86 Pr2 is PrSoFar*(Val2/(Val1+Val2)),
87 check_prob(Pr2),
88 A2^^recognize(S, S2, Pr2, Pr)).
89
90% - - - -
91
92noniter_expr ::= expr^^A, expr^^B % concat
93<:>
94(construct((E:F)) ::- A^^construct(E), B^^construct(F)),
95(raw_generate(S, SL1, SL2) ::-
96 A^^raw_generate(S1, SL1, SL3),
97 B^^raw_generate(S2, SL3, SL2),
98 append(S1, S2, S)),
99(recognize(S, S2, PrSoFar, Pr) ::-
100 check_prob(PrSoFar),
101 A^^recognize(S, S3, PrSoFar, Pr1),
102 check_prob(Pr1),
103 B^^recognize(S3, S2, Pr1, Pr)).
104
105% -------------------------------------
106
107iter_expr ::= noniter_expr^^A, probval^^B % star
108<:>
109(construct((E)*(P)) ::- A^^construct(E), B^^construct(P)),
110(raw_generate(S, SL1, SL2) ::-
111 B^^construct(P),
112 max_string_length_P(MaxL),
113 raw_gen_loop(A, P, MaxL, S, SL1, SL2)),
114(recognize(S, S2, PrSoFar, Pr) ::-
115 check_prob(PrSoFar),
116 B^^construct(Pr1),
117 recognize_loop(A, Pr1, S, S2, PrSoFar, Pr)).
118
119% - - - -
120
121iter_expr ::= noniter_expr^^A, probval^^B % plus
122<:>
123(construct((E)+(P)) ::- A^^construct(E), B^^construct(P)),
124(raw_generate(S, SL1, SL2) ::-
125 A^^raw_generate(S1, SL1, SL3),
126 B^^construct(P),
127 max_string_length_P(MaxL),
128 raw_gen_loop(A, P, MaxL, S2, SL3, SL2),
129 append(S1, S2, S),
130 !),
131(recognize(S, S2, PrSoFar, Pr) ::-
132 check_prob(PrSoFar),
133 A^^recognize(S, S3, PrSoFar, Pr1),
134 \+ (S=S3), % new
135 check_prob(Pr1),
136 B^^construct(Pr2),
137 recognize_loop(A, Pr2, S3, S2, Pr1, Pr)).
138
139% -------------------------------------
140
141guardedexpr_a ::= [a] % action a
142<:>
143construct(a),
144(raw_generate([a], SL1, SL2) ::- SL2 is SL1+1),
145(recognize([a|T], T, PrSoFar, PrSoFar) ::- check_prob(PrSoFar)).
146
147guardedexpr_a ::= [a], expr^^A % concat
148<:>
149(construct((a:E)) ::- A^^construct(E)),
150(raw_generate([a|S], SL1, SL2) ::-
151 A^^raw_generate(S, SL1, SL3),
152 SL2 is SL3+1),
153(recognize([a|S], S2, PrSoFar, Pr) ::-
154 check_prob(PrSoFar),
155 A^^recognize(S, S2, PrSoFar, Pr)).
156
157guardedexpr_b ::= [b] % action b
158<:>
159construct(b),
160(raw_generate([b], SL1, SL2) ::- SL2 is SL1+1),
161(recognize([b|T], T, PrSoFar, PrSoFar) ::- check_prob(PrSoFar)).
162
163guardedexpr_b ::= [b], expr^^A % concat
164<:>
165(construct((b:E)) ::- A^^construct(E)),
166(raw_generate([b|S], SL1, SL2) ::-
167 A^^raw_generate(S, SL1, SL3),
168 SL2 is SL3+1),
169(recognize([b|S], S2, PrSoFar, Pr) ::-
170 check_prob(PrSoFar),
171 A^^recognize(S, S2, PrSoFar, Pr)).
172
173
174% -------------------------------------
175
176intval ::= [N], { is_an_integer(N) }
177<:>
178construct(N).
179
180probval ::= [R], { is_a_probability(R)}
181<:>
182construct(R).
183
187
192
193is_an_integer(N) :-
194 integer(N),
195 !.
196is_an_integer(N) :-
197 int_range(Low, High),
198 random(Low, High, N).
199
200int_range(0, 1000).
201
207
208is_a_probability(R) :-
209 float(R),
210 !.
211is_a_probability(R) :-
212 random(T),
213 R is truncate(T*100)/100.
214
221
222raw_select_term(L, K) :-
223 sumlist(L, SL, 0, Sum),
224 random(0, Sum, X),
225 select_kth_term(SL, X, 1, K, _),
226 !.
227
228
236
237sumlist([], [], Sum, Sum).
238sumlist([N|R], [NewSum|SumList2], LastSum, Sum) :-
239 NewSum is LastSum + N,
240 sumlist(R, SumList2, NewSum, Sum).
241
249
250select_kth_term([Val], _, K, K, Val) :- !.
251select_kth_term([Val|_], X, K, K, Val) :-
252 Val >= X,
253 !.
254select_kth_term([_|R], X, K, K2, Val) :-
255 K3 is K + 1,
256 select_kth_term(R, X, K3, K2, Val).
257
258% ------------------------------------
259% raw_gen_loop(Tree, Pr, MaxL, S, SL1, SL2):
260% Tree - grammar tree to process
261% Pr - probability of doing an iteration
262% MaxL - max length of generated string for terminating looping
263% S - final generated string
264% SL1, SL2 - current length and final length of generated string
265% Like gen_loop, but no probabilities computed.
266
267raw_gen_loop(Tree, Pr, MaxL, S, SL1, SL2) :-
268 SL1 < MaxL,
269 maybe(Pr),
270 Tree^^raw_generate(S1, SL1, SL3),
271 raw_gen_loop(Tree, Pr, MaxL, S2, SL3, SL2),
272 append(S1, S2, S),
273 !.
274raw_gen_loop(Tree, Pr, _, [], SL, SL) :-
275 !.
276
277
293
294recognize_loop(_, Pr, [], [], PrSoFar, FinalPr) :- 295 !,
296 FinalPr is PrSoFar*(1.0 - Pr),
297 check_prob(FinalPr).
298recognize_loop(T, Pr, S, S, PrSoFar, FinalPr) :-
299 FinalPr is PrSoFar*(1.0 - Pr),
300 check_prob(FinalPr).
301recognize_loop(Tree, Pr, S, S2, PrSoFar, FinalPr) :-
302 Pr3 is PrSoFar*Pr,
303 check_prob(Pr3),
304 Tree^^recognize(S, S3, Pr3, Pr1),
305 \+(S=S3),
306 check_prob(Pr1),
307 recognize_loop(Tree, Pr, S3, S2, Pr1, FinalPr).
308
309check_prob(P) :-
310 min_grammar_prob_P(E),
311 P > E,
312 !.
313
314% ------------------------------------
315% for testing...
316
317sre(Type, Expr, String, SL) :-
318 repeat,
319 (Type = full ; Type = grow),
320 generate_tree(expr, Type, 12, _, Tree, _),
321 Tree^^construct(Expr),
322 Tree^^raw_generate(String, 0, SL),
323 nl,sre_pp(Expr),nl,
324 write('tree '),write(Tree),nl,
325 tree_depth(Tree, Depth),
326 write('Depth = '), write(Depth),nl.
327
328sre2(Type, Expr, Input) :-
329 repeat,
330 (Type = full ; Type = grow),
331 generate_tree(expr, Type, 12, _, Tree, _),
332 Tree^^construct(Expr),
333 nl, write(Type),
334 nl,sre_pp(Expr),nl,
335 bagof((Leftover,Pr),Tree^^recognize(Input, Leftover, 1.0, Pr), Rlist),
336 write('Recog list: '), nl, writelist(Rlist), nl.
337
338sre2c(Type, Expr, Input) :-
339 repeat,
340 (Type = full ; Type = grow),
341 generate_tree(expr, Type, 12, _, Tree, _),
342 Tree^^construct(Expr),
343 nl, write(Type),
344 nl,sre_pp(Expr),nl,
345 bagof(Pr,Tree^^recognize(Input, [], 1.0, Pr), Prlist),
346 write('Pr list: '), nl, writelist(Prlist), nl.
347
348sre2b(Input) :-
349 generate_tree(expr, grow, 8, _, Tree, _),
350 Tree^^construct(Expr),
351 Tree^^recognize(Input, Leftover, 1.0, Pr),
352 nl,sre_pp(Expr),nl,
353 write('Prob = '), write(Pr), nl,
354 write('Leftover = '), write(Leftover), nl.
355
358
359sre_pp((E*R)) :-
360 write('('),
361 sre_pp(E),
362 write(')*'),
363 write(R),
364 !.
365sre_pp((E+R)) :-
366 write('('),
367 sre_pp(E),
368 write(')+'),
369 write(R),
370 !.
371sre_pp((E:F)) :-
372 sre_pp(E),
373 write(':'),
374 sre_pp(F),
375 !.
376sre_pp([A|T]) :-
377 sre_pp_l([A|T]),
378 !.
379sre_pp((A,B)) :-
380 write('('),
381 sre_pp(A),
382 write(','),
383 write(B),
384 write(')'),
385 !.
386sre_pp(X) :-
387 write(X).
388
389sre_pp_l([A]) :-
390 sre_pp(A),
391 !.
392sre_pp_l([A|T]) :-
393 write('['),
394 sre_pp(A),
395 write('+'),
396 sre_pp_l(T),
397 write(']'),
398 !.
399
404