5
/* ----------------------------------------------------------------------
   Load-time setup: search-path aliases, flags, dynamic predicates
---------------------------------------------------------------------- */

% Resolve '..' relative to the file currently being loaded and register
% the resulting directory as the `candc` alias.  asserta/1 puts this
% clause in front of any existing file_search_path/2 clauses.
:- prolog_load_context(file, ThisFile),
   absolute_file_name('..', Root,
                      [ relative_to(ThisFile),
                        file_type(directory)
                      ]),
   asserta(user:file_search_path(candc, Root)).

% Double-quoted text is read as a list of character codes in this file.
:- set_prolog_flag(double_quotes, codes).

% Aliases derived from `candc`; they are used by the use_module/1,2
% directives further down in this file.
user:file_search_path(semlib,    candc(lib)).
user:file_search_path(boxer,     candc(boxer)).
user:file_search_path(knowledge, boxer(knowledge)).
user:file_search_path(lex,       boxer(lex)).

% verbnet(CCG, Roles, Count): counters maintained by add/2 and read by
% planB/0,1.
:- dynamic verbnet/3.
27
28:- use_module(library(lists),[member/2,reverse/2,append/3]). 29:- use_module(boxer(slashes)). 30:- use_module(knowledge(roles),[old2new/2]). 31
32
36
/* ----------------------------------------------------------------------
   verbnet2prolog(+File)

   Loads one XML file, locates a 'VNCLASS' element in it, reads that
   element's 'ID' attribute and hands the element content to members/4,
   which prints the facts.  The collected member names are discarded.
---------------------------------------------------------------------- */

verbnet2prolog(XmlFile) :-
    load_xml_file(XmlFile, Dom),
    elements(Dom, ['VNCLASS'], f(ClassAttrs, ClassContent)),
    value(ClassAttrs, 'ID', ClassID),
    members(ClassContent, XmlFile, ClassID, _AllNames).
43
44
48
/* ----------------------------------------------------------------------
   pretty_print(+Nodes, +Tab)

   Debugging aid: writes a list of parsed XML nodes as an indented tree.
   Every node starts on its own line, indented by Tab spaces.  An
   element(Name,Attrs,Children) node prints its name and attribute list
   and then its children, indented three columns deeper.  Anything else
   is printed as unknown:Node.
---------------------------------------------------------------------- */

pretty_print([], _).
pretty_print([Node|Nodes], Tab) :-
    tab(Tab),
    (   Node = element(Name, Attrs, Children)
    ->  write(Name), write(' '), write(Attrs), nl,
        Deeper is Tab+3,
        pretty_print(Children, Deeper)
    ;   write(unknown:Node), nl
    ),
    pretty_print(Nodes, Tab).
60
61
65
/* ----------------------------------------------------------------------
   members(+Content, +File, +ClassID, -Names)

   Processes the content of one VerbNet class or subclass:
     1. collects every SUBCLASSES/VNSUBCLASS element together with its
        'ID' and processes those first (their output is printed first);
     2. collects the FRAMES/FRAME elements and the `name` attribute of
        every MEMBERS/MEMBER element of this class;
     3. prints the frames of this class for all names, i.e. the names
        gathered from the subclasses followed by the class's own.
   Names is that combined list.
---------------------------------------------------------------------- */

members(Content, File, ClassID, Names) :-
    findall(SubContent:SubID,
            ( elements(Content, ['SUBCLASSES','VNSUBCLASS'], f(SubAttrs, SubContent)),
              value(SubAttrs, 'ID', SubID)
            ),
            SubClasses),
    subclasses(SubClasses, File, SubNames),
    findall(Frame,
            elements(Content, ['FRAMES','FRAME'], f(_, Frame)),
            Frames),
    findall(Name,
            ( elements(Content, ['MEMBERS','MEMBER'], f(MemberAttrs, _)),
              value(MemberAttrs, name, Name)
            ),
            OwnNames),
    append(SubNames, OwnNames, Names),
    frames(Frames, Names, ClassID, File).
73
74
78
/* ----------------------------------------------------------------------
   subclasses(+SubClasses, +File, -Names)

   SubClasses is a list of Content:ID pairs.  Each pair is processed by
   members/4, in list order; Names is the concatenation of the member
   names returned for every pair.
---------------------------------------------------------------------- */

subclasses([], _, []).
subclasses([Content:ID|Rest], File, Names) :-
    members(Content, File, ID, Here),
    subclasses(Rest, File, Later),
    append(Here, Later, Names).
85
86
90
/* ----------------------------------------------------------------------
   frames(+Frames, +Names, +ID, +File)

   Prints, for every frame in Frames, a comment header followed by one
   verbnet/4 fact per name in Names (via frameMembers/4), and then the
   preposition facts produced by addPP/2.

   A frame for which any step up to and including frameMembers/4 fails
   is reported on a "% frameproblem:" line and skipped.
---------------------------------------------------------------------- */

frames([], _, _, _) :- !.

frames([Frame|Frames], Names, ID, File) :-
    elements(Frame, ['DESCRIPTION'], f(Description, _)),
    value(Description, primary, Primary),
    example(Frame, Example),
    elements(Frame, ['SYNTAX'], f(_, Syntax)),
    % subcat/3 accumulates, so SubCat lists the syntax items last-first.
    subcat(Syntax, [], SubCat),
    % C^C: the two parts of the working pair start out as the same variable.
    ccg(SubCat, C^C, CCG, Missing, Roles0),
    roles2roles(Roles0, Roles1),
    % Reverse all roles except the final one, which stays in last position.
    % This fails (and so triggers the frameproblem clause) when there are
    % no roles at all.
    append(Wrong, [Role], Roles1),
    reverse(Wrong, Right),
    append(Right, [Role], Roles),
    atom_chars(ID, IDChars),
    % Drop the first part of the ID and the separator following it.
    formatID(IDChars, [_,_|FID]),
    reverse(SubCat, Normal),
    format('~n%%% File: ~p~n%%% Primary: ~p (~p)~n%%% Syntax: ~p~n',[File,Primary,ID,Normal]),
    write('%%% CCG: '), write(CCG),
    format('~n%%% Roles: ~p~n',[Roles]),
    % If-then-else instead of a cut inside the disjunction: a cut here
    % would commit this clause before frameMembers/4 has run and make the
    % frameproblem clause unreachable whenever Missing is empty.
    (   Missing = []
    ->  true
    ;   format('%%% Missing: ~p~n',[Missing])
    ),
    format('%%% Example: ~p~n%%%~n',[Example]),
    frameMembers(Names, CCG, FID, Roles), !,
    addPP(SubCat, FID),
    frames(Frames, Names, ID, File).

frames([Frame|Frames], Names, ID, File) :-
    format('% frameproblem: ~p~n',[Frame]),
    frames(Frames, Names, ID, File).
121
122
126
/* ----------------------------------------------------------------------
   addPP(+SubCat, +FID)

   Looks only at the first two items of SubCat.  When they are an
   argument (np, vp_ng, vp_to or s) followed by prep:Preps, one
   verbnet/4 fact is printed for every space-separated preposition in
   Preps.  `lex:as` in second position is treated as prep:as.  Any other
   SubCat prints nothing and succeeds.
---------------------------------------------------------------------- */

addPP([Arg,lex:as|Rest], FID) :- !,
    addPP([Arg,prep:as|Rest], FID).
addPP([np:Old,prep:Preps|_], FID) :- !,
    addPP_emit('verbnet(~p, pp/np, [~q], ~p).~n', Old, Preps, FID).
addPP([vp_ng:Old,prep:Preps|_], FID) :- !,
    addPP_emit('verbnet(~p, pp/(s:ng\\np), [~q], ~p).~n', Old, Preps, FID).
addPP([vp_to:Old,prep:Preps|_], FID) :- !,
    addPP_emit('verbnet(~p, pp/(s:to\\np), [~q], ~p).~n', Old, Preps, FID).
addPP([s:Old,prep:Preps|_], FID) :- !,
    addPP_emit('verbnet(~p, pp/s:_, [~q], ~p).~n', Old, Preps, FID).
addPP(_, _).

% addPP_emit(+Format, +OldRole, +Preps, +FID)
% Translates the role with old2new/2, splits Preps on spaces and prints
% one line per preposition using Format.  Each printed line is also
% counted through add/2.
% NOTE(review): the counter key is always pp/np, also for the vp_ng,
% vp_to and s variants -- confirm that this is intended.
addPP_emit(Format, Old, Preps, FID) :-
    old2new(Old, New),
    atomic_list_concat(PrepList, ' ', Preps),
    (   member(Prep, PrepList),
        format(Format, [Prep,New,FID]),
        add(pp/np, [New]),
        fail                      % failure-driven loop over all prepositions
    ;   true
    ).
151
152
156
/* ----------------------------------------------------------------------
   example(+Frame, -Example)

   Example is the single content item of the first EXAMPLES/EXAMPLE
   element found in Frame whose content is a one-element list; when
   there is none, Example is the atom 'error (no example)'.
---------------------------------------------------------------------- */

example(Frame, Example) :-
    (   elements(Frame, ['EXAMPLES','EXAMPLE'], f(_, [Example]))
    ->  true
    ;   Example = 'error (no example)'
    ).
161
162
166
/* ----------------------------------------------------------------------
   frameMembers(+Names, +CCG, +FID, +Roles)

   Prints one line "verbnet(Name, CCG, Roles, FID)." for every name in
   Names, after replacing spaces in the name by underscores, and counts
   each CCG/Roles combination through add/2.
---------------------------------------------------------------------- */

frameMembers([], _, _, _).
frameMembers([Member|Members], CCG, FID, Roles) :-
    reformatName(Member, Verb),
    format('verbnet(~q, ',[Verb]),
    write(CCG),                       % the category is written unquoted
    format(', ~q, ~q).~n',[Roles,FID]),
    add(CCG, Roles),
    frameMembers(Members, CCG, FID, Roles).
176
177
181
/* ----------------------------------------------------------------------
   reformatName(+Name, -Reformatted)

   Reformatted is the atom Name with every space replaced by an
   underscore.
---------------------------------------------------------------------- */

reformatName(Name, Reformatted) :-
    atom_chars(Name, Chars),
    reformatString(Chars, NewChars),
    atom_chars(Reformatted, NewChars).

% reformatString(+Chars, -NewChars)
% Character-list worker for reformatName/2: copies Chars, mapping ' '
% to '_'.
reformatString([], []).
reformatString([Char|Chars], [New|News]) :-
    (   Char == ' '
    ->  New = '_'
    ;   New = Char
    ),
    reformatString(Chars, News).
190
191
195
/* ----------------------------------------------------------------------
   add(+CCG, +Roles)

   Maintains the verbnet(CCG, Roles, Count) counters: an existing
   counter for this combination is replaced by one that is one higher,
   otherwise a new counter starting at 1 is added.  New and updated
   counters are appended after the existing clauses.
---------------------------------------------------------------------- */

add(CCG, Roles) :-
    (   retract(verbnet(CCG, Roles, Seen))
    ->  Count is Seen + 1
    ;   Count = 1
    ),
    assertz(verbnet(CCG, Roles, Count)).
204
205
209
/* ----------------------------------------------------------------------
   formatID(+Chars, -Parts)

   Splits the characters of an ID at every '-' and '.'.  Parts
   alternates between the converted pieces (see formatNumber/2) and the
   separator characters, e.g. the characters of 'give-13.1-1' yield
   [give,'-',13,'.',1,'-',1].
---------------------------------------------------------------------- */

formatID(Chars, [Pre,Sep|Parts]) :-
    % append/3 tries the shortest prefix first, so the first separator
    % reached is the first one in Chars and PreChars contains none.
    append(PreChars, [Sep|RestChars], Chars),
    memberchk(Sep, ['-','.']), !,
    formatNumber(PreChars, Pre),
    formatID(RestChars, Parts).

formatID(Chars, [ID]) :-
    formatNumber(Chars, ID).

/* ----------------------------------------------------------------------
   formatNumber(+Chars, -Value)

   Value is a number when Chars starts with a digit and reads as a
   number, and an atom otherwise.  A piece that starts with a digit but
   is not a number (e.g. '1a') becomes an atom; number_chars/2 would
   otherwise raise a syntax error for it and abort the whole run.
---------------------------------------------------------------------- */

formatNumber(Chars, Value) :-
    Chars = [First|_],
    member(First, ['0','1','2','3','4','5','6','7','8','9']),
    catch(number_chars(Number, Chars), error(syntax_error(_), _), fail), !,
    Value = Number.

formatNumber(Chars, Atom) :-
    atom_chars(Atom, Chars).
227
231
/* ----------------------------------------------------------------------
   subcat(+Elements, +Acc0, -Acc)

   Threads the accumulator through cat/3 for every syntax element, left
   to right.  cat/3 adds items at the front, so the result lists the
   items of the last element first.
---------------------------------------------------------------------- */

subcat([], Acc, Acc).
subcat([Element|Elements], Acc0, Acc) :-
    cat(Element, Acc0, Acc1),
    subcat(Elements, Acc1, Acc).
234
235
239
/* ----------------------------------------------------------------------
   roles2roles(+OldRoles, -NewRoles)

   Applies old2new/2 to every role, keeping the order of the list.
---------------------------------------------------------------------- */

roles2roles(OldRoles, NewRoles) :-
    maplist(old2new, OldRoles, NewRoles).
242
243
247
/* ----------------------------------------------------------------------
   ccg(+SubCat, +X^Old, -CCG, -Missing, -Roles)

   Builds a category from SubCat, the item list produced by subcat/3
   (last syntax item first).

     X^Old    working pair.  Old is the category built so far; each
              argument wraps it as Old/Arg.  X is bound when the final
              item is reached, and the `v` item binds it to s:'_'\Y.
     CCG      the finished category.
     Missing  items that no clause handles; they are skipped.
     Roles    roles of the np, s and vp arguments, in SubCat order.
              Arguments that are followed in SubCat by a prep/pp item
              count as pp and contribute no role.

   There is no clause for the empty list, so the call fails when SubCat
   runs out before one of the final-item clauses applies.
---------------------------------------------------------------------- */

% Final item(s) of SubCat: bind X and return the accumulated category.
ccg([np:_,pp],        X^C,       C,[],[]):- !, X=pp.
ccg([np:_,prep:_],    X^C,       C,[],[]):- !, X=pp.
ccg([vp_ng:_,prep:_], X^C,       C,[],[]):- !, X=pp.
ccg([vp_to:_,prep:_], X^C,       C,[],[]):- !, X=pp.
ccg([np:_,lex:as],    X^C,       C,[],[]):- !, X=pp.
ccg([np:R],           np^C,      C,[],[R]):- !.
ccg([s:R],            (s:'_')^C, C,[],[R]):- !.
ccg([pp:_],           pp^C,      C,[],[]):- !.
ccg([X],              X^C,       C,[],[]):- !.

% Argument plus preposition: a pp argument, no role.
ccg([np:_,lex:as|L],   X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
ccg([np:_,prep:_|L],   X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
ccg([s:_,prep:_|L],    X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
ccg([vp_to:_,prep:_|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
ccg([vp_ng:_,prep:_|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
ccg([np:_,pp|L],       X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).

% Plain arguments: extend the category and record the role.
ccg([np:R|L],   X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/np),New,M,Oles).
ccg([s_to:R|L], X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:to\np)),New,M,Oles).
ccg([vp_ng:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:ng\np)),New,M,Oles).
% A to-infinitive takes s:to\np (this clause used to build s:ng\np, a
% copy of the vp_ng clause above).
ccg([vp_to:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:to\np)),New,M,Oles).
ccg([vp_b:R|L], X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:b\np)),New,M,Oles).
ccg([s:R|L],    X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/s:'_'),New,M,Oles).

% The verb: X becomes s:'_'\Y and Y takes over as the open slot.
ccg([v|L],  X^Old,New,M,Roles):- !, X=(s:'_'\Y), ccg(L,Y^Old,New,M,Roles).

% Adverbs are ignored.
ccg([adv|L],Old,New,M,Roles):- !, ccg(L,Old,New,M,Roles).

% Anything else is skipped and reported in Missing.
ccg([U|L],Old,New,[U|M],Roles):- !, ccg(L,Old,New,M,Roles).
279
280
284
/* ----------------------------------------------------------------------
   restr(+Restr, ?Type)

   Succeeds, at most once, when Restr is a list holding exactly one
   'SYNRESTRS' element without attributes, and that element contains a
   'SYNRESTR' element with attributes ['Value'='+', type=Type] and no
   content.
---------------------------------------------------------------------- */

restr([element('SYNRESTRS', [], Restrictions)], Type) :-
    memberchk(element('SYNRESTR', ['Value'='+',type=Type], []), Restrictions).
288
/* ----------------------------------------------------------------------
   Restriction types, as tested by cat/3 on 'NP' elements.

     ing/1      -> vp_ng
     inf/1      -> vp_to
     bare/1     -> vp_b
     s_restr/1  -> s
---------------------------------------------------------------------- */

% -ing restrictions
ing(acc_ing).
ing(oc_ing).
ing(ac_ing).
ing(be_sc_ing).
ing(np_omit_ing).

% to-infinitive restrictions
inf(oc_to_inf).
inf(ac_to_inf).
inf(sc_to_inf).
inf(vc_to_inf).
inf(rs_to_inf).
inf(to_inf_rs).

% bare-infinitive restrictions
bare(oc_bare_inf).

% sentential restrictions
s_restr(np_to_inf).
s_restr(that_comp).
s_restr(for_comp).
s_restr(wh_comp).
s_restr(quotation).

s_restr(np_ing).

s_restr(how_extract).
s_restr(what_extract).

s_restr(wh_inf).
s_restr(what_inf).
s_restr(wheth_inf).
320
321
322
323
327
/* ----------------------------------------------------------------------
   cat(+Element, +Acc0, -Acc)

   Translates one element of a SYNTAX section into one or two items
   that are put in front of the accumulator.  The first matching clause
   wins.  An element that no other clause covers is kept as unk:Element.
---------------------------------------------------------------------- */

% 'NP' elements: the restrictions decide between vp_ng, vp_to, vp_b and
% s; without a matching restriction the element is a plain np.
cat(element('NP',[value=Role],Restr), Acc, [vp_ng:Role|Acc]) :-
    ing(Type), restr(Restr, Type), !.
cat(element('NP',[value=Role],Restr), Acc, [vp_to:Role|Acc]) :-
    inf(Type), restr(Restr, Type), !.
cat(element('NP',[value=Role],Restr), Acc, [vp_b:Role|Acc]) :-
    bare(Type), restr(Restr, Type), !.
cat(element('NP',[value=Role],Restr), Acc, [s:Role|Acc]) :-
    s_restr(Type), restr(Restr, Type), !.
cat(element('NP',[value=Role],_), Acc, [np:Role|Acc]) :- !.

% 'PREP' elements: with or without a value.
cat(element('PREP',[],_), Acc, [pp|Acc]) :- !.
cat(element('PREP',[value=Preps],_), Acc, [prep:Preps|Acc]) :- !.

% 'LEX' elements.  The accumulator is in reverse order, so lex:be in
% front of lex:it reads "it be" once the list is reversed.
cat(element('LEX',[value='[+be]'],_), Acc, [lex:be|Acc]) :- !.
cat(element('LEX',[value='it[+be]'],_), Acc, [lex:be,lex:it|Acc]) :- !.
cat(element('LEX',[value=at],_), Acc, [prep:at|Acc]) :- !.
cat(element('LEX',[value=of],_), Acc, [prep:of|Acc]) :- !.
cat(element('LEX',[value=Word],_), Acc, [lex:Word|Acc]) :- !.

% Elements without attributes and content.
cat(element('VERB',[],[]), Acc, [v|Acc]) :- !.
cat(element('ADJ',[],[]), Acc, [adj|Acc]) :- !.
cat(element('ADV',[],[]), Acc, [adv|Acc]) :- !.

% Anything else.
cat(Unknown, Acc, [unk:Unknown|Acc]) :- !.
344
345
349
/* ----------------------------------------------------------------------
   elements(+Nodes, +Path, -Found)

   Path is a non-empty list of element names.  On backtracking, Found
   is f(Attributes, Content) for every element that is reached from
   Nodes by following Path one nesting level per name.  Nodes are
   visited in list order, and for each node a direct match is reported
   before matches inside it.  List items that are not element/3 terms
   are skipped.
---------------------------------------------------------------------- */

elements(Nodes, Path, Found) :-
    member(element(Name, Attributes, Content), Nodes),
    (   Path = [Name],
        Found = f(Attributes, Content)
    ;   Path = [Name|SubPath],
        elements(Content, SubPath, Found)
    ).
353
354
358
/* ----------------------------------------------------------------------
   value(+Attributes, ?Name, ?Value)

   Succeeds at most once, for the first Name=Value entry in Attributes
   that unifies.
---------------------------------------------------------------------- */

value(Attributes, Name, Value) :-
    memberchk(Name=Value, Attributes).
361
362
366
% verbnet_dir(-Dir): directory (relative path, with trailing slash) in
% which run/0 looks for the input files.
verbnet_dir('ext/VerbNet/').
368
369
373
/* ----------------------------------------------------------------------
   process(+Files)

   Converts the files one by one with verbnet2prolog/1 (first solution
   only) and finishes with planB/0 once the list is exhausted.  Fails
   as soon as one file cannot be converted.
---------------------------------------------------------------------- */

process([]) :-
    planB.
process([File|Files]) :-
    once(verbnet2prolog(File)),
    process(Files).
380
381
382/* ----------------------------------------------------------------------
383 Plan B (verbs that are not in VerbNet)
384---------------------------------------------------------------------- */
385
% planB
% Prints the fallback section: a fixed fact for s:adj\np, followed by
% one fact per category counted by add/2 (see planB/1).
planB :-
    % setof/3 fails when there are no solutions.  Without counters
    % (e.g. no input files) the fixed part is still printed and planB
    % succeeds, so that the caller can finish normally.
    (   setof(CCG, Roles^Count^verbnet(CCG, Roles, Count), Categories)
    ->  true
    ;   Categories = []
    ),
    format('~n%%% Most frequent roles for a particular CCG category.~n%%%~n',[]),
    format('verbnet(_, ~p, [~q], []). % ~n',[s:adj\np,'Theme']),
    planB(Categories).

% planB(+Categories)
% For every category, prints a fact with a role list whose counter is
% not exceeded by any other counter of that category; the count is
% added as a trailing comment.
planB([]).

planB([CCG|L]) :-
    verbnet(CCG, R, N), \+ (verbnet(CCG, _, M), M > N),
    write('verbnet(_, '),
    write(CCG),
    format(' , ~q, []). % n=~p~n',[R,N]),
    planB(L).
400
401
405
/* ----------------------------------------------------------------------
   header

   Prints the first lines of the generated file: a provenance comment,
   the module declaration, the import of boxer(slashes), and the
   verbnet/3 wrapper around verbnet/4.

   This has to be a predicate rather than a directive: run/0 calls
   header/0 just before the files are processed.
---------------------------------------------------------------------- */

header :-
    format('%%% automatically generated by src/prolog/lib/verbnet2boxer.pl~n%%%~n',[]),
    format(':- module(verbnet,[verbnet/3,verbnet/4]).~n',[]),
    format(':- use_module(boxer(slashes)).~n~n',[]),
    format('%%% wrapper~n%%%~nverbnet(A,B,C):- verbnet(A,B,C,_).~n').
411
412
416
% wildCard(-Pattern): file name pattern that run/0 appends to the
% VerbNet directory to select the input files.
wildCard('*.xml').
428
429
433
/* ----------------------------------------------------------------------
   run

   Entry point.  Expands the wildcard pattern inside the VerbNet
   directory, prints the header, converts all files found and halts.
   Fails, without halting, when the directory does not exist or when
   any later step fails.
---------------------------------------------------------------------- */

run :-
    verbnet_dir(Directory),
    exists_directory(Directory),
    wildCard(Pattern),
    atom_concat(Directory, Pattern, Glob),
    expand_file_name(Glob, Files),
    header,
    process(Files),
    halt.

% Start the conversion as soon as this file is loaded.
:- run.