% Run-time state built up during evaluation:
%   found_vpe(Corpus,Sentence,Position,Type) - asserted by output/6 for every
%                                              reported VPE
%   acc_ant(AccuracySum,FscoreSum,Count)     - running totals maintained by
%                                              evalAntecedent/3
:- dynamic found_vpe/4, acc_ant/3.
% sem/5 and id/2 come from the consulted corpus files; detect/3 wipes them
% with retractall/1 before each consult, hence the dynamic declaration.
:- multifile sem/5, id/2.
:- discontiguous sem/5, id/2.
:- dynamic sem/5, id/2.
:- use_module(library(lists),[member/2,select/3]).
/* =======================================================================
Consulting gold standard
========================================================================*/
:- ['working/wsj/vpe00.pl'].
:- ['working/wsj/vpe01.pl'].
:- ['working/wsj/vpe02.pl'].
:- ['working/wsj/vpe03.pl'].
:- ['working/wsj/vpe04.pl'].
:- ['working/wsj/vpe05.pl'].
:- ['working/wsj/vpe06.pl'].
:- ['working/wsj/vpe07.pl'].
:- ['working/wsj/vpe08.pl'].
:- ['working/wsj/vpe09.pl'].
:- ['working/wsj/vpe10.pl'].
:- ['working/wsj/vpe11.pl'].
:- ['working/wsj/vpe12.pl'].
:- ['working/wsj/vpe13.pl'].
:- ['working/wsj/vpe14.pl'].
:- ['working/wsj/vpe15.pl'].
:- ['working/wsj/vpe16.pl'].
:- ['working/wsj/vpe17.pl'].
:- ['working/wsj/vpe18.pl'].
:- ['working/wsj/vpe19.pl'].
/*
:- ['working/wsj/vpe20.pl'].
:- ['working/wsj/vpe21.pl'].
:- ['working/wsj/vpe22.pl'].
:- ['working/wsj/vpe23.pl'].
:- ['working/wsj/vpe24.pl'].
*/
/* =======================================================================
Corpora (comment in/out to select/unselect)
Files created with
bin/candc
--candc-int-betas "0.075 0.03 0.01 0.005 0.001 0.0005 0.00001"
--candc-int-dict_cutoffs "20 20 20 20 150 200 500"
--candc-parser-maxsupercats 1000000
bin/boxer --input working/wsj/wsj.discourse.??.ccg
--output working/wsj/wsj.discourse.??.drs
--resolve --vpe --flat --instantiate --presup min --x
======================================================================= */
% corpus(Name,File): a corpus selected for evaluation. main/0 collects all
% corpus/2 facts with setof/3 and detect/3 consults File for each of them.
corpus(wsj00,'working/wsj/wsj.discourse.00.drs').
/*
corpus(wsj01,'working/wsj/wsj.discourse.01.drs').
corpus(wsj02,'working/wsj/wsj.discourse.02.drs').
corpus(wsj03,'working/wsj/wsj.discourse.03.drs').
corpus(wsj04,'working/wsj/wsj.discourse.04.drs').
corpus(wsj05,'working/wsj/wsj.discourse.05.drs').
corpus(wsj06,'working/wsj/wsj.discourse.06.drs').
corpus(wsj07,'working/wsj/wsj.discourse.07.drs').
corpus(wsj08,'working/wsj/wsj.discourse.08.drs').
corpus(wsj09,'working/wsj/wsj.discourse.09.drs').
corpus(wsj10,'working/wsj/wsj.discourse.10.drs').
corpus(wsj11,'working/wsj/wsj.discourse.11.drs').
corpus(wsj12,'working/wsj/wsj.discourse.12.drs').
corpus(wsj13,'working/wsj/wsj.discourse.13.drs').
corpus(wsj14,'working/wsj/wsj.discourse.14.drs').
corpus(wsj15,'working/wsj/wsj.discourse.15.drs').
corpus(wsj16,'working/wsj/wsj.discourse.16.drs').
corpus(wsj17,'working/wsj/wsj.discourse.17.drs').
corpus(wsj18,'working/wsj/wsj.discourse.18.drs').
corpus(wsj19,'working/wsj/wsj.discourse.19.drs').
*/
/*
corpus(wsj20,'working/wsj/wsj.discourse.20.drs').
corpus(wsj21,'working/wsj/wsj.discourse.21.drs').
corpus(wsj22,'working/wsj/wsj.discourse.22.drs').
corpus(wsj23,'working/wsj/wsj.discourse.23.drs').
corpus(wsj24,'working/wsj/wsj.discourse.24.drs').
*/
/* =======================================================================
Main
======================================================================= */
% main/0: run the whole evaluation and write the report to working/vpe.html.
%
% Clears found_vpe/4 and acc_ant/3, seeds acc_ant(0,0,0) and collects the
% selected corpus/2 facts; setof/3 fails (before the file is created) when
% no corpus is selected.
% The stream is opened under setup_call_cleanup/3 so that it is closed even
% when report generation fails or raises an exception. once/1 makes the
% report goal deterministic, so the cleanup runs as soon as it completes.
main:-
   retractall(found_vpe(_,_,_,_)),
   retractall(acc_ant(_,_,_)),
   assert(acc_ant(0,0,0)),
   setof(c(Corpus,File),corpus(Corpus,File),L),
   setup_call_cleanup(
      open('working/vpe.html',write,Stream,[encoding(utf8)]),
      once(mainReport(L,Stream)),
      close(Stream)).

% mainReport(+Corpora,+Stream): write the page header, one row per detected
% or missed VPE (detect/3) and the summary tables (results/1) to Stream.
mainReport(L,Stream):-
   write(Stream,''), nl(Stream),
   write(Stream,'
Verb Phrase Ellipsis Detection
'), nl(Stream),
   write(Stream,''), nl(Stream),
   write(Stream,'Section | Line | VPE | Acc | F | Example | '), nl(Stream),
   detect(L,[],Stream),
   write(Stream,'
'), nl(Stream),
   results(Stream),
   write(Stream,''), nl(Stream).
/* =======================================================================
Detection
======================================================================= */
% detect(+Corpora,+Done,+Stream)
% Process the c(Name,File) entries one by one: drop the sem/5 and id/2 facts
% of the previous corpus, consult File, gather every identifier known to
% id/2 and pass them to detect2/3. Done accumulates the names of the
% corpora handled so far.
detect([],_,_).
detect([c(Name,DrsFile)|Remaining],Done,Out):-
   retractall(sem(_,_,_,_,_)),
   retractall(id(_,_)),
   consult(DrsFile),
   findall(Key,id(Key,_),Keys),
   detect2(Keys,Name,Out),
   detect(Remaining,[Name|Done],Out).
% detect2(+IDs,+Corpus,+Stream)
% For each identifier, look up its semantic entry through id/2 and sem/5 and
% run detect_vpe/5 on the words and DRS found there. Once the identifiers
% are exhausted, missing/2 reports the gold VPEs that were not detected.
detect2([],Name,Out):-
   missing(Name,Out).
detect2([Key|Keys],Name,Out):-
   id(Key,SemKey),
   sem(SemKey,Tokens,_,_,Conds),
   detect_vpe(Tokens,Name,Key,Conds,Out),
   detect2(Keys,Name,Out).
/* -----------------------------------------------------------------------------
VPE
----------------------------------------------------------------------------- */
% detect_vpe(+Words,+Corpus,+ID,+DRS,+Str)
% Failure-driven loop over all VPE candidates in DRS; each one is handed to
% output/6. Always succeeds once the candidates are exhausted.
% Candidates are enumerated marker by marker (vpeMarker/2) and, for each
% marker, the "do + likewise/so" readings come before the plain ones.
detect_vpe(Words,Corpus,ID,DRS,Str):-
   vpeMarker(Cat,Sense),
   vpeCandidate(Cat,Sense,DRS,VPEInd,Type),
   output(Corpus,ID,VPEInd,Type,Words,Str),
   fail.
detect_vpe(_,_,_,_,_).

% vpeMarker(?Cat,?Sense): the category/sense pairs of pred/4 conditions that
% are treated as VPE markers, in the order in which they are tried.
vpeMarker(vpe,1).
vpeMarker(v,97).

% vpeCandidate(+Cat,+Sense,+DRS,-VPEInd,-Type)
% Clause 1: a 'do' marker whose event also carries a 'likewise' or 'so'
%           condition pred(_,Type,a,0); the modifier becomes the Type.
% Clause 2: any marker that is not such a modified 'do'; its own symbol
%           becomes the Type.
vpeCandidate(Cat,Sense,DRS,VPEInd,Type):-
   member(_:VPEInd:pred(E1,do,Cat,Sense),DRS),
   member(Type,[likewise,so]),
   member(_:_:pred(E2,Type,a,0),DRS),
   E1==E2.
vpeCandidate(Cat,Sense,DRS,VPEInd,Sym):-
   member(_:VPEInd:pred(E1,Sym,Cat,Sense),DRS),
   \+ ( Sym=do,
        member(Type,[likewise,so]),
        member(_:_:pred(E2,Type,a,0),DRS),
        E1==E2 ).
/* =======================================================================
Output
======================================================================= */
% output(+Corpus,+ID,+VPEInd,+Type,+Words,+Stream)
% Report one detected VPE as a row on Stream.
%   clause 1: detections whose Type is excluded/1 are skipped;
%   clause 2: a VPE already recorded in found_vpe/4 is not reported again;
%   clause 3: VPE with a proposed antecedent span [ABeg,AEnd];
%   clause 4: VPE without a proposed antecedent.
% An index list of any other shape matches no clause and the call fails.
%
% Word indices encode Sentence*1000+Position. Sentence is computed with
% integer division (//): with (/) the result is a float whenever ISO
% arithmetic is in force, and a float sentence number never unifies with
% the integers stored in found_vpe/4 or in the gold vpe/4 facts.
output(_Corpus,_ID,_,Type,_Words,_Stream):-
   excluded(Type), !.
output(Corpus,_ID,[VPE|_],_,_Words,_Stream):-
   Position is mod(VPE,1000),
   Sentence is (VPE-Position)//1000,
   found_vpe(Corpus,Sentence,Position,_), !.
output(Corpus,ID,[VPE,ABeg,AEnd|_],Type,Words,Stream):-
   Position is mod(VPE,1000),
   Sentence is (VPE-Position)//1000,
   write(Stream,''),
   format(Stream,'~p | ~p | ',[Corpus,ID]),
   % record the detection before scoring so later duplicates hit clause 2
   assert(found_vpe(Corpus,Sentence,Position,Type)),
   correct(Corpus,Sentence,Position,Stream,Result,GoldAnt),
   evalAntecedent([ABeg,AEnd],GoldAnt,Stream),
   printVPE(Words,[VPE,ABeg,AEnd],Result,GoldAnt,Stream), !,
   write(Stream,'
'),
   nl(Stream).
output(Corpus,ID,[VPE],Type,Words,Stream):-
   Position is mod(VPE,1000),
   Sentence is (VPE-Position)//1000,
   write(Stream,''),
   format(Stream,'~p | ~p | ',[Corpus,ID]),
   assert(found_vpe(Corpus,Sentence,Position,Type)),
   correct(Corpus,Sentence,Position,Stream,Result,GoldAnt),
   % no antecedent was proposed, so there is nothing to score
   evalAntecedent([],GoldAnt,Stream),
   printVPE(Words,[VPE],Result,GoldAnt,Stream), !,
   write(Stream,'
'),
   nl(Stream).
/* =======================================================================
Check Correctness VPE
======================================================================= */
% correct(+Corpus,+Sentence,+Position,+Stream,-Result,-Gold)
% Compare a detected VPE with the gold standard.
% Result is r and Gold is [Beg,End] (gold antecedent boundaries, encoded as
% Sentence*1000+Position) when a gold vpe/4 fact sits at this location and
% none of its vpe_type/4 types is excluded/1; 'R | ' is written to Stream.
% Otherwise Result is w, Gold is [] and 'W | ' is written.
correct(Corpus,Sentence,Position,Stream,r,Gold):-
   vpe(GoldId,Corpus,Sentence,Position),
   \+ ( excluded(Skipped), vpe_type(GoldId,Corpus,Skipped,_) ), !,
   ant_beg(GoldId,Corpus,BegSentence,BegPosition),
   First is (BegSentence*1000)+BegPosition,
   ant_end(GoldId,Corpus,EndSentence,EndPosition),
   Last is (EndSentence*1000)+EndPosition,
   Gold = [First,Last],
   write(Stream,'R | ').
correct(_,_,_,Stream,w,[]):-
   format(Stream,'W | ',[]).
/* =======================================================================
Count the found-antecedent positions that lie inside the gold span
======================================================================= */
% nCorrect(+I,+N,+Beg,+End,+Old,-New)
% New is Old plus the number of indices in I..N (inclusive) that coincide
% with Beg or End or lie strictly between them.
% An empty range (I > N) contributes nothing; without this guard the
% recursion would step past N and never terminate.
nCorrect(I,N,_,_,Old,Old):-
   I > N, !.
nCorrect(I,N,Beg,End,Old,New):-
   (  ( I = Beg ; I = End ; I > Beg, I < End )
   -> Temp is Old + 1
   ;  Temp = Old
   ),
   J is I + 1,
   nCorrect(J,N,Beg,End,Temp,New).
/* =======================================================================
Check Correctness Antecedent choice
======================================================================= */
% evalAntecedent(+Found,+Gold,+Stream)
% Score a proposed antecedent span Found=[BF,EF] against the gold span
% Gold=[BG,EG], add the scores to the running totals in acc_ant/3 and write
% them to Stream. When either span is missing, ' | - | - | ' is written and
% the totals are left untouched.
%
% Result is 0 for disjoint spans, otherwise the length of the intersection
% divided by the length of the union. Fscore combines the position-level
% precision and recall computed via nCorrect/6.
%
% The zero tests use if-then-else and =:=, so that no alternative branch is
% left open for backtracking (it would divide by zero) and a float 0.0 is
% recognised as zero as well.
evalAntecedent(Found,Gold,Stream):-
   Found = [BF,EF], Gold = [BG,EG], !,
   (  ( BG > EF ; BF > EG )
   -> Result = 0
   ;  Result is ((1+(min(EF,EG)-max(BF,BG))) / (1+(max(EF,EG)-min(BF,BG))))
   ),
   retract(acc_ant(Total1,Total3,N1)),
   Total2 is Total1 + Result,
   NGold is (EG-BG)+1,
   NFound is (EF-BF)+1,
   nCorrect(BF,EF,BG,EG,0,NCorrect),
   Precision is NCorrect/NFound,
   Recall is NCorrect/NGold,
   (  Precision =:= 0, Recall =:= 0
   -> Fscore = 0
   ;  Fscore is (2*Precision*Recall)/(Precision+Recall)
   ),
   Total4 is Total3 + Fscore,
   N2 is N1 + 1,
   assert(acc_ant(Total2,Total4,N2)),
   % verbose variant of the line below, kept for debugging:
   % format(Stream,'~p | P:~p R:~p F:~p (Found:~p Gold:~p Correct:~p) | ',[Result,Precision,Recall,Fscore,NFound,NGold,NCorrect]).
   format(Stream,' | ~p | ~p | ',[Result,Fscore]).
evalAntecedent(_Found,_Gold,Stream):-
   format(Stream,' | - | - | ',[]).
/* =======================================================================
Exclude certain types
======================================================================= */
%excluded(do).
%excluded(be). % checked
%excluded(have). % checked
%excluded(can). % checked
%excluded(could). % checked
%excluded(would). % checked
%excluded(should). % checked
%excluded(will). % checked
%excluded(might). % checked
%excluded(may). % checked
%excluded(must). % checked
%excluded(to). % checked - antecedents can be improved/one missing case?
% excluded(Type): VPE types left out of the evaluation. output/6 skips
% detections of such a type; correct/6, missing/2 and the result tables
% ignore gold items that carry one.
excluded(same). % checked
excluded(likewise).% checked
excluded(so). % checked
/* =======================================================================
Calculate Results VPE detection (by type)
======================================================================= */
% results_by_type(+Stream)
% Write one recall/precision/F-score row per gold VPE type (the sorted set
% of types found in vpe_type/4), leaving out excluded/1 types.
results_by_type(Stream):-
   setof(GoldType,Id^Corp^Extra^vpe_type(Id,Corp,GoldType,Extra),GoldTypes),
   results_by_type(GoldTypes,Stream).

% results_by_type(+Types,+Stream)
% Row for a single type:
%   recall    = correctly found gold items of this type / gold items of
%               this type in the selected corpora (0 when there are none)
%   precision = correctly found gold items of this type / detections that
%               were recorded with this type (1 when there are none)
%   F-score   = harmonic mean of the two, 0 when both are 0
% A newline is written after the last type.
results_by_type([],Stream):- nl(Stream).
results_by_type([Type|Rest],Stream):-
   (  excluded(Type)
   -> true
   ;  findall(C1,
              ( found_vpe(C1,Sen,Pos,_),
                vpe(G1,C1,Sen,Pos),
                vpe_type(G1,C1,Type,_) ),
              Hits),
      length(Hits,CorrectVPEFound),
      findall(x,
              ( corpus(C2,_),
                vpe(G2,C2,_,_),
                vpe_type(G2,C2,Type,_) ),
              GoldItems),
      length(GoldItems,AllVPEInTest),
      findall(t(Sen3,Pos3),found_vpe(_,Sen3,Pos3,Type),Detections),
      length(Detections,AllVPEFound),
      (  AllVPEInTest = 0
      -> Recall = 0
      ;  Recall is CorrectVPEFound/AllVPEInTest
      ),
      (  AllVPEFound = 0
      -> Precision = 1
      ;  Precision is CorrectVPEFound/AllVPEFound
      ),
      (  Precision = 0, Recall = 0
      -> FScore = 0
      ;  FScore is (2 * Precision * Recall)/(Precision + Recall)
      ),
      format(Stream,' | ~p | ~f (~p/~p) | ~f (~p/~p) | ~f |
~n',[Type,Recall,CorrectVPEFound,AllVPEInTest,Precision,CorrectVPEFound,AllVPEFound,FScore])
   ),
   results_by_type(Rest,Stream).
/* =======================================================================
Calculate Results VPE detection
======================================================================= */
% results(+Stream)
% Write the summary section: the per-type table (results_by_type/1), a
% 'Total' row over all non-excluded types, and the average antecedent
% accuracy and F-score taken from the totals in acc_ant/3.
% Recall defaults to 0 without gold items, precision to 1 without
% detections, the F-score to 0 when both are 0, and both averages to 0 when
% no antecedent was scored.
results(Stream):-
   write(Stream,'Results VPE Detection
'), nl(Stream),
   write(Stream,''), nl(Stream),
   write(Stream,'Type | Recall | Precision | F-Score | '), nl(Stream),
   results_by_type(Stream),
   findall(C1,
           ( found_vpe(C1,Sen,Pos,_),
             vpe(G1,C1,Sen,Pos),
             vpe_type(G1,C1,T1,_),
             \+ excluded(T1) ),
           Hits),
   length(Hits,CorrectVPEFound),
   findall(x,
           ( corpus(C2,_),
             vpe(G2,C2,_,_),
             vpe_type(G2,C2,T2,_),
             \+ excluded(T2) ),
           GoldItems),
   length(GoldItems,AllVPEInTest),
   findall(t(Sen3,Pos3),found_vpe(_,Sen3,Pos3,_),Detections),
   length(Detections,AllVPEFound),
   (  AllVPEInTest = 0
   -> Recall = 0
   ;  Recall is CorrectVPEFound/AllVPEInTest
   ),
   (  AllVPEFound = 0
   -> Precision = 1
   ;  Precision is CorrectVPEFound/AllVPEFound
   ),
   (  Precision = 0, Recall = 0
   -> FScore = 0
   ;  FScore is (2 * Precision * Recall)/(Precision + Recall)
   ),
   format(Stream,'~p | ~p (~p/~p) | ~p (~p/~p) | ~p |
~n',
          ['Total',Recall,CorrectVPEFound,AllVPEInTest,Precision,CorrectVPEFound,AllVPEFound,FScore]),
   write(Stream,'
Results VPE antecedent choice'),
   acc_ant(AccTotal,AccF,AccN),
   (  AccN = 0
   -> AveAcc = 0, AveF = 0
   ;  AveAcc is AccTotal/AccN,
      AveF is AccF/AccN
   ),
   format(Stream,'Average accuracy (~p/~p): ~f
~n',[AccTotal,AccN,AveAcc]),
   format(Stream,'Average F-score (~p/~p): ~f
~n',[AccF,AccN,AveF]).
/* =======================================================================
Print VPE Sentence
======================================================================= */
% printVPE(+Words,+VPE,+Res,+Gold,+Stream)
% Print the words of the sentences that contain the VPE, the proposed
% antecedent or the gold antecedent; words of other sentences are skipped.
%   Words - list of word(Index,Word)
%   VPE   - [VPEIndex] or [VPEIndex,AntBeg,AntEnd]
%   Res   - r, m or anything else; selects the colour of the VPE word
%           (blue, orange, red respectively)
%   Gold  - [GoldBeg,GoldEnd] or []; the gold span is bracketed with [ ]
%
% The VPE word is wrapped in a coloured span (the report is an HTML file).
% The colour was previously passed to format/3 together with a format
% string that contains no directive, which format/3 rejects with a
% "too many arguments" error.
printVPE([],_,_,_,_):- !.
printVPE([word(Index,_)|L],VPE,Res,Gold,Stream):-
   \+ relevantContext(Index,VPE),
   \+ relevantContext(Index,Gold), !,
   printVPE(L,VPE,Res,Gold,Stream).
printVPE([word(Index,Word)|L],VPE,Res,Gold,Stream):-
   VPE = [Index|_], !,
   (  Res = r -> Col = blue
   ;  Res = m -> Col = orange
   ;  Col = red
   ),
   format(Stream,'<span style="color:~p">',[Col]),
   write(Stream,Word),
   write(Stream,'</span> '),
   printVPE(L,VPE,Res,Gold,Stream).
% proposed antecedent consisting of a single word
printVPE([word(Index,Word)|L],VPE,Res,Gold,Stream):-
   VPE = [_,Index,Index|_], !,
   printGoldBeg(Index,Gold,Stream),
   write(Stream,''),
   write(Stream,Word),
   printGoldEnd(Index,Gold,Stream),
   write(Stream,' '),
   printVPE(L,VPE,Res,Gold,Stream).
% first word of the proposed antecedent
printVPE([word(Index,Word)|L],VPE,Res,Gold,Stream):-
   VPE = [_,Index|_], !,
   printGoldBeg(Index,Gold,Stream),
   write(Stream,''),
   write(Stream,Word),
   printGoldEnd(Index,Gold,Stream),
   write(Stream,' '),
   printVPE(L,VPE,Res,Gold,Stream).
% last word of the proposed antecedent
printVPE([word(Index,Word)|L],VPE,Res,Gold,Stream):-
   VPE = [_,_,Index|_], !,
   printGoldBeg(Index,Gold,Stream),
   write(Stream,Word),
   printGoldEnd(Index,Gold,Stream),
   write(Stream,' '),
   printVPE(L,VPE,Res,Gold,Stream).
% any other word of a relevant sentence
printVPE([word(Index,Word)|L],VPE,Res,Gold,Stream):-
   printGoldBeg(Index,Gold,Stream),
   write(Stream,Word),
   printGoldEnd(Index,Gold,Stream),
   tab(Stream,1),
   printVPE(L,VPE,Res,Gold,Stream).
/* =======================================================================
Print Gold Antecedent
======================================================================= */
% printGoldBeg(+Index,+Gold,+Stream)
% Write '[' when Index is the first element of the two-element gold span;
% otherwise write nothing. Always succeeds.
printGoldBeg(Index,Gold,Stream):-
   (  Gold = [Index,_]
   -> write(Stream,'[')
   ;  true
   ).
% printGoldEnd(+Index,+Gold,+Stream)
% Write ']' when Index is the second element of the two-element gold span;
% otherwise write nothing. Always succeeds.
printGoldEnd(Index,Gold,Stream):-
   (  Gold = [_,Index]
   -> write(Stream,']')
   ;  true
   ).
/* =======================================================================
Relevant Context
======================================================================= */
% relevantContext(+Index,+Indices)
% Succeeds (once) when some member of Indices shares its sentence with
% Index, i.e. both round down to the same multiple of 1000.
relevantContext(Index,Indices):-
   Base is Index-mod(Index,1000),
   member(Other,Indices),
   Base =:= Other-mod(Other,1000), !.
/* =======================================================================
Missing VPE
======================================================================= */
% missing(+Corpus,+Stream)
% Collect the gold VPEs of Corpus that have no excluded/1 type and let
% missing2/2 report the ones that were not detected.
missing(Corpus,Stream):-
   findall(gold(Corpus,Sen,Pos),
           (  vpe(GoldId,Corpus,Sen,Pos),
              \+ ( excluded(Skipped), vpe_type(GoldId,Corpus,Skipped,_) )
           ),
           GoldItems),
   missing2(GoldItems,Stream).
% missing2(+GoldItems,+Stream)
% Walk over gold(Corpus,S,P) items:
%   - items present in found_vpe/4 were detected and are skipped;
%   - otherwise, if a loaded sem/5 entry contains the word at index
%     S*1000+P, an 'M' row with that sentence is written;
%   - otherwise an 'M' row marked '(no DRS)' is written.
missing2([],_).
missing2([gold(Corpus,S,P)|Rest],Stream):-
   (  found_vpe(Corpus,S,P,_)
   -> true
   ;  VPEIndex is (S*1000)+P,
      sem(SemID,Words,_,_,_),
      id(ID,SemID),
      member(word(VPEIndex,_),Words)
   -> format(Stream,'~p | ~p | M | - | - | ',[Corpus,ID]),
      printVPE(Words,[VPEIndex],m,[],Stream),
      write(Stream,' |
'),
      nl(Stream)
   ;  format(Stream,'~p | ? | M | - | - | ~p ~p (no DRS) |
~n',[Corpus,S,P])
   ),
   missing2(Rest,Stream).
/* =======================================================================
Automatically start
======================================================================= */
% Run the evaluation as soon as the file is loaded. halt/0 is only reached
% when main/0 succeeds.
:- main, halt.