1:- module(xml_reader,[fileToLineInfoElements/3]).
8:- use_module(library(sgml)). 9
10atrace:-trace.
11useCateID:- fail.
12:- dynamic(aimlCateSig/1). 13:- dynamic(aimlCate/13). 14
15prolog_mostly_ground(Out):-ground(Out),!.
16prolog_mostly_ground(Out):-var(Out),!,atrace.
17prolog_mostly_ground([H|_Out]):-!,prolog_must(prolog_mostly_ground1(H)),!.
18prolog_mostly_ground(Out):- ((arg(_N,Out,Arg),prolog_must(prolog_mostly_ground1(Arg)),fail));true.
19prolog_mostly_ground1(Out):-prolog_must(nonvar(Out)).
20
21aimlCateSig(_).
25get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS):-
26 current_prolog_flag(sgml_parser_defaults,PARSER_DEFAULTS),
27 current_prolog_flag(sgml_parser_callbacks,PARSER_CALLBACKS),!.
28
29get_sgml_parser_defs(
30 [defaults(false), space(remove),qualify_attributes(false),
31 32 33 34 dialect(xml)
35 ],
36 [max_errors(0),call(begin, on_begin),call(end, on_end)]).
37
38
39
42
43
44on_end('aiml', _) :- !,
45 ignore(retract(in_aiml_tag(_))).
46
47on_begin('aiml', Attribs, _) :- !,
48 asserta(in_aiml_tag(Attribs)).
49
50
51on_begin(Tag, Attr, Parser) :- skipOver(not(inLineNum)),
52 get_sgml_parser(Parser,context(Context)), Context=[Tag,aiml|_],
53 skipOver(debugFmt(on_begin(Tag, Attr, Context))),
54 skipOver(retract(in_aiml_tag(AimlAttr))),
55 56 get_sgml_parser(Parser,line(Line)),
57 get_sgml_parser(Parser,charpos(Offset)),
58 get_sgml_parser(Parser,file(File)),
59 global_pathname(File,Pathname),
60 61 skipOver(asserta(inLineNum)),
63 64 NEW = t_l:lineInfoElem(Pathname,Line:Offset, Context, element(Tag, Attr, no_content_yet)),
65 66 skipOver(ignore(retract(inLineNum))),
67 skipOver(asserta(in_aiml_tag(AimlAttr))),
68 assertz(NEW),!.
69
70on_begin(_Tag, _Attr, _Parser) :-!. 71
73on_begin_ctx(_TAG, _URL, _Parser, _Context) :- !. 74
75
76
77:- thread_local
78 xmlns/3. 79
80on_xmlns(rdf, URL, _Parser) :- !,debugFmt(on_xmlns(URL, rdf)),asserta(xmlns(URL, rdf, _)).
81on_xmlns(TAG, URL, _Parser) :- sub_atom(URL, _, _, _, 'rdf-syntax'), !,
82 debugFmt('rdf-syntax'(URL, TAG)),
83 immediateCall(_Ctx,asserta(xmlns(URL, rdf, _))).
84on_xmlns(TAG, URL, _Parser) :- debugFmt(on_xmlns(URL, TAG)).
85
86on_decl(URL, _Parser) :- debugFmt(on_decl(URL)).
87on_pi(URL, _Parser) :- debugFmt(on_pi(URL)).
88
89
90xml_error(TAG, URL, Parser) :- !, debugFmt(xml_error(URL, TAG, Parser)).
94
95load_aiml_structure_lineno(Attributes,Ctx,L):-must_maplist(load_inner_aiml_lineno(Attributes,Ctx),L),!.
96
97:-thread_local(t_l:lineInfoElem/4). 98
99load_inner_aiml_lineno(Attributes,Ctx,element(Tag,Attribs,ContentIn)):-
100 appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
101 load_aiml_structure(Ctx,element(Tag,RightAttribs,ContentIn)),!.
102
117 131
132
133
134
135
136tls :- string_to_structure('<aiml><p>hi</p></aiml>',X),debugFmt(X).
137tls2 :- string_to_structure('<?xml version="1.0" encoding="ISO-8859-1"?>\n<aiml><p>hi</p></aiml>\n\n',X),debugFmt(X).
138
139string_to_structure(String,XMLSTRUCTURESIN):- fail, sformat(Strin0,'<pre>~s</pre>',[String]),string_to_structure0(Strin0,XMLSTRUCTURES),!,
140 prolog_must([element(pre,[],XMLSTRUCTURESIN)]=XMLSTRUCTURES).
141
142string_to_structure(String,XMLSTRUCTURES):- string_to_structure0(String,XMLSTRUCTURES),!.
143string_to_structure0(String,XMLSTRUCTURES):-
144 145 PARSER_DEFAULTS = [defaults(false), space(remove),qualify_attributes(false),dialect(xml)],
146 string_to_structure0(String,PARSER_DEFAULTS,XMLSTRUCTURES),!.
147
148string_to_structure(String,PARSER_DEFAULTS0,XMLSTRUCTURES):-string_to_structure0(String,PARSER_DEFAULTS0,XMLSTRUCTURES).
149
150string_to_structure0(String,PARSER_DEFAULTS0,XMLSTRUCTURESIN):-
151 setup_call_cleanup(((string_to_stream(String,In),new_sgml_parser(Parser, []))),
152 prolog_must((
153 atom_length(String,Len),
154 append(PARSER_DEFAULTS0,[],PARSER_DEFAULTS),
155 must_maplist(set_sgml_parser(Parser),PARSER_DEFAULTS),
156 string_parse_structure(Len, Parser, user:PARSER_DEFAULTS, XMLSTRUCTURES, In)
157 )),
158 (free_sgml_parser(Parser),close(In))),!,prolog_must(XMLSTRUCTURESIN=XMLSTRUCTURES).
159
160string_parse_structure(Len,Parser, M:Options, Document, In) :-
161 quietly((catch(call(call,string_parse_structure_opts_547(Parser),In,M,Options,Options2),_,string_parse_structure_opts(Parser,In,M,Options,Options2)))),
162 163 sgml:sgml_parse(Parser,
164 [ document(Document),
165 source(In),
166 parse(input),
167 content_length(Len)
168 | Options2
169 ]).
170
176
177string_parse_structure_opts(Parser,In,M,Options,Options2):-
178 sgml:set_parser_options(Options, Parser, In, Options1),
179 sgml:parser_meta_options(Options1, M, Options2).
180
181
182
183fileToLineInfoElements(Ctx,File,XMLSTRUCTURES):-
184 atom_concat(File,'.term',Elis),
185 ((fail,file_newer(Elis,File)) ->
186 termFileContents(Ctx,Elis,XMLSTRUCTURES) ;
187 fileToLineInfoElements0(Ctx,File,XMLSTRUCTURES)).
188
189
190termFileContents(_Ctx,File,termFileContents(File)):-!. 191termFileContents(_Ctx,File,element(aiml,[],XMLSTRUCTURES)):-
192 setup_call_cleanup((open(File, read, In, [])),
193 findall(Elem,((repeat,read(In,Elem),((Elem\=end_of_file)->true;!))),XMLSTRUCTURES), close(In)),!
193.
194
195
196
197
199fileToLineInfoElements0(Ctx,F0,XMLSTRUCTURES):-
200 global_pathname(F0,File),
201 retractall(t_l:lineInfoElem(File,_,_,_)),
202 setup_call_cleanup((open(File, read, In, [type(binary)]),new_sgml_parser(Parser, [])),
203
204 prolog_must((
205 get_sgml_parser_defs(PARSER_DEFAULTS,PARSER_CALLBACKS),
206 must_maplist(set_sgml_parser(Parser),[file(File)|PARSER_DEFAULTS]),
208 sgml_parse(Parser,[source(In)|PARSER_CALLBACKS]))
208)
208,
209
210 (free_sgml_parser(Parser),close(In)))
210,!,
211
212
213 fileToLineInfoElements2(Ctx,File,XMLSTRUCTURES)
213.
214
215
217fileToLineInfoElements2(Ctx,File,XMLSTRUCTURES):-!,
218 get_sgml_parser_defs(PARSER_DEFAULTS,_PARSER_CALLBACKS),
219 setup_call_cleanup(open(File, read, In, [type(binary)]),(load_structure(In,Whole, [file(File)|PARSER_DEFAULTS]),!,
220 load_inner_aiml_w_lineno(File,[],[],[],Ctx,Whole,XMLSTRUCTURES)),close(In)),!.
221
222load_inner_aiml_w_lineno(_SrcFile,_OuterTag,_Parent,_Attributes,_Ctx,Atom,Atom):-(atomic(Atom);var(Atom)),!.
223load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,Ctx,[H|T],LL):-!,
224 must_maplist(load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,Ctx),[H|T],LL),!.
225
227load_inner_aiml_w_lineno(SrcFile,[OuterTag|PREV],Parent,Attributes,Ctx,element(Tag,Attribs,ContentIn),element(Tag,NewAttribs,ContentOut)):-
228 Context=[Tag,OuterTag|_],
229 MATCH = t_l:lineInfoElem(SrcFile,Line:Offset, Context, element(Tag, Attribs, no_content_yet)),
230 MATCH,!,
231 ignore(Line = nonfile),
232 ignore(Offset = nonfile),
233 appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
234 235 Src = nosrc,
236 appendAttributes(Ctx,[srcfile=SrcFile:Line-Offset,srcinfo=Src],RightAttribs,NewAttribs),
237 ignore(retract(MATCH)),
238 (member(Tag,[aiml,topic]) -> NextAttribs = NewAttribs ; NextAttribs = []),
239 must_maplist(load_inner_aiml_w_lineno(SrcFile,[Tag,OuterTag|PREV],Parent,NextAttribs,Ctx),ContentIn,ContentOut),!.
240
241load_inner_aiml_w_lineno(SrcFile,MORE,Parent,Attributes,Ctx,element(Tag,Attribs,ContentIn),element(Tag,RightAttribs,ContentOut)):-
242 appendAttributes(Ctx,Attributes,Attribs,RightAttribs),
243 load_inner_aiml_w_lineno(SrcFile,[Tag|MORE],Parent,[],Ctx,ContentIn,ContentOut),!.
244
245load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,_Ctx,L,L):-
246 aiml_error(load_inner_aiml_w_lineno(SrcFile,OuterTag,Parent,Attributes,L)).
247
248
249addAttribsToXML(Attribs,element(Tag,Pre,Content),element(Tag,Post,Content)):-appendAttributes(_Ctx,Pre,Attribs,Post),!.
250addAttribsToXML(Attribs,[H|T],OUT):-must_maplist(addAttribsToXML(Attribs),[H|T],OUT),!.
251addAttribsToXML(Attribs,OUT,OUT):-!,debugFmt(addAttribsToXML(Attribs,OUT,OUT)),!.
252
253
254appendAttributes(_Ctx,L,R,AA):-hotrace((mergeAppend0(L,R,A),list_to_set_safe(A,AA))),!.
255mergeAppend0(L,R,R):-var(L),!,var(R),!.
256mergeAppend0(L,R,A):-var(R),append(L,R,A),!.
257mergeAppend0(L,R,A):-var(L),append(L,R,A),!.
258mergeAppend0(L,[R|RR],A):-eqmember(R,L),mergeAppend0(L,RR,A).
259mergeAppend0([L|LL],R,A):-eqmember(L,R),mergeAppend0(LL,R,A).
260mergeAppend0(L,R,A):-append(L,R,A).
261
262eqmember(E,List):-copy_term_numvars(E:List,E0:List0),member(E0,List0).
263
264list_to_set_safe(A,A):-(var(A);atomic(A)),!.
265list_to_set_safe([A|AA],BB):- (not(not(lastMember(A,AA))) -> list_to_set_safe(AA,BB) ; (list_to_set_safe(AA,NB),BB=[A|NB])),!.
266
267
268lastMember(E,List):-hotrace(lastMember0(E,List)).
269
270lastMember0(_E,List):-var(List),!,fail.
271lastMember0(E,[H|List]):-lastMember0(E,List);E=H.
272
273lastMember(E,List,Rest):-hotrace(lastMember0(E,List,Rest)).
274
275lastMember0(E,List,Rest):-lastMember0(E,List),!,delete_safe(List,E,Rest),!.
276lastMember0(E,List,Rest):-lastMember0(EE,List),!,lastMember0(E,EE,Rest),!,atrace. 277
278delete_safe(List,_E,Rest):-var(List),!,Rest=List.
279delete_safe(List,E,Rest):-is_list(List),!,delete(List,E,Rest).
280delete_safe([H|List],E,Rest):- H==E,!,delete_safe(List,E,Rest).
281delete_safe([H|List],E,[H|Rest]):-delete_safe(List,E,Rest).
282
283
284getKeyValue(FullList,N=V):-lastMember(N=V,FullList),!.
286addKeyValue(FullList,NV):- prolog_must((not(ground(FullList)),nonvar(NV))),append(_Closed,[NV|_],FullList),!.
287
288
289lastMember2(E,List):-to_open_list(_,Closed,_Open,List),reverse(Closed,Rev),member(E,Rev).
290
292
293
294to_open_list(FullList,Closed,Open,FullList) :- append(Closed,Open,FullList),var(Open),!.
295to_open_list(Closed,Closed,Open,FullList) :- append(Closed,Open,FullList),!.
296
297
298copy_term_numvars(OLD,NEW):-copy_term(OLD,NEW),numbervars(NEW,0,_).
299
300
301error_catch(C,E,F):-E=error(E1,E2),!,catch(C,error(E1,E2),F).
302error_catch(C,E,F):-nonvar(E),!,catch(C,E,F).
303error_catch(C,E,F):-catch(C,E,(needs_rethrown(E),F)).
304needs_rethrown(E):- functor(aiml_goto,E,_),!,throw(E).
305needs_rethrown(E):- functor(aiml_novalue,E,_),!,throw(E).
306needs_rethrown(_).
307
308hotrace(G):- quietly(G).
309
310
311:-thread_local(in_aiml_tag/1). 312:-thread_local(inLineNum/0). 313
314skipOver(_).
315
316aiml_error(G):- wdmsg(aiml_error(G)),!.
317
318immediateCall(_,G):- trace,call(G).
319
320:- use_module(library(memfile)).
323string_to_stream(String,InStream):- string(String),string_to_atom(String,Atom),!,string_to_stream(Atom,InStream).
324string_to_stream(Atom,InStream):- atom_to_memory_file(Atom, Handle),open_memory_file(Handle,read,InStream).
325
326
328load_aiml_structure(Ctx,O):-atomic(O),!,debugFmt(load_aiml_structure(Ctx,O)),!.
329
330
331:- fixup_exports.
Utility LOGICMOO XML READER
Allows you to read xml files from prolog.