/** <module> LSP Parser For Formatter

Module for parsing Prolog source code, for subsequent formatting

@author James Cash
@tbd Files using quasi-quotations aren't currently supported; prolog_read_source_term/4 needs to be taught how to load them correctly.
*/

% Library dependencies. readutil is imported selectively: only the two
% predicates used for reading lines and whole files are pulled in.
:- use_module(library(apply)).
:- use_module(library(apply_macros)).
:- use_module(library(clpfd)).
:- use_module(library(prolog_source)).
:- use_module(library(readutil), [ read_line_to_codes/2,
                                   read_file_to_string/3 ]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Reading in terms
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%!  file_lines_start_end(+Path, -LineCharRange)
%
%   LineCharRange is a list of line_start_end(Line, Start, End) terms
%   for the file at Path, in file order. Line is numbered from 1;
%   Start is the character offset at which the line begins and End is
%   one less than the character offset at which the following line
%   begins (both taken from the stream's `char_count`).
%
%   The loop also records an entry for the final read that returns
%   `end_of_file`, so the list always ends with one extra entry whose
%   End is smaller than its Start.
file_lines_start_end(Path, LineCharRange) :-
    % Acc is destructively updated with nb_setarg/3 so that the collected
    % data survives backtracking into repeat/0.
    Acc = line_data([], line(1, 0)),
    setup_call_cleanup(
        open(Path, read, Stream),
        (   repeat,
            read_line_to_codes(Stream, Line),
            % After reading, the stream sits at the start of the next line.
            stream_property(Stream, position(Position)),
            stream_position_data(char_count, Position, NewLineStart),
            arg(2, Acc, line(LastLine, LastLineStart)),
            arg(1, Acc, Data),
            LastLineEnd is NewLineStart - 1,
            nb_setarg(1, Acc,
                      [line_start_end(LastLine, LastLineStart, LastLineEnd)|Data]),
            NextLine is LastLine + 1,
            nb_setarg(2, Acc, line(NextLine, NewLineStart)),
            Line == end_of_file,
            !
        ),
        close(Stream)),
    % Entries were pushed newest-first; restore file order.
    arg(1, Acc, RangesReversed),
    reverse(RangesReversed, LineCharRange).

%!  read_term_positions(+Path, -TermsWithPositions)
%
%   Read every term of the Prolog source at Path with
%   prolog_read_source_term/4 and unify TermsWithPositions with a list
%   of dicts, one per term in file order, with the keys `term`, `pos`,
%   `subterm`, `varible_names` and `comments`. The last dict is the one
%   for the `end_of_file` term, so that comments trailing the final
%   clause are not lost.
%
%   Before a term is stored, each of its named variables is bound to
%   var(Name) so the original variable names survive in the term.
%   Syntax errors are raised as exceptions (`syntax_errors(error)`).
%
%   NOTE(review): the key `varible_names` is misspelt; it is kept as-is
%   because consumers outside this view may rely on it.
read_term_positions(Path, TermsWithPositions) :-
    % Acc is updated with nb_setarg/3 so results survive the
    % failure-driven repeat loop.
    Acc = data([]),
    prolog_canonical_source(Path, SourceId),
    setup_call_cleanup(
        prolog_open_source(SourceId, Stream),
        (   repeat,
            prolog_read_source_term(Stream, Term, _Ex,
                                    [ term_position(TermPos),
                                      subterm_positions(SubTermPos),
                                      variable_names(VarNames),
                                      comments(Comments),
                                      syntax_errors(error)
                                    ]),
            maplist([Name=Var]>>( Var = var(Name) ), VarNames),
            arg(1, Acc, Lst),
            nb_setarg(1, Acc,
                      [_{term: Term, pos: TermPos, subterm: SubTermPos,
                         varible_names: VarNames, comments: Comments}|Lst]),
            % Use ==/2, not =/2: a clause that is a bare anonymous
            % variable would otherwise unify with end_of_file and end
            % the loop before the real end of the file.
            Term == end_of_file,
            !
        ),
        prolog_close_source(Stream)),
    arg(1, Acc, TermsWithPositionsRev),
    reverse(TermsWithPositionsRev, TermsWithPositions).

% Holds the full text of the file currently being expanded; it is read
% when reifying "simple" terms, which are copied verbatim from the source.
:- thread_local current_source_string/1.

%!  reified_format_for_file(+Path, -Reified)
%
%   Read the Prolog source file at Path and unify Reified with a flat
%   list of tokens describing its layout: reified terms and comments,
%   interleaved with `newline` and white(N) tokens, and with the
%   character offsets removed again by simplify_reified_terms/2.
%
%   The file's text is made available to expand_term_positions/2
%   through the thread-local current_source_string/1 and is retracted
%   again afterwards, even if expansion fails or throws.
reified_format_for_file(Path, Reified) :-
    retractall(current_source_string(_)),
    read_file_to_string(Path, FileString, []),
    read_term_positions(Path, TermsWithPos),
    setup_call_cleanup(
        assertz(current_source_string(FileString)),
        expand_term_positions(TermsWithPos, Reified0),
        retractall(current_source_string(_))
    ),
    % Order all tokens by their starting offset (first argument),
    % keeping duplicates.
    sort(1, @=<, Reified0, Reified1),
    file_lines_start_end(Path, LinesStartEnd),
    InitState = _{last_line: 1, last_char: 0, line_bounds: LinesStartEnd},
    add_whitespace_terms(InitState, Reified1, Reified2),
    simplify_reified_terms(Reified2, Reified).

%!  simplify_reified_terms(+In, -Out)
%
%   Remove the no-longer-needed positioning information from every
%   token in In, to make things less annoying for later steps.
simplify_reified_terms(In, Out) :-
    maplist(simplify_reified_term, In, Out).

%!  simplify_reified_term(+Term, -SimpleTerm)
%
%   `newline` and white(N) carry no positions and pass through
%   unchanged. Every other token has its two leading arguments (From
%   and To) dropped; a token with no remaining arguments becomes a
%   plain atom.
simplify_reified_term(newline, newline) :- !.
simplify_reified_term(white(N), white(N)) :- !.
simplify_reified_term(Term, SimpleTerm) :-
    % all other terms have two extra args, From & To
    compound_name_arguments(Term, Name, [_, _|Args]),
    (   Args = []
    ->  SimpleTerm = Name
    ;   compound_name_arguments(SimpleTerm, Name, Args)
    ).

%!  emit_reified(+To, +Reified)
%
%   Write the list of simplified tokens Reified to the output To, which
%   is passed unchanged as the first argument of format/3.
emit_reified(_, []) :- !.
emit_reified(To, [Term|Rest]) :-
    emit_reified_(To, Term),
    emit_reified(To, Rest).

%   emit_reified_(+To, +Token)
%
%   Write the text for a single token. Defined with single-sided
%   unification rules, so an unknown token raises an error instead of
%   failing silently.
emit_reified_(To, newline) =>
    format(To, "~n", []).
emit_reified_(To, white(N)) =>
    % N space characters
    format(To, "~*c", [N, 0'\s]).
emit_reified_(To, comma) =>
    format(To, ",", []).
emit_reified_(To, simple(T)) =>
    format(To, "~s", [T]).
emit_reified_(To, simple_quoted(T)) =>
    format(To, "'~q'", [T]).
emit_reified_(To, string(T)), string(T) =>
    format(To, "~q", [T]).
emit_reified_(To, string(T)) =>
    % string term, but not a string, must be codes
    format(To, "`~s`", [T]).
emit_reified_(To, term_begin(Func, _, Parens)) =>
    % With parentheses the functor is written quoted and followed by
    % the opening paren; otherwise it is written as-is.
    (   Parens = true
    ->  Format = "~q("
    ;   Format = "~w"
    ),
    format(To, Format, [Func]).
emit_reified_(To, term_end(Parens, TermState)) =>
    (   Parens = true
    ->  MaybeClose = ")"
    ;   MaybeClose = ""
    ),
    % Only a top-level term is terminated with a full stop.
    (   TermState = toplevel
    ->  MaybeStop = "."
    ;   MaybeStop = ""
    ),
    format(To, "~w~w", [MaybeClose, MaybeStop]).
emit_reified_(To, list_begin) =>
    format(To, "[", []).
emit_reified_(To, list_tail) =>
    format(To, "|", []).
emit_reified_(To, list_end) =>
    format(To, "]", []).
emit_reified_(To, comment(Text)) =>
    format(To, "~s", [Text]).
emit_reified_(To, braces_begin) =>
    format(To, "{", []).
emit_reified_(To, braces_end) =>
    format(To, "}", []).
emit_reified_(To, parens_begin) =>
    format(To, "(", []).
emit_reified_(To, parens_end) =>
    format(To, ")", []).
emit_reified_(To, dict_tag(var(Tag))) =>
    % a named variable tag, carried as var(Name)
    format(To, "~w", [Tag]).
emit_reified_(To, dict_tag(Tag)), var(Tag) =>
    % if Tag is still a var, it must be anonymous
    format(To, "_", []).
emit_reified_(To, dict_tag(Tag)) =>
    % any other (bound) tag is written as-is
    format(To, "~w", [Tag]).
emit_reified_(To, dict_begin) =>
    format(To, "{", []).
emit_reified_(To, dict_sep) =>
    format(To, ":", []).
emit_reified_(To, dict_end) =>
    format(To, "}", []).

%!  add_whitespace_terms(+State, +Terms, -Out)
%
%   Terms is a list of positioned tokens whose first two arguments are
%   their start and end character offsets. Out is the same list with
%   `newline` and white(N) tokens inserted in front of each token, as
%   computed by sync_position_whitespace/4 from the position recorded
%   in State, and with a final `newline` appended.
add_whitespace_terms(_State, [], [newline]) :- !.
add_whitespace_terms(State, [Term|Terms], Out) :-
    arg(1, Term, TermStart),
    stream_position_at_offset(State.line_bounds, TermStart, Pos),
    sync_position_whitespace(State, Pos, Out, Out1),
    Out1 = [Term|Out2],
    % Remember where this token ended for the next gap computation.
    arg(2, Term, TermEnd),
    stream_position_at_offset(State.line_bounds, TermEnd, EndPos),
    update_state_position(State, EndPos, State1),
    add_whitespace_terms(State1, Terms, Out2).

%!  expand_term_positions(+InfoDicts, -Expanded)
%
%   Turn the dicts produced by read_term_positions/2 into a flat list
%   of positioned tokens: first the comments attached to each term,
%   then the term itself (skipped for `end_of_file`, whose dict exists
%   only to carry comments at the end of the file).
expand_term_positions([], []).
expand_term_positions([InfoDict|Rest], Expanded0) :-
    (   InfoDict.comments \= []
    ->  expand_comments_positions(InfoDict.comments, Expanded0, Expanded1)
    ;   Expanded1 = Expanded0
    ),
    Term = InfoDict.term,
    (   Term \= end_of_file % just for comments at the end
    ->  expand_subterm_positions(Term, toplevel, InfoDict.subterm,
                                 Expanded1, Expanded2)
    ;   Expanded2 = Expanded1
    ),
    expand_term_positions(Rest, Expanded2).

%   expand_comments_positions(+Comments, -Expanded, ?Tail)
%
%   Expanded-Tail is a difference list with one comment/3 token per
%   element of Comments.
expand_comments_positions([], Tail, Tail) :- !.
expand_comments_positions([Comment|Rest], Expanded, Tail) :-
    expand_comment_positions(Comment, Expanded, Tail0),
    expand_comments_positions(Rest, Tail0, Tail).

%   expand_comment_positions(+Pos-Comment, -Expanded, ?Tail)
%
%   Emit comment(From, To, Comment). From is the character offset of
%   the comment's start position; To is derived by measuring how far
%   writing the comment text advances an output stream.
expand_comment_positions(CommentPos-Comment, Expanded, ExpandedTail) :-
    term_end_position(Comment, CommentEndPosRel),
    increment_stream_position(CommentPos, CommentEndPosRel, CommentEndPos),
    stream_position_data(char_count, CommentPos, From),
    stream_position_data(char_count, CommentEndPos, To),
    Expanded = [comment(From, To, Comment)|ExpandedTail].

%   expand_subterm_positions(+Term, +TermState, +Pos, -Expanded, ?Tail)
%
%   Expanded-Tail is a difference list of positioned tokens for Term,
%   whose layout is described by the subterm-position term Pos.
%   TermState is `toplevel` for a whole clause, `subterm_item` when a
%   comma token must follow the term (see maybe_add_comma/4), and
%   `false` otherwise.
expand_subterm_positions(Term, _TermState,
                         term_position(_From, _To, FFrom, FTo, SubPoses),
                         Expanded, ExTail),
        functor(Term, ',', _, _) =>
    % special-case comma terms to be reified as commas
    Expanded = [comma(FFrom, FTo)|ExpandedTail0],
    functor(Term, _, Arity, _),
    expand_term_subterms_positions(false, Term, Arity, 1, SubPoses,
                                   ExpandedTail0, ExTail).
expand_subterm_positions(Term, TermState,
                         term_position(From, To, FFrom, FTo, SubPoses),
                         Expanded, ExTail) =>
    % using functor/4 to allow round-tripping zero-arity functors
    functor(Term, Func, Arity, TermType),
    % better way to tell if term is parenthesized?
    % read functor from current_source_string/1 (as with simple below)
    % and see if parens are there?
    %
    % The term counts as written in functional notation when it starts
    % with its functor and extends past its last argument. With no
    % arguments at all (as in `foo()`), compare against the end of the
    % functor instead: max_subterm_to/2 fails on an empty list, which
    % would otherwise drop the parentheses.
    (   From = FFrom,
        (   SubPoses == []
        ->  ArgsEnd = FTo
        ;   max_subterm_to(SubPoses, ArgsEnd)
        ),
        To > ArgsEnd
    ->  Parens = true,
        FTo1 is FTo + 1 % add space for the parenthesis
    ;   Parens = false,
        FTo1 = FTo
    ),
    Expanded = [term_begin(FFrom, FTo1, Func, TermType, Parens)|ExpandedTail0],
    expand_term_subterms_positions(Parens, Term, Arity, 1, SubPoses,
                                   ExpandedTail0, ExpandedTail1),
    succ(To0, To),
    ExpandedTail1 = [term_end(To0, To, Parens, TermState)|ExpandedTail2],
    maybe_add_comma(TermState, To, ExpandedTail2, ExTail).
expand_subterm_positions(Term, TermState, string_position(From, To),
                         Expanded, Tail) =>
    Expanded = [string(From, To, Term)|Tail0],
    maybe_add_comma(TermState, To, Tail0, Tail).
expand_subterm_positions(_Term, TermState, From-To, Expanded, Tail) =>
    % Primitive terms are copied verbatim from the source text.
    current_source_string(FileString),
    Length is To - From,
    sub_string(FileString, From, Length, _, SimpleString),
    Expanded = [simple(From, To, SimpleString)|Tail0],
    maybe_add_comma(TermState, To, Tail0, Tail).
expand_subterm_positions(Term, TermState,
                         list_position(From, To, Elms, HasTail),
                         Expanded, Tail) =>
    assertion(is_listish(Term)),
    ListBeginTo is From + 1,
    Expanded = [list_begin(From, ListBeginTo)|Expanded1],
    expand_list_subterms_positions(Term, Elms, Expanded1, Expanded2),
    succ(To0, To),
    (   HasTail = none
    ->  Expanded2 = [list_end(To0, To)|Tail0]
    ;   % The bar is placed on the character just before the tail.
        arg(1, HasTail, TailFrom),
        succ(TailBarFrom, TailFrom),
        Expanded2 = [list_tail(TailBarFrom, TailFrom)|Expanded3],
        list_tail(Term, Elms, ListTail),
        expand_subterm_positions(ListTail, false, HasTail,
                                 Expanded3, Expanded4),
        Expanded4 = [list_end(To0, To)|Tail0]
    ),
    maybe_add_comma(TermState, To, Tail0, Tail).
expand_subterm_positions(Term, TermState,
                         brace_term_position(From, To, BracesPos),
                         Expanded, Tail) =>
    BraceTo is From + 1,
    Expanded = [braces_begin(From, BraceTo)|Tail0],
    Term = {Term0},
    expand_subterm_positions(Term0, false, BracesPos, Tail0, Tail1),
    succ(To1, To),
    Tail1 = [braces_end(To1, To)|Tail2],
    % The comma follows the closing brace, so it starts at To (not at
    % To1, which is the position of the brace itself).
    maybe_add_comma(TermState, To, Tail2, Tail).
expand_subterm_positions(Term, TermState,
                         parentheses_term_position(From, To, ContentPos),
                         Expanded, Tail) =>
    ParenTo is From + 1,
    Expanded = [parens_begin(From, ParenTo)|Tail0],
    expand_subterm_positions(Term, false, ContentPos, Tail0, Tail1),
    succ(To1, To),
    Tail1 = [parens_end(To1, To)|Tail2],
    maybe_add_comma(TermState, To, Tail2, Tail).
expand_subterm_positions(Term, TermState,
                         dict_position(_From, To, TagFrom, TagTo, KeyValPos),
                         Expanded, Tail) =>
    is_dict(Term, Tag),
    DictBraceTo is TagTo + 1,
    Expanded = [ dict_tag(TagFrom, TagTo, Tag),
                 dict_begin(TagTo, DictBraceTo)
               | Tail0
               ],
    expand_dict_kvs_positions(Term, KeyValPos, Tail0, Tail1),
    succ(To1, To),
    Tail1 = [dict_end(To1, To)|Tail2],
    maybe_add_comma(TermState, To, Tail2, Tail).
%   maybe_add_comma(+TermState, +CommaFrom, -Tail0, ?Tail)
%
%   When TermState is `subterm_item`, Tail0 starts with a one-character
%   comma token beginning at offset CommaFrom, followed by Tail;
%   otherwise Tail0 is Tail.
maybe_add_comma(subterm_item, CommaFrom, Tail0, Tail) :- !,
    CommaTo is CommaFrom + 1,
    Tail0 = [comma(CommaFrom, CommaTo)|Tail].
maybe_add_comma(_, _, Tail, Tail).

%   is_listish(@L)
%
%   True when L is list-like: an unbound variable (an open tail), the
%   empty list or a list cell. L is never bound by this check.
is_listish(L) :- var(L), !.
is_listish([]).
is_listish([_|_]).

%   list_tail(+List, +ElemPositions, -Tail)
%
%   Tail is what remains of List after skipping one element for each
%   entry of ElemPositions.
list_tail(Tail, [], Tail) :- !.
list_tail([_|Rest], [_|PosRest], Tail) :-
    list_tail(Rest, PosRest, Tail).

%   max_subterm_to(+SubPoses, -SubTermMaxTo)
%
%   SubTermMaxTo is the largest second argument (end offset) among the
%   position terms in SubPoses. Fails when SubPoses is empty.
max_subterm_to(SubPoses, SubTermMaxTo) :-
    aggregate_all(max(To),
                  ( member(Pos, SubPoses),
                    arg(2, Pos, To) ),
                  SubTermMaxTo).

%   expand_dict_kvs_positions(+Dict, +KeyValuePositions, -Expanded, ?Tail)
%
%   Emit the tokens for each key-value pair of Dict: the key, a
%   dict_sep token, the value and, for every pair except the last, a
%   comma token starting at the end of the pair.
expand_dict_kvs_positions(_, [], Tail, Tail) :- !.
expand_dict_kvs_positions(Dict, [Pos|Poses], Expanded0, Tail) :-
    Pos = key_value_position(_From, To, SepFrom, SepTo, Key, KeyPos, ValuePos),
    get_dict(Key, Dict, Value),
    expand_subterm_positions(Key, false, KeyPos, Expanded0, Expanded1),
    Expanded1 = [dict_sep(SepFrom, SepTo)|Expanded2],
    expand_subterm_positions(Value, false, ValuePos, Expanded2, Expanded3),
    CommaTo is To + 1,
    (   Poses = [_|_]
    ->  Expanded3 = [comma(To, CommaTo)|Expanded4]
    ;   Expanded3 = Expanded4
    ),
    expand_dict_kvs_positions(Dict, Poses, Expanded4, Tail).

%   expand_list_subterms_positions(+List, +ElemPositions, -Expanded, ?Tail)
%
%   Emit the tokens for the elements of List. Every element except the
%   last positioned one is followed by a comma token.
%
% possible for the list to still have a tail when out of positions
expand_list_subterms_positions(_, [], Tail, Tail) :- !.
expand_list_subterms_positions([Term|Terms], [Pos|Poses], Expanded, Tail) :-
    (   Poses = [_|_]
    ->  TermState = subterm_item
    ;   TermState = false
    ),
    expand_subterm_positions(Term, TermState, Pos, Expanded, Expanded1),
    expand_list_subterms_positions(Terms, Poses, Expanded1, Tail).

%   expand_term_subterms_positions(+Parens, +Term, +Arity, +Arg,
%                                  +SubPoses, -Expanded, ?Tail)
%
%   Emit the tokens for the arguments of Term from argument number Arg
%   onwards, one per entry of SubPoses. Arguments are followed by a
%   comma token only when the term is written with parentheses and the
%   argument is not the last one.
expand_term_subterms_positions(_Parens, _Term, _Arity, _Arg, [], Tail, Tail) :- !.
expand_term_subterms_positions(Parens, Term, Arity, Arg, [SubPos|Poses],
                               Expanded, ExpandedTail) :-
    assertion(between(1, Arity, Arg)),
    arg(Arg, Term, SubTerm),
    (   Parens = true, Arg < Arity
    ->  State = subterm_item
    ;   State = false
    ),
    expand_subterm_positions(SubTerm, State, SubPos, Expanded, Expanded0),
    succ(Arg, Arg1),
    expand_term_subterms_positions(Parens, Term, Arity, Arg1, Poses,
                                   Expanded0, ExpandedTail).

%   increment_stream_position(+StartPos, +RelPos, -EndPos)
%
%   EndPos is the stream position reached by advancing StartPos by the
%   relative position RelPos. Character and byte counts are added. If
%   RelPos is still on line 1 the column is added to StartPos's column;
%   otherwise the line count advances by RelPos's line count minus one
%   and the column is RelPos's column.
increment_stream_position(StartPos, RelPos, EndPos) :-
    stream_position_data(char_count, StartPos, StartCharCount),
    stream_position_data(char_count, RelPos, RelCharCount),
    CharCount is StartCharCount + RelCharCount,
    stream_position_data(byte_count, StartPos, StartByteCount),
    stream_position_data(byte_count, RelPos, RelByteCount),
    ByteCount is StartByteCount + RelByteCount,
    stream_position_data(line_count, StartPos, StartLineCount),
    stream_position_data(line_count, RelPos, RelLineCount),
    stream_position_data(line_position, StartPos, StartLinePosition),
    stream_position_data(line_position, RelPos, RelLinePosition),
    (   RelLineCount == 1
    ->  LineCount = StartLineCount,
        LinePosition is StartLinePosition + RelLinePosition
    ;   LineCount is StartLineCount + RelLineCount - 1,
        LinePosition = RelLinePosition
    ),
    EndPos = '$stream_position_data'(CharCount, LineCount, LinePosition, ByteCount).

%   update_state_position(+State0, +EndPos, -State)
%
%   State is State0 with `last_line` and `last_char` set to the line
%   count and line position of EndPos.
update_state_position(State0, EndPos, State2) :-
    stream_position_data(line_count, EndPos, EndLineCount),
    stream_position_data(line_position, EndPos, EndLinePos),
    put_dict(last_line, State0, EndLineCount, State1),
    put_dict(last_char, State1, EndLinePos, State2).
%   sync_position_whitespace(+State, +TermPos, -Expanded, ?ExpandedTail)
%
%   Expanded-ExpandedTail is a difference list holding the whitespace
%   tokens needed to get from the position recorded in State
%   (`last_line`, `last_char`) to TermPos: one `newline` per line
%   advanced, then a white(N) token if the target column lies N > 0
%   characters past the current column. After a newline the current
%   column is taken to be 0.
sync_position_whitespace(State, TermPos, Expanded, ExpandedTail) :-
    PrevLineCount = State.last_line,
    stream_position_data(line_count, TermPos, NewLineCount),
    NewLines is NewLineCount - PrevLineCount,
    (   NewLines > 0
    ->  n_copies_of(NewLines, newline, Expanded, Expanded0),
        PrevLinePosition = 0
    ;   Expanded = Expanded0,
        PrevLinePosition = State.last_char
    ),
    stream_position_data(line_position, TermPos, NewLinePosition),
    Whitespace is NewLinePosition - PrevLinePosition,
    (   Whitespace > 0
    ->  Expanded0 = [white(Whitespace)|ExpandedTail]
    ;   Expanded0 = ExpandedTail
    ).

%   file_offset_line_position(+LineCharMap, +CharCount, -Line, -LinePosition)
%
%   Find the first line_start_end(Line, Start, End) entry of
%   LineCharMap whose range contains the character offset CharCount;
%   LinePosition is the offset relative to the start of that line.
%   Fails if no entry contains CharCount.
file_offset_line_position(LineCharMap, CharCount, Line, LinePosition) :-
    member(line_start_end(Line, Start, End), LineCharMap),
    between(Start, End, CharCount),
    % Plain integer arithmetic; no constraint solving is needed here.
    LinePosition is CharCount - Start,
    !.

%   stream_position_at_offset(+LineCharMap, +To, -EndPos)
%
%   EndPos is a stream-position term for the character offset To,
%   readable with stream_position_data/3. The byte count is set to the
%   character count.
stream_position_at_offset(LineCharMap, To, EndPos) :-
    CharCount = To,
    ByteCount = To, % need to check for multibyte...
    file_offset_line_position(LineCharMap, To, LineCount, LinePosition),
    % breaking the rules, building an opaque term
    EndPos = '$stream_position_data'(CharCount, LineCount, LinePosition, ByteCount).

% Helpers

%   term_end_position(+Term, -Position)
%
%   Position is the stream position reached after writing Term with
%   write/2 to a fresh null stream, i.e. the extent of the written text
%   relative to the start of a stream.
term_end_position(Term, Position) :-
    setup_call_cleanup(
        open_null_stream(Out),
        (   write(Out, Term),
            stream_property(Out, position(Position))
        ),
        close(Out)).

%   n_copies_of(+N, +ToCopy, -List, ?Tail)
%
%   List-Tail is a difference list of N copies of ToCopy. Fails for
%   negative N instead of recursing without end.
n_copies_of(0, _, Tail, Tail) :- !.
n_copies_of(N, ToCopy, [ToCopy|Rest], Tail) :-
    N > 0,
    N1 is N - 1,
    n_copies_of(N1, ToCopy, Rest, Tail).