1:- module(lsp_formatter_parser, [ reified_format_for_file/2,
    2                                  emit_reified/2 ]).

LSP Parser For Formatter

Module for parsing Prolog source code, for subsequent formatting

author
- James Cash

*/

   11:- use_module(library(apply)).   12:- use_module(library(apply_macros)).   13:- use_module(library(clpfd)).   14:- use_module(library(rbtrees)).   15:- use_module(library(readutil), [ read_file_to_string/3 ]).   16
   17:- include('_lsp_path_add.pl').   18
   19:- use_module(lsp(lsp_reading_source), [ file_lines_start_end/2,
   20                                         read_term_positions/2,
   21                                         file_offset_line_position/4 ]).   22
   23:- thread_local current_source_string/1.
 reified_format_for_file(+Path:string, -Reified:list) is det
Read the prolog source file at Path into a flattened list of terms indicating content, comments, and whitespace.
   29reified_format_for_file(Path, Reified) :-
   30    retractall(current_source_string(_)),
   31    read_file_to_string(Path, FileString, []),
   32    read_term_positions(Path, TermsWithPos),
   33    setup_call_cleanup(
   34        assertz(current_source_string(FileString)),
   35        expand_term_positions(TermsWithPos, Reified0),
   36        retractall(current_source_string(_))
   37    ),
   38    sort(1, @=<, Reified0, Reified1),
   39    file_lines_start_end(Path, LinesStartEnd),
   40    InitState = _{last_line: 1, last_char: 0, line_bounds: LinesStartEnd},
   41    add_whitespace_terms(InitState, Reified1, Reified2),
   42    simplify_reified_terms(Reified2, Reified).
   43
   44% Remove no-longer needed positioning information to make things less
   45% annoying for later steps.
   46simplify_reified_terms(In, Out) :-
   47    maplist(simplify_reified_term, In, Out).
   48
   49simplify_reified_term(newline, newline) :- !.
   50simplify_reified_term(white(N), white(N)) :- !.
   51simplify_reified_term(Term, SimpleTerm) :-
   52    % all other terms have two extra args, From & To
   53    compound_name_arguments(Term, Name, [_, _|Args]),
   54    ( Args = []
   55    -> SimpleTerm = Name
   56    ;  compound_name_arguments(SimpleTerm, Name, Args) ).
 emit_reified(+To, +Reified) is det
Output source file as read with reified_format_for_file/2 to To, as format/3.
   62emit_reified(_, []) :- !.
   63emit_reified(To, [Term|Rest]) :-
   64    emit_reified_(To, Term),
   65    emit_reified(To, Rest).
   66
   67emit_reified_(To, newline) => format(To, "~n", []).
   68emit_reified_(To, white(N)) =>
   69    length(Whites, N),
   70    maplist(=(0' ), Whites),
   71    format(To, "~s", [Whites]).
   72emit_reified_(To, comma) => format(To, ",", []).
   73emit_reified_(To, simple(T)) =>
   74    format(To, "~s", [T]).
   75emit_reified_(To, simple_quoted(T)) =>
   76    format(To, "'~q'", [T]).
   77emit_reified_(To, string(T)), string(T) =>
   78    format(To, "~q", [T]).
   79emit_reified_(To, string(T)) =>
   80    % string term, but not a string, must be codes
   81    format(To, "`~s`", [T]).
   82emit_reified_(To, term_begin(Func, _, Parens)) =>
   83    ( Parens = true
   84    -> Format = "~w("
   85    ;  Format = "~w" ),
   86    format(To, Format, [Func]).
   87emit_reified_(To, term_end(Parens, TermState)) =>
   88    ( Parens = true
   89    -> MaybeClose = ")"
   90    ; MaybeClose = "" ),
   91    ( TermState = toplevel
   92    -> MaybeStop = "."
   93    ; MaybeStop = "" ),
   94    format(To, "~w~w", [MaybeClose, MaybeStop]).
   95emit_reified_(To, list_begin) =>
   96    format(To, "[", []).
   97emit_reified_(To, list_tail) =>
   98    format(To, "|", []).
   99emit_reified_(To, list_end) =>
  100    format(To, "]", []).
  101emit_reified_(To, comment(Text)) =>
  102    format(To, "~s", [Text]).
  103emit_reified_(To, braces_begin) =>
  104    format(To, "{", []).
  105emit_reified_(To, braces_end) =>
  106    format(To, "}", []).
  107emit_reified_(To, parens_begin) =>
  108    format(To, "(", []).
  109emit_reified_(To, parens_end) =>
  110    format(To, ")", []).
  111emit_reified_(To, dict_tag('$var'(Tag))) =>
  112    format(To, "~w", [Tag]).
  113emit_reified_(To, dict_tag(Tag)), var(Tag) =>
  114    % if Tag is still a var, it must be anonymous
  115    format(To, "_", []).
  116emit_reified_(To, dict_tag(Tag)) =>
  117    % if Tag is still a var, it must be anonymous
  118    format(To, "~w", [Tag]).
  119emit_reified_(To, dict_begin) =>
  120    format(To, "{", []).
  121emit_reified_(To, dict_sep) =>
  122    format(To, ":", []).
  123emit_reified_(To, dict_end) =>
  124    format(To, "}", []).
 add_whitespace_terms(+State:dict, +Reified:list, -Formatted:list) is det
Add terms indicating whitespace and newlines in between positioned terms, as created by reified_format_for_file/2.
  130add_whitespace_terms(_State, [], [newline]) :- !.
  131add_whitespace_terms(State, [Term|Terms], Out) :-
  132    arg(1, Term, TermStart),
  133    stream_position_at_offset(State.line_bounds, TermStart, Pos),
  134    sync_position_whitespace(State, Pos, Out, Out1),
  135    Out1 = [Term|Out2],
  136    arg(2, Term, TermEnd),
  137    stream_position_at_offset(State.line_bounds, TermEnd, EndPos),
  138    update_state_position(State, EndPos, State1),
  139    add_whitespace_terms(State1, Terms, Out2).
  140
  141expand_term_positions([], []).
  142expand_term_positions([InfoDict|Rest], Expanded0) :-
  143    ( InfoDict.comments \= []
  144    -> expand_comments_positions(InfoDict.comments, Expanded0, Expanded1)
  145    ;  Expanded1 = Expanded0 ),
  146
  147    Term = InfoDict.term,
  148    ( Term \= end_of_file % just for comments at the end
  149    -> expand_subterm_positions(Term, toplevel, InfoDict.subterm,
  150                                Expanded1, Expanded2)
  151    ;  Expanded2 = Expanded1 ),
  152
  153    expand_term_positions(Rest, Expanded2).
  154
  155expand_comments_positions([], Tail, Tail) :- !.
  156expand_comments_positions([Comment|Rest], Expanded, Tail) :-
  157    expand_comment_positions(Comment, Expanded, Tail0),
  158    expand_comments_positions(Rest, Tail0, Tail).
  159
  160expand_comment_positions(CommentPos-Comment, Expanded, ExpandedTail) :-
  161    term_end_position(Comment, CommentEndPosRel),
  162    increment_stream_position(CommentPos, CommentEndPosRel, CommentEndPos),
  163    stream_position_data(char_count, CommentPos, From),
  164    stream_position_data(char_count, CommentEndPos, To),
  165    Expanded = [comment(From, To, Comment)|ExpandedTail].
  166
  167expand_subterm_positions(Term, _TermState, term_position(_From, _To, FFrom, FTo, SubPoses),
  168                         Expanded, ExTail), functor(Term, ',', _, _) =>
  169    % special-case comma terms to be reified as commas
  170    Expanded = [comma(FFrom, FTo)|ExpandedTail0],
  171    functor(Term, _, Arity, _),
  172    expand_term_subterms_positions(false, Term, Arity, 1, SubPoses, ExpandedTail0, ExTail).
  173expand_subterm_positions(Term, TermState, term_position(From, To, FFrom, FTo, SubPoses),
  174                         Expanded, ExTail) =>
  175    % using functor/4 to allow round-tripping zero-arity functors
  176    functor(Term, _Func, Arity, TermType),
  177    current_source_string(FileString),
  178    Length is FTo - FFrom,
  179    sub_string(FileString, FFrom, Length, _, FuncString),
  180    atom_string(Func, FuncString),
  181    % better way to tell if term is parenthesized?
  182    % read functor from current_source_string/1 (as with simple below)
  183    % and see if parens are there?
  184    (  From = FFrom, max_subterm_to(SubPoses, SubTermMax), To > SubTermMax
  185    -> ( Parens = true, FTo1 is FTo + 1 ) % add space for the parenthesis
  186    ;  ( Parens = false, FTo1 = FTo )  ),
  187    Expanded = [term_begin(FFrom, FTo1, Func, TermType, Parens)|ExpandedTail0],
  188    expand_term_subterms_positions(Parens, Term, Arity, 1, SubPoses,
  189                                   ExpandedTail0, ExpandedTail1),
  190    succ(To0, To),
  191    ExpandedTail1 = [term_end(To0, To, Parens, TermState)|ExpandedTail2],
  192    maybe_add_comma(TermState, To, ExpandedTail2, ExTail).
  193expand_subterm_positions(Term, TermState, string_position(From, To), Expanded, Tail) =>
  194    Expanded = [string(From, To, Term)|Tail0],
  195    maybe_add_comma(TermState, To, Tail0, Tail).
  196expand_subterm_positions(_Term, TermState, From-To, Expanded, Tail) =>
  197    current_source_string(FileString),
  198    Length is To - From,
  199    sub_string(FileString, From, Length, _, SimpleString),
  200    Expanded = [simple(From, To, SimpleString)|Tail0],
  201    maybe_add_comma(TermState, To, Tail0, Tail).
  202expand_subterm_positions(Term, TermState, list_position(From, To, Elms, HasTail), Expanded, Tail) =>
  203    assertion(is_listish(Term)),
  204    ListBeginTo is From + 1,
  205    Expanded = [list_begin(From, ListBeginTo)|Expanded1],
  206    expand_list_subterms_positions(Term, Elms, Expanded1, Expanded2),
  207    succ(To0, To),
  208    (  HasTail = none
  209    -> Expanded2 = [list_end(To0, To)|Tail0]
  210    ;  ( arg(1, HasTail, TailFrom),
  211         succ(TailBarFrom, TailFrom),
  212         Expanded2 = [list_tail(TailBarFrom, TailFrom)|Expanded3],
  213         list_tail(Term, Elms, ListTail),
  214         expand_subterm_positions(ListTail, false, HasTail, Expanded3, Expanded4),
  215         Expanded4 = [list_end(To0, To)|Tail0] )  ),
  216    maybe_add_comma(TermState, To, Tail0, Tail).
  217expand_subterm_positions(Term, TermState, brace_term_position(From, To, BracesPos), Expanded, Tail) =>
  218    BraceTo is From + 1,
  219    Expanded = [braces_begin(From, BraceTo)|Tail0],
  220    Term = {Term0},
  221    expand_subterm_positions(Term0, false, BracesPos, Tail0, Tail1),
  222    succ(To1, To),
  223    Tail1 = [braces_end(To1, To)|Tail2],
  224    maybe_add_comma(TermState, To1, Tail2, Tail).
  225expand_subterm_positions(Term, TermState, parentheses_term_position(From, To, ContentPos),
  226                         Expanded, Tail) =>
  227    ParenTo is From + 1,
  228    Expanded = [parens_begin(From, ParenTo)|Tail0],
  229    expand_subterm_positions(Term, false, ContentPos, Tail0, Tail1),
  230    succ(To1, To),
  231    Tail1 = [parens_end(To1, To)|Tail2],
  232    maybe_add_comma(TermState, To, Tail2, Tail).
  233expand_subterm_positions(Term, TermState, dict_position(_From, To, TagFrom, TagTo, KeyValPos),
  234                         Expanded, Tail) =>
  235    is_dict(Term, Tag),
  236    DictBraceTo is TagTo + 1,
  237    Expanded = [dict_tag(TagFrom, TagTo, Tag), dict_begin(TagTo, DictBraceTo)|Tail0],
  238    expand_dict_kvs_positions(Term, KeyValPos, Tail0, Tail1),
  239    succ(To1, To),
  240    Tail1 = [dict_end(To1, To)|Tail2],
  241    maybe_add_comma(TermState, To, Tail2, Tail).
  242
  243maybe_add_comma(subterm_item, CommaFrom, Tail0, Tail) :- !,
  244    CommaTo is CommaFrom + 1,
  245    Tail0 = [comma(CommaFrom, CommaTo)|Tail].
  246maybe_add_comma(_, _, Tail, Tail).
  247
  248is_listish(L) :- \+ var(L), !.
  249is_listish([]).
  250is_listish([_|_]).
  251
  252list_tail(Tail, [], Tail) :- !.
  253list_tail([_|Rest], [_|PosRest], Tail) :-
  254    list_tail(Rest, PosRest, Tail).
  255
  256max_subterm_to(SubPoses, SubTermMaxTo) :-
  257    aggregate_all(max(To),
  258                  ( member(Pos, SubPoses),
  259                    arg(2, Pos, To) ),
  260                  SubTermMaxTo).
  261
  262expand_dict_kvs_positions(_, [], Tail, Tail) :- !.
  263expand_dict_kvs_positions(Dict, [Pos|Poses], Expanded0, Tail) :-
  264    Pos = key_value_position(_From, To, SepFrom, SepTo, Key, KeyPos, ValuePos),
  265    get_dict(Key, Dict, Value),
  266    expand_subterm_positions(Key, false, KeyPos, Expanded0, Expanded1),
  267    Expanded1 = [dict_sep(SepFrom, SepTo)|Expanded2],
  268    expand_subterm_positions(Value, false, ValuePos, Expanded2, Expanded3),
  269    CommaTo is To + 1,
  270    ( Poses = [_|_]
  271    -> Expanded3 = [comma(To, CommaTo)|Expanded4]
  272    ;  Expanded3 = Expanded4 ),
  273    expand_dict_kvs_positions(Dict, Poses, Expanded4, Tail).
  274
  275% possible for the list to still have a tail when out of positions
  276expand_list_subterms_positions(_, [], Tail, Tail) :- !.
  277expand_list_subterms_positions([Term|Terms], [Pos|Poses], Expanded, Tail) :-
  278    ( Poses = [_|_]
  279    -> TermState = subterm_item
  280    ;  TermState = false ),
  281    expand_subterm_positions(Term, TermState, Pos, Expanded, Expanded1),
  282    expand_list_subterms_positions(Terms, Poses, Expanded1, Tail).
  283
  284expand_term_subterms_positions(_Parens, _Term, _Arity, _Arg, [], Tail, Tail) :- !.
  285expand_term_subterms_positions(Parens, Term, Arity, Arg, [SubPos|Poses], Expanded, ExpandedTail) :-
  286    assertion(between(1, Arity, Arg)),
  287    arg(Arg, Term, SubTerm),
  288    ( Parens = true, Arg < Arity
  289    -> State = subterm_item
  290    ;  State = false ),
  291    expand_subterm_positions(SubTerm, State, SubPos, Expanded, Expanded0),
  292    succ(Arg, Arg1),
  293    expand_term_subterms_positions(Parens, Term, Arity, Arg1, Poses, Expanded0, ExpandedTail).
  294
  295increment_stream_position(StartPos, RelPos, EndPos) :-
  296    stream_position_data(char_count, StartPos, StartCharCount),
  297    stream_position_data(char_count, RelPos, RelCharCount),
  298    CharCount is StartCharCount + RelCharCount,
  299    stream_position_data(byte_count, StartPos, StartByteCount),
  300    stream_position_data(byte_count, RelPos, RelByteCount),
  301    ByteCount is StartByteCount + RelByteCount,
  302    stream_position_data(line_count, StartPos, StartLineCount),
  303    stream_position_data(line_count, RelPos, RelLineCount),
  304    stream_position_data(line_position, StartPos, StartLinePosition),
  305    stream_position_data(line_position, RelPos, RelLinePosition),
  306    ( RelLineCount == 1
  307    -> LineCount = StartLineCount,
  308       LinePosition is StartLinePosition + RelLinePosition
  309    ; ( LineCount is StartLineCount + RelLineCount - 1,
  310        LinePosition = RelLinePosition ) ),
  311    EndPos = '$stream_position_data'(CharCount, LineCount, LinePosition, ByteCount).
  312
  313update_state_position(State0, EndPos, State2) :-
  314    stream_position_data(line_count, EndPos, EndLineCount),
  315    stream_position_data(line_position, EndPos, EndLinePos),
  316    put_dict(last_line, State0, EndLineCount, State1),
  317    put_dict(last_char, State1, EndLinePos, State2).
  318
  319sync_position_whitespace(State, TermPos, Expanded, ExpandedTail) :-
  320    PrevLineCount = State.last_line,
  321    stream_position_data(line_count, TermPos, NewLineCount),
  322    NewLines is NewLineCount - PrevLineCount,
  323    ( NewLines > 0
  324    -> n_copies_of(NewLines, newline, Expanded, Expanded0),
  325       PrevLinePosition = 0
  326    ;  ( Expanded = Expanded0,
  327         PrevLinePosition = State.last_char )
  328    ),
  329
  330    stream_position_data(line_position, TermPos, NewLinePosition),
  331    Whitespace is NewLinePosition - PrevLinePosition,
  332    ( Whitespace > 0
  333    -> Expanded0 = [white(Whitespace)|ExpandedTail]
  334    ;  Expanded0 = ExpandedTail ).
 stream_position_at_offset(+LineCharMap:rbtree, +Offset:Int, -Pos) is det
  337stream_position_at_offset(LineCharMap, To, EndPos) :-
  338    CharCount = To,
  339    ByteCount = To, % need to check for multibyte...
  340    file_offset_line_position(LineCharMap, To, LineCount, LinePosition),
  341    % breaking the rules, building an opaque term
  342    EndPos = '$stream_position_data'(CharCount, LineCount, LinePosition, ByteCount).
  343
  344% Helpers
  345
  346term_end_position(Term, Position) :-
  347    setup_call_cleanup(
  348        open_null_stream(Out),
  349        ( write(Out, Term),
  350          stream_property(Out, position(Position))
  351        ),
  352        close(Out)).
  353
  354n_copies_of(0, _, Tail, Tail) :- !.
  355n_copies_of(N, ToCopy, [ToCopy|Rest], Tail) :-
  356    N1 is N - 1,
  357    n_copies_of(N1, ToCopy, Rest, Tail)