1:- encoding(utf8).
    2:- module(
    3  dcg,
    4  [
    5    '...'//0,
    6    '...'//1,                 % -Codes
    7    add_indent//1,            % +Indent
    8    alpha//1,                 % ?Code
    9    alphanum//1,              % ?Code
   10    atom_phrase/2,            % :Dcg_0, ?Atom
   11    atom_phrase/3,            % :Dcg_0, +Atom1, ?Atom2
   12    dcg_atom//2,              % :Dcg_1, ?Atom
   13    dcg_between//2,           % +Low, +High
   14    dcg_between//3,           % +Low, +High, ?Code
   15    dcg_boolean//1,           % ?Boolean
   16    dcg_call//1,              % :Dcg_0
   17    dcg_call//2,              % :Dcg_1, ?Arg1
   18    dcg_call//3,              % :Dcg_2, ?Arg1, ?Arg2
   19    dcg_call//4,              % :Dcg_3, ?Arg1, ?Arg2, ?Arg3
   20    dcg_call//5,              % :Dcg_4, ?Arg1, ?Arg2, ?Arg3, ?Arg4
   21    dcg_call//6,              % :Dcg_5, ?Arg1, ?Arg2, ?Arg3, ?Arg4, ?Arg5
   22    dcg_char//1,              % ?Char
   23    dcg_peek//1,              % +Length
   24    dcg_pp_boolean//1,        % +Boolean
   25    dcg_string//2,            % :Dcg_1, ?String
   26    dcg_with_output_to/1,     % :Dcg_0
   27    dcg_with_output_to/2,     % +Sink, :Dcg_0
   28    default//2,               % :Dcg_0, ?Default_0
   29    digit_weight//1,          % ?N
   30    ellipsis//2,              % +Atom, +MaxLength
   31    error_location/2,         % +SyntaxError, +Input, +Length
   32    error_location/3,         % +SyntaxError, +Input, +Length
   33    indent//1,                % +Indent
   34    must_see//1,              % :Dcg_0
   35    must_see_code//2,         % +Code, :Skip_0
   36    nl//0,
   37    nonblank//0,
   38    nonblanks//0,
   39    parsing//0,
   40    remainder_as_atom//1,     % -Remainder
   41    remainder_as_string//1,   % -Remainder
   42    string_phrase/2,          % :Dcg_0, ?String
   43    string_phrase/3,          % :Dcg_0, +String1, -String2
   44    tab//1,                   % +N
   45    term//1,                  % +Term
   46    thousands//1,             % +N
   47    ws//0
   48  ]
   49).   50:- reexport(library(dcg/basics)).

Extended support for DCGs

*/

   56:- use_module(library(pure_input)).   57
   58:- use_module(library(code_ext)).   59:- use_module(library(list_ext)).   60:- use_module(library(string_ext)).   61
   62:- meta_predicate
   63    atom_phrase(//, ?),
   64    atom_phrase(//, ?, ?),
   65    dcg_atom(3, ?, ?, ?),
   66    dcg_call(//, ?, ?),
   67    dcg_call(3, ?, ?, ?),
   68    dcg_call(4, ?, ?, ?, ?),
   69    dcg_call(5, ?, ?, ?, ?, ?),
   70    dcg_call(6, ?, ?, ?, ?, ?, ?),
   71    dcg_call(7, ?, ?, ?, ?, ?, ?, ?),
   72    dcg_string(3, ?, ?, ?),
   73    dcg_with_output_to(//),
   74    dcg_with_output_to(+, //),
   75    default(//, //, ?, ?),
   76    must_see(//, ?, ?),
   77    must_see_code(+, //, ?, ?),
   78    string_phrase(//, ?),
   79    string_phrase(//, ?, ?).
 ...//
 ...(-Codes:list(code))//
Wrapper around string//1.
   88... -->
   89  ...(_).
   90
   91
   92...(Codes) -->
   93  string(Codes).
 add_indent(+Indent:positive_integer)//
   99add_indent(N), "\n", indent(N) -->
  100  "\n", !,
  101  add_indent(N).
  102add_indent(N), [Code] -->
  103  [Code], !,
  104  add_indent(N).
  105add_indent(_) --> "".
 alpha//
 alpha(?Code:code)//
  112alpha -->
  113  alpha(_).
  114
  115
  116alpha(Code) --> dcg_between(0'a, 0'z, Code).
  117alpha(Code) --> dcg_between(0'A, 0'Z, Code).
 alphanum(?Code:code)//
  123alphanum(Code) --> alpha(Code).
  124alphanum(Code) --> digit(Code).
 atom_phrase(:Dcg_0, ?Atom:atom)// is nondet
 atom_phrase(:Dcg_0, +Atom1:atomic, ?Atom2:atom)// is nondet
  131atom_phrase(Dcg_0, Atom) :-
  132  var(Atom), !,
  133  phrase(Dcg_0, Codes),
  134  atom_codes(Atom, Codes).
  135atom_phrase(Dcg_0, Atom) :-
  136  atom_codes(Atom, Codes),
  137  phrase(Dcg_0, Codes).
  138
  139
  140atom_phrase(Dcg_0, Atom1, Atom2) :-
  141  must_be(atom, Atom1),
  142  atom_codes(Atom1, Codes1),
  143  phrase(Dcg_0, Codes1, Codes2),
  144  atom_codes(Atom2, Codes2).
 dcg_atom(:Dcg_1, ?Atom:atom)//
This meta-DCG rule handles the translation between the word and the character level of parsing/generating.

Typically, grammar A specifies how words can be formed out of characters. A character is a code, and a word is a list of codes. Grammar B specifies how sentences can be built out of words. Now the word is an atom, and the sentences in a list of atoms.

This means that at some point, words in grammar A, i.e. lists of codes, need to be translated to words in grammar B, i.e. atoms.

This is where dcg_atom//2 comes in. We illustrate this with a schematic example:

sentence([W1,...,Wn]) -->
  word2(W1),
  ...,
  word2(Wn).

word2(W) -->
  dcg_atom(word1, W).

word1([C1, ..., Cn]) -->
  char(C1),
  ...,
  char(Cn).
throws
- instantiation_error
- type_error
  182dcg_atom(Dcg_1, Atom) -->
  183  {var(Atom)}, !,
  184  dcg_call(Dcg_1, Codes),
  185  {atom_codes(Atom, Codes)}.
  186dcg_atom(Dcg_1, Atom) -->
  187  {atom_codes(Atom, Codes)},
  188  dcg_call(Dcg_1, Codes).
 dcg_between(+Low:nonneg, +High:nonneg)//
 dcg_between(+Low:nonneg, +High:nonneg, ?Code:nonneg)//
  195dcg_between(Low, High) -->
  196  dcg_between(Low, High, _).
  197
  198
  199dcg_between(Low, High, Code) -->
  200  [Code],
  201  {between(Low, High, Code)}.
 dcg_boolean(+Boolean:boolean)// is det
dcg_boolean(-Boolean:boolean)// is det
  208dcg_boolean(false) --> "false".
  209dcg_boolean(true) --> "true".
 dcg_call(:Dcg_0)//
 dcg_call(:Dcg_1, ?Arg1)//
 dcg_call(:Dcg_2, ?Arg1, ?Arg2)//
 dcg_call(:Dcg_3, ?Arg1, ?Arg2, ?Arg3)//
 dcg_call(:Dcg_4, ?Arg1, ?Arg2, ?Arg3, ?Arg4)//
 dcg_call(:Dcg_5, ?Arg1, ?Arg2, ?Arg3, ?Arg4, ?Arg5)//
See also
- call/[1-8]
  222dcg_call(Dcg_0, X, Y) :-
  223  call(Dcg_0, X, Y).
  224
  225
  226dcg_call(Dcg_1, Arg1, X, Y) :-
  227  call(Dcg_1, Arg1, X, Y).
  228
  229
  230dcg_call(Dcg_2, Arg1, Arg2, X, Y) :-
  231  call(Dcg_2, Arg1, Arg2, X, Y).
  232
  233
  234dcg_call(Dcg_3, Arg1, Arg2, Arg3, X, Y) :-
  235  call(Dcg_3, Arg1, Arg2, Arg3, X, Y).
  236
  237
  238dcg_call(Dcg_4, Arg1, Arg2, Arg3, Arg4, X, Y) :-
  239  call(Dcg_4, Arg1, Arg2, Arg3, Arg4, X, Y).
  240
  241
  242dcg_call(Dcg_5, Arg1, Arg2, Arg3, Arg4, Arg5, X, Y) :-
  243  call(Dcg_5, Arg1, Arg2, Arg3, Arg4, Arg5, X, Y).
  250dcg_char(Char) -->
  251  {var(Char)}, !,
  252  [Code],
  253  {char_code(Char, Code)}.
  254dcg_char(Char) -->
  255  {char_code(Char, Code)},
  256  [Code].
 dcg_peek(+Length:nonneg)//
  262dcg_peek(Len, Codes, Codes) :-
  263  length(Prefix, Len),
  264  prefix(Prefix, Codes),
  265  string_codes(String, Prefix),
  266  format(user_output, "\n|~s|\n", [String]).
 dcg_pp_boolean(+Boolean:boolean)// is det
  272dcg_pp_boolean(false) --> !, "❌".
  273dcg_pp_boolean(true) --> "✓".
 dcg_string(:Dcg_1, ?String)//
  279dcg_string(Dcg_1, String) -->
  280  {var(String)}, !,
  281  dcg_call(Dcg_1, Codes),
  282  {string_codes(String, Codes)}.
  283dcg_string(Dcg_1, String) -->
  284  {string_codes(String, Codes)},
  285  dcg_call(Dcg_1, Codes).
 dcg_with_output_to(:Dcg_0) is nondet
 dcg_with_output_to(+Sink, :Dcg_0) is nondet
  292dcg_with_output_to(Dcg_0) :-
  293  dcg_with_output_to(current_output, Dcg_0).
  294
  295
  296dcg_with_output_to(Sink, Dcg_0) :-
  297  phrase(Dcg_0, Codes),
  298  with_output_to(Sink, put_codes(Codes)).
 default(:Dcg_0, ?Default_0)//
  304default(Dcg_0, _) -->
  305  Dcg_0, !.
  306default(_, Default_0) -->
  307  Default_0.
 digit_weight(?Weight:between(0,9))//
  313digit_weight(Weight) -->
  314  parsing, !,
  315  [Code],
  316  {code_type(Code, digit(Weight))}.
  317digit_weight(Weight) -->
  318  {code_type(Code, digit(Weight))},
  319  [Code].
 ellipsis(+Original:text, +MaxLength:beteen(2,inf))// is det
Arguments:
MaxLength- The maximum length of the generated ellipsed string.
  328ellipsis(Original, MaxLength) -->
  329  {string_ellipsis(Original, MaxLength, Ellipsed)},
  330  atom(Ellipsed).
 error_location(+SyntaxError:compound, +Input:list(code)) is det
 error_location(+SyntaxError:compound, +Input:list(code), +Length:nonneg) is det
  337error_location(Error, Input) :-
  338  error_location(Error, Input, 80).
  339
  340
  341error_location(error(syntax_error(What),Location), Input, Length) :-
  342  subsumes_term(end_of_file-CharCount, Location),
  343  end_of_file-CharCount = Location,
  344  length(After, CharCount),
  345  % BUG: Should have detected determinism.
  346  once(append(Before, After, Input)),
  347  length(Before, BL),
  348  string_codes("…", Elipsis),
  349  string_codes("\n**here**\n", Here),
  350  (   BL =< Length
  351  ->  BC = Before
  352  ;   length(BC0, Length),
  353      % BUG: Should have detected determinism.
  354      once(append(_, BC0, Before)),
  355      append(Elipsis, BC0, BC)
  356  ),
  357  length(After, AL),
  358  (   AL =< Length
  359  ->  AC = After
  360  ;   length(AC0, Length),
  361      % BUG: Should have detected determinism.
  362      once(append(AC0, _, After)),
  363      append(AC0, Elipsis, AC)
  364  ),
  365  % BUG: Should have detected determinism.
  366  once(append(Here, AC, HAC)),
  367  append([0'\n|BC], HAC, ContextCodes),
  368  string_codes(Context, ContextCodes), !,
  369  syntax_error(error_location(What,Context)).
  370error_location(Error, _, _) :-
  371  throw(Error).
 indent(+Indent:nonneg)// is det
  377indent(0) --> !, "".
  378indent(N1) -->
  379  "  ", !,
  380  {N2 is N1 - 1},
  381  indent(N2).
 must_see(:Dcg_0)//
  387must_see(Dcg_0, X, Y) :-
  388  call(Dcg_0, X, Y), !.
  389must_see(_:Dcg_0) -->
  390  {
  391    Dcg_0 =.. [Pred|_],
  392    format(string(Call), "~w", [Dcg_0])
  393  },
  394  syntax_error(expected(Pred,Call)).
 must_see_code(+Code, :Skip_0)//
  400must_see_code(Code, Skip_0) -->
  401  [Code], !,
  402  Skip_0.
  403must_see_code(Code, _) -->
  404  {char_code(Char, Code)},
  405  syntax_error(expected(Char)).
 nl// is det
  411nl -->
  412  "\n".
 nonblank//
Wrapper around nonblank//1 from library(dcg/basics).
  420nonblank -->
  421  nonblank(_).
 nonblanks//
  427nonblanks -->
  428  nonblanks(_).
 parsing// is semidet
Succeeds if currently parsing a list of codes (rather than generating a list of codes).
  437parsing(H, H) :-
  438  nonvar(H).
 remainder_as_atom(-Atom:atom)// is det
  444remainder_as_atom(Atom) -->
  445  remainder(Codes),
  446  {atom_codes(Atom, Codes)}.
 remainder_as_string(-Remainder:string)// is det
  452remainder_as_string(String) -->
  453  remainder(Codes),
  454  {string_codes(String, Codes)}.
 string_phrase(:Dcg_0, ?String) is nondet
 string_phrase(:Dcg_0, +String1, ?String2) is nondet
  461string_phrase(Dcg_0, String) :-
  462  var(String), !,
  463  phrase(Dcg_0, Codes),
  464  string_codes(String, Codes).
  465string_phrase(Dcg_0, String) :-
  466  string_codes(String, Codes),
  467  phrase(Dcg_0, Codes).
  468
  469
  470string_phrase(Dcg_0, String1, String2) :-
  471  string_codes(String1, Codes1),
  472  phrase(Dcg_0, Codes1, Codes2),
  473  string_codes(String2, Codes2).
 tab(+N:nonneg)// is det
  479tab(0) --> !, "".
  480tab(N1) -->
  481  " ",
  482  {N2 is N1 - 1},
  483  tab(N2).
 term(+Term)// is det
  489term(Term) -->
  490  {format(atom(Atom), "~w", [Term])},
  491  atom(Atom).
 thousands(+N:integer)// is det
  497thousands(N) -->
  498  {format(atom(Atom), "~D", [N])},
  499  atom(Atom).
 ws// is det
  505ws --> white.
  506% NO-BREAK SPACE (0240, 0xA0)
  507ws --> [160]