1/* Part of SWI-Prolog 2 3 Author: Wouter Jansweijer and Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 1985-2013, University of Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(readln, 36 [ readln/1, % -Line 37 readln/2, % -Line, +EOL 38 readln/5 % See above 39 ]). 40:- autoload(library(lists),[append/3,member/2]). 41 42 43/** <module> Read line as list of tokens 44 45Read a sentence from the current input stream and convert it into a list 46of atoms and numbers: 47 48 - Letters(A-Z, a-z) are converted to atoms 49 - Digits (0-9) (and a '.' if a real number) are converted to numbers 50 Some obscure 'rounding' is done, so you have most of the times 51 only 6 significant digits with an exponent part. (This is caused 52 by the system predicate 'name'. If you want looonnnggg numbers 53 then define digits as parts of words). 54 (N.B. reals work only if '.' is not defined as 'stop-char' but 55 'escape' will work in this case) 56 57 The reader is _flexible_, you can define yourself: 58 59 - the character on which reading will stop 60 (this character is escapable with \ 61 to read a \ type this character twice!!) 62 - the character(s) that make up a word (execpt the 63 characters A-Z, a-z that always make up words!! 64 and (real)-numbers that always are grouped together!!) 65 - whether you want conversion of uppercase letters to 66 lowercase letters. 67 68 readln/1 69 The default setting for readln/1 is 70 - read up till newline 71 - see underscore('_') and numbers 0-9 as part of words 72 - make lowercase 73 74 - If nothing is read readln/1 succeeds with [] 75 - If an end_of_file is read readln/1 succeeds with [..|end_of_file] 76 77 78 readln/5 79 This predicate gives you the flexibility. 80 It succeeds with arg1 = list of word&atoms 81 arg2 = Ascii code of last character 82 (but '-1' in case of ^D). 83 To change one or more of the defaults you have to 84 instantiate argument3 and/or argument4 and/or argument5. 85 !! Uninstantiated arguments are defaulted !! 86 - stop character(s): 87 instantiate argument 3 with the list of ASCII code's 88 of the desired stop characters (Note: you can also 89 say: ".!?", what is equivalent to [46,33,63]). 90 - word character(s): 91 instantiate argument 4 with the list of ASCII code's 92 of the desired word-part characters (Note: wou can also 93 say: "", what is equivalent to [] ; i.e. no extra 94 characters). 95 - lowercase conversion: 96 instantiate argument 5 with lowercase 97 98 99Main predicates provided: 100 101 readln(P) - Read a sentence up till NewLine and 102 unify <P> with the list of atoms/numbers 103 (identical to: 104 readln(P, [10],"_01213456789",uppercase).) 105 readln(P, LastCh) - idem as above but the second argument is unified 106 with the last character read (the ascii-code for 107 the stop-character or -1) 108 readln(P, LastCh, Arg1, Arg2, Arg3) 109 - idem as above but the default setting is changed 110 for the instantiated args: 111 Arg1: List of stop characters 112 Arg2: List of word_part characters 113 Arg3: uppercase/lowercase conversion 114 115Examples: 116 read_sentence(P,Case) :- 117 readln(P,_,".!?","_0123456789",Case). 118 119 read_in(P) :- % with numbers as separate 120 readln(P,Eof,_,"", _). % entities. 121 122 read_atom(A) :- % stop on newline, 123 readln(A,_,_," ",_). % space is part of word 124 125@deprecated Old code. Not maintained and probably not at the 126 right level of abstraction. Not locale support. 127@see library(readutil), nlp package. 128*/ 129 130 131readln(Read) :- % the default is read up to EOL 132 string_codes("_0123456789", Arg2), 133 rl_readln(Line, LastCh, [10], Arg2, uppercase), 134 ( LastCh == end_of_file 135 -> append(Line,[end_of_file], Read) 136 ; Read = Line 137 ). 138 139readln(Read, LastCh):- 140 string_codes("_0123456789", Arg2), 141 rl_readln(Read, LastCh, [10], Arg2, uppercase). 142 143readln(P, EOF, StopChars, WordChars, Case) :- 144 ( var(StopChars) 145 -> Arg1 = [10] 146 ; Arg1 = StopChars 147 ), 148 ( var(WordChars) 149 -> string_codes("01234567890_", Arg2) 150 ; Arg2 = WordChars 151 ), 152 ( var(Case) 153 -> Arg3 = lowercase 154 ; Arg3 = Case 155 ), 156 rl_readln(P, EOF, Arg1, Arg2, Arg3). 157 158rl_readln(P, EOF, StopChars, WordChars, Case) :- 159 rl_initread(L, EOF, StopChars), 160 rl_blanks(L, LL), 161 !, 162 rl_words(P, LL,[], options(WordChars, Case)), 163 !. 164 165rl_initread(S, EOF, StopChars) :- 166 get_code(K), 167 rl_readrest(K, S, EOF, StopChars). 168 169rl_readrest(-1, [], end_of_file, _) :- !. 170rl_readrest(0'\\, [K1|R], EOF, StopChars) :- 171 get_code(K1), % skip it, take next char 172 get_code(K2), 173 rl_readrest(K2, R, EOF, StopChars). 174rl_readrest(K, [K], K, StopChars) :- % the stop char(s) 175 member(K, StopChars), 176 !. 177rl_readrest(K, [K|R], EOF, StopChars) :- % the normal case 178 get_code(K1), 179 rl_readrest(K1, R, EOF, StopChars). 180 181rl_words([W|Ws], S1, S4, Options) :- 182 rl_word(W, S1, S2, Options), 183 !, 184 rl_blanks(S2, S3), 185 rl_words(Ws, S3, S4, Options). 186rl_words([], S1, S2, _) :- 187 rl_blanks(S1, S2), 188 !. 189rl_words([], S, S, _). 190 191rl_word(N, [46|S1], S3, _) :- % the dot can be in the beginning of 192 rl_basic_num(N1, S1, S2), % a real number. 193 !, 194 rl_basic_nums(Rest, S2, S3, dot), % only ONE dot IN a number !! 195 name(N,[48, 46, N1|Rest]). % i.e '0.<number>' 196rl_word(N, S0, S2, _) :- 197 rl_basic_num(N1, S0, S1), 198 !, 199 rl_basic_nums(Rest, S1, S2, _), 200 name(N,[N1|Rest]). 201rl_word(W, S0, S2, Options) :- 202 rl_basic_char(C1, S0, S1, Options), 203 !, 204 rl_basic_chars(Rest, S1, S2, Options), 205 name(W, [C1|Rest]). 206rl_word(P,[C|R], R, _) :- 207 name(P, [C]), 208 !. 209 210rl_basic_chars([A|As], S0, S2, Options) :- 211 rl_basic_char(A, S0, S1, Options), 212 !, 213 rl_basic_chars(As, S1, S2, Options). 214rl_basic_chars([], S, S, _). 215 216rl_basic_nums([46,N|As], [46|S1], S3, Dot) :- % a dot followed by >= one digit 217 var(Dot), % but not found a dot already 218 rl_basic_num(N, S1, S2), 219 !, 220 rl_basic_nums(As, S2, S3, dot). 221rl_basic_nums([A|As], S0, S2, Dot) :- 222 rl_basic_num(A, S0, S1), 223 !, 224 rl_basic_nums(As, S1, S2, Dot). 225rl_basic_nums([], S, S, _). 226 227rl_blanks([C|S0], S1) :- 228 rl_blank(C), 229 !, 230 rl_blanks(S0, S1). 231rl_blanks(S, S). 232 233/* Basic Character types that form rl_words together */ 234 235rl_basic_char(A, [C|S], S, options(WordChars, Case)) :- 236 rl_lc(C, A, WordChars, Case). 237 238rl_basic_num(N, [N|R], R) :- 239 code_type(N, digit). 240 241rl_blank(X) :- 242 code_type(X, space). 243 244rl_lc(X, X1, _, Case) :- 245 code_type(X, upper), 246 !, 247 rl_fix_case(Case, X, X1). 248rl_lc(X, X, _, _) :- 249 code_type(X, lower). 250rl_lc(X, X, WordChars, _) :- 251 memberchk(X, WordChars). 252 253rl_fix_case(lowercase, U, L) :- 254 !, 255 code_type(L, lower(U)). 256rl_fix_case(_, C, C)