. (utf8) 2:- module( 3 stream_ext, 4 [ 5 copy_stream_type/2, % +In, +Out 6 guess_encoding/2, % +In, -Encoding 7 number_of_open_files/1, % -N 8 read_line_to_atom/2, % +In, -Atom 9 read_line_to_number/2, % +In, -N 10 read_stream_to_atom/2, % +In, -Atom 11 read_stream_to_string/2, % +In, -String 12 recode_stream/3, % +In, +FromEncoding, -Out 13 stream_line/2, % +In, -Line 14 stream_line_column/3, % +Stream, -Line, -Column 15 stream_metadata/2, % +Stream, -Metadata 16 wc/2 % +In, -Stats 17 ] 18).
27:- use_module(library(aggregate)). 28:- use_module(library(apply)). 29:- use_module(library(archive)). 30:- use_module(library(error)). 31:- use_module(library(lists)). 32:- use_module(library(readutil)). 33:- use_module(library(yall)). 34 35:- use_module(library(dcg)). 36:- use_module(library(dict)). 37:- use_module(library(debug_ext)). 38:- use_module(library(file_ext), []). 39:- use_module(library(hash_stream)). 40:- use_module(library(os_ext)). 41:- use_module(library(string_ext)). 42:- use_module(library(thread_ext)). 43 44:- thread_local 45 debug_indent/1. 46 47debug_indent(0).
58copy_stream_type(In, Out) :-
59 stream_property(In, type(Type)),
60 set_stream(Out, type(Type)),
61 copy_stream_data(In, Out).
70% The value bom causes the stream to check whether the current 71% character is a Unicode BOM marker. If a BOM marker is found, the 72% encoding is set accordingly and the call succeeds; otherwise the 73% call fails. 74guess_encoding(In, utf8) :- 75 set_stream(In, encoding(bom)), !. 76guess_encoding(In, Enc) :- 77 process_create( 78 path(uchardet), 79 [], 80 [stdin(pipe(ProcIn)),stdout(pipe(ProcOut))] 81 ), 82 set_stream(ProcIn, encoding(octet)), 83 call_cleanup( 84 copy_stream_data(In, ProcIn), 85 close(ProcIn) 86 ), 87 call_cleanup( 88 ( 89 read_string(ProcOut, String1), 90 string_strip(String1, "\n", String2), 91 atom_string(Enc0, String2) 92 ), 93 close(ProcOut) 94 ), 95 clean_encoding_(Enc0, Enc), 96 (Enc == unknown -> throw(error(cannot_guess_encoding,guess_encoding/2)) ; true).
102number_of_open_files(N) :-
103 expand_file_name('/proc/self/fd/*', Files),
104 length(Files, N).
110read_line_to_atom(In, Atom) :-
111 repeat,
112 read_line_to_codes(In, Codes),
113 ( Codes == end_of_file
114 -> !, fail
115 ; atom_codes(Atom, Codes)
116 ).
122read_line_to_number(In, N) :-
123 read_line_to_atom(In, Atom),
124 atom_number(Atom, N).
130read_stream_to_atom(In, Atom) :-
131 read_stream_to_codes(In, Codes),
132 atom_codes(Atom, Codes).
138read_stream_to_string(In, String) :-
139 read_stream_to_codes(In, Codes),
140 string_codes(String, Codes).
See the output of command ~iconv -l~ for the supported encodings.
Assumes that In and Out are binary streams.
152recode_stream(In, Enc0, Out) :-
153 clean_encoding_(Enc0, Enc),
154 process_create(
155 path(iconv),
156 ['-c','-f',Enc,'-t','utf-8',-],
157 [stdin(pipe(ProcIn)),stdout(pipe(ProcOut))]
158 ),
159 create_detached_thread(
160 call_cleanup(
161 copy_stream_type(In, ProcIn),
162 close(ProcIn)
163 )
164 ),
165 set_stream(ProcOut, type(binary)),
166 ( ground(Out)
167 -> call_cleanup(
168 copy_stream_data(ProcOut, Out),
169 close(ProcOut)
170 )
171 ; ProcOut = Out
172 ).
178stream_line(In, Line) :-
179 repeat,
180 read_line_to_string(In, Line),
181 (Line == end_of_file -> !, fail ; true).
187stream_line_column(Stream, Line, Column) :-
188 stream_property(Stream, position(Pos)),
189 stream_position_data(line_count, Pos, Line),
190 stream_position_data(line_position, Pos, Column).
196stream_metadata(Stream, Meta) :- 197 stream_property(Stream, position(Pos)), 198 stream_position_data(byte_count, Pos, NumBytes), 199 stream_position_data(char_count, Pos, NumChars), 200 stream_position_data(line_count, Pos, NumLines), 201 stream_property(Stream, newline(Newline)), 202 Meta = _{ 203 bytes: NumBytes, 204 characters: NumChars, 205 lines: NumLines, 206 newline: Newline 207 }. 208 209 210 211/* 212%! wc(+In:istream, -Lines:nonneg) is det. 213% 214% Native implementation of line count. 215% 216% @tbd Compare performance with wc/4. 217 218wc(In, Lines) :- 219 Counter = count(0), 220 repeat, 221 read_line_to_codes(In, Codes), 222 ( Codes == end_of_file 223 -> !, 224 arg(1, Counter, Lines) 225 ; arg(1, Counter, Count1), 226 Count2 is Count1 + 1, 227 nb_setarg(1, Counter, Count2), 228 fail 229 ). 230*/
236wc(In, Stats) :- 237 setup_call_cleanup( 238 process_create(path(wc), [], [stdin(pipe(ProcIn)),stdout(pipe(Out))]), 239 ( 240 create_detached_thread( 241 call_cleanup( 242 copy_stream_data(In, ProcIn), 243 close(ProcIn) 244 ) 245 ), 246 read_wc(Out, Lines, Words, Bytes) 247 ), 248 close(Out) 249 ), 250 Stats = _{ 251 number_of_bytes: Bytes, 252 number_of_lines: Lines, 253 number_of_words: Words 254 }. 255 256read_wc(Out, Lines, Words, Bytes) :- 257 read_stream_to_codes(Out, Codes), 258 phrase(read_wc(Lines, Words, Bytes), Codes, _). 259 260% E.g., `427 1818 13512 README.md`. 261read_wc(Lines, Words, Bytes) --> 262 whites, 263 integer(Lines), 264 whites, 265 integer(Words), 266 whites, 267 integer(Bytes). 268 269 270 271 272 273% GENERICS %
277clean_encoding_(Enc1, Enc4) :- 278 downcase_atom(Enc1, Enc2), 279 (encoding_alias_(Enc2, Enc3) -> true ; Enc3 = Enc2), 280 (encoding_promotion_(Enc3, Enc4) -> true ; Enc4 = Enc3). 281 282encoding_alias_('iso-8859-1', iso_latin_1). 283encoding_alias_('iso-8859-15.latin1', 'iso8859-15'). 284encoding_alias_(macroman, macintosh). 285encoding_alias_('us-ascii', ascii). 286encoding_alias_('utf-8', utf8). 287 288encoding_promotion_(ascii, utf8)
Support for working with streams.
Some predicates in the library use the external programs âiconvâ and âuchardetâ.
*/