. (utf8) 2:- module( 3 http_client2, 4 [ 5 http_call/2, % +Uri, :Goal_1 6 http_call/3, % +Uri, :Goal_1, +Options 7 http_download/1, % +Uri 8 http_download/2, % +Uri, ?File 9 http_download/3, % +Uri, ?File, +Options 10 http_head2/2, % +Uri, +Options 11 http_header_name_label/2, % +Name, -Label 12 http_last_modified/2, % +Uri, -Time 13 http_metadata_content_type/2, % +Metas, -MediaType 14 http_metadata_file_name/2, % +Metas, -File 15 http_metadata_final_uri/2, % +Metas, -Uri 16 http_metadata_last_modified/2, % +Metas, -Time 17 http_metadata_link/3, % +Metas, +Relation, -Uri 18 http_metadata_status/2, % +Metas, -Status 19 http_open2/2, % +CurrentUri, -In 20 http_open2/3, % +CurrentUri, -In, +Options 21 http_status_reason/2, % ?Status, ?Reason 22 http_sync/1, % +Uri 23 http_sync/2, % +Uri, ?File 24 http_sync/3, % +Uri, ?File, +Options 25 % DEBUGGING 26 curl/0, 27 nocurl/0 28 ] 29).
45:- use_module(library(aggregate)). 46:- use_module(library(apply)). 47:- use_module(library(debug)). 48:- use_module(library(error)). 49:- use_module(library(http/http_client), []). 50:- use_module(library(http/http_cookie), []). 51:- use_module(library(http/http_json)). 52:- use_module(library(http/http_path)). 53:- use_module(library(http/json)). 54:- use_module(library(yall)). 55:- use_module(library(zlib)). 56 57:- use_module(library(atom_ext)). 58:- use_module(library(call_ext)). 59:- use_module(library(dcg)). 60:- use_module(library(dict)). 61:- use_module(library(file_ext)). 62:- use_module(library(list_ext)). 63:- use_module(library(media_type)). 64:- use_module(library(stream_ext)). 65:- use_module(library(string_ext)). 66:- use_module(library(uri_ext)). 67 68:- use_module(http/http_open_cp, []). 69 70:- meta_predicate 71 http_call( , ), 72 http_call( , , ). 73 74:- multifile 75 http:encoding_filter/3, 76 http_header:status_comment//1, 77 http_header:status_number_fact/2, 78 http:post_data_hook/3. 79 80httpencoding_filter('application/gzip', In1, In2) :- 81 http:encoding_filter(gzip, In1, In2). 82httpencoding_filter('x-gzip', In1, In2) :- 83 http:encoding_filter(gzip, In1, In2). 84 85errorhas_type(http_status, Status) :- 86 http_header:status_number_fact(_, Status). 87 88http_headerstatus_number_fact(522, 522). 89http_headerstatus_number_fact(523, 523). 90 91http_headerstatus_comment(522) --> 92 "CloudFlare: Connection timed out". 93http_headerstatus_comment(523) --> 94 "CloudFlare: Origin is unreachable". 95 96:- public 97 ssl_verify/5. 98 99ssl_verify(_SSL, _ProblemCertificate, _AllCertificates, _FirstCertificate, _Error).
Accept = #( media-range [ accept-params ] ) media-range = ( "*/*" / ( type "/" "*" ) / ( type "/" subtype ) ) *( OWS ";" OWS parameter ) parameter = token "=" ( token / quoted-string ) accept-params = weight *( accept-ext ) weight = OWS ";" OWS "q=" qvalue accept-ext = OWS ";" OWS token [ "=" ( token / quoted-string ) ]
120http_accept_value(MediaTypes, Accept) :- 121 length(MediaTypes, NumMediaTypes), 122 Interval is 1.0 / NumMediaTypes, 123 atom_phrase(accept_(MediaTypes, Interval, Interval), Accept). 124 125accept_([], _, _) --> !, "". 126accept_([H|T], N1, Interval) --> 127 media_type(H), 128 weight_(N1), 129 {N2 is N1 + Interval}, 130 ({T = []} -> "" ; ", "), 131 accept_(T, N2, Interval). 132 133weight_(N) --> 134 {format(atom(Atom), ";q=~3f", [N])}, 135 atom(Atom).
Detects cycles in HTTP Link header referals, in which case the cyclic_link_header/1 is thrown.
157http_call(Uri, Goal_1) :- 158 http_call(Uri, Goal_1, options{}). 159 160 161http_call(FirstUri0, Goal_1, Options1) :- 162 ensure_uri_(FirstUri0, FirstUri), 163 State = state(FirstUri), 164 % Non-deterministically enumerate over URIs that appear in HTTP Link 165 % headers with the ânextâ keyword. 166 repeat, 167 State = state(CurrentUri), 168 merge_dicts(options{next: NextUri}, Options1, Options2), 169 ( http_open2(CurrentUri, In, Options2) 170 -> ( % There is a next URI: keep the choicepoint open. 171 atom(NextUri) 172 -> State = state(CurrentUri), 173 % Detect directly cyclic `Link' headers. 174 ( CurrentUri == NextUri 175 -> throw(error(http_error(cyclic_link_header,NextUri),http_call/3)) 176 ; nb_setarg(1, State, NextUri) 177 ) 178 ; % There is no next URI: abandon choicepoint. 179 ! 180 ), 181 call_cleanup( 182 call(Goal_1, In), 183 close(In) 184 ) 185 ; !, fail 186 ).
196http_download(Uri) :- 197 http_download(Uri, _). 198 199 200http_download(Uri, File) :- 201 http_download(Uri, File, options{}). 202 203 204http_download(Uri, File, Options) :- 205 uri_to_file_(Uri, File), 206 http_download_(Uri, File, Options).
212http_head2(Uri, Options1) :-
213 merge_dicts(options{method: head}, Options1, Options2),
214 http_call(Uri, true, Options2).
220http_header_name_label(Name, Label) :-
221 atomic_list_concat(Atoms, -, Name),
222 maplist(atom_capitalize, Atoms, CAtoms),
223 atomics_to_string(CAtoms, " ", Label).
229http_last_modified(Uri, Time) :-
230 http_head2(Uri, options{metadata: Metas}),
231 http_metadata_last_modified(Metas, Time).
247http_metadata_content_type(Metas, MediaType) :-
248 Metas = [Meta|_],
249 dict_get('content-type', Meta.headers, [ContentType|T]),
250 assertion(T == []),
251 atom_phrase(media_type(MediaType), ContentType).
257http_metadata_file_name(Metas, File) :-
258 Metas = [Meta|_],
259 dict_get('content-disposition', Meta.headers, [ContentDisposition|T]),
260 assertion(T == []),
261 split_string(ContentDisposition, ";", " ", ["attachment"|Params]),
262 member(Param, Params),
263 split_string(Param, "=", "\"", ["filename",File0]), !,
264 atom_string(File, File0).
270http_metadata_final_uri(Metas, Uri) :-
271 first(Metas, Meta),
272 http{uri: Uri} :< Meta.
278http_metadata_last_modified(Metas, Time) :-
279 first(Metas, Meta),
280 dict_get('last-modified', Meta.headers, [LMod]),
281 parse_time(LMod, Time).
287http_metadata_link(Metas, Relation, Uri) :-
288 [Meta|_] = Metas,
289 dict_get(link, Meta.headers, Links),
290 % This header may appear multiple times.
291 atomic_list_concat(Links, ;, Link),
292 atom_string(Relation, Relation0),
293 split_string(Link, ",", " ", Comps),
294 member(Comp, Comps),
295 split_string(Comp, ";", "<> ", [Uri0|Params]),
296 member(Param, Params),
297 split_string(Param, "=", "\"", ["rel",Relation0]), !,
298 atom_string(Uri, Uri0).
304http_metadata_status(Metas, Status) :-
305 Metas = [Meta|_],
306 Status = Meta.status.
373http_open2(CurrentUri, In) :- 374 http_open2(CurrentUri, In, options{}). 375 376 377http_open2(CurrentUri0, In, Options1) :- 378 ensure_uri_(CurrentUri0, CurrentUri), 379 % Allow the next/1 option to be instantiated later. 380 ignore(option{next: NextUri} :< Options1), 381 % Allow the metadata/1 optiont to be instantiated later. 382 ignore(dict_get(metadata, Options1, Metas)), 383 http_options_(CurrentUri, Options1, State, Options2), 384 http_open2_(CurrentUri, In, State, Metas0, Options2), 385 reverse(Metas0, Metas), 386 % Instantiate the next/1 option. 387 ignore(http_metadata_link(Metas, next, NextUri)), 388 first(Metas, Meta), 389 http{status: Status, uri: FinalUri} :< Meta, 390 ignore(option{final_uri: FinalUri} :< Options1), 391 ( option{status: Status} :< Options1 392 -> true 393 ; http_status_(In, Status, FinalUri, Options1) 394 ).
401http_status_(In, Status, FinalUri, Options) :- 402 dict_get(failure, Options, 400, Failure), 403 dict_get(success, Options, 200, Success), 404 ( % HTTP failure codes. 405 between(400, 599, Status) 406 -> call_cleanup( 407 read_string(In, 1 000, Content), 408 close(In) 409 ), 410 ( Failure == warning 411 -> print_message( 412 warning, 413 error(http_error(status,Status,Content,FinalUri),http_status_error/3) 414 ) 415 ; must_be(between(400,599), Failure), 416 Status =:= Failure 417 -> fail 418 ; % Throw an exception, indicating an error. 419 throw(error(http_error(status,Status,Content,FinalUri),http_status_error/3)) 420 ) 421 ; % HTTP success codes. The asserion indicates that we do not 422 % expect a 1xx or 3xx status code here. 423 assertion(between(200, 299, Status)) 424 -> (number(Success) -> Status =:= Success ; true) 425 ). 426 427http_options_(Uri, Options1, State, Options3) :- 428 ( dict_select(accept, Options1, Options2, Accept) 429 -> http_open2_accept_(Accept, Atom) 430 ; Atom = '*/*', 431 Options2 = Options1 432 ), 433 merge_dicts(options{request_header: 'Accept'=Atom}, Options2, Options3), 434 dict_get(number_of_hops, Options3, 5, MaxHops), 435 dict_get(number_of_retries, Options3, 1, MaxRetries), 436 State = state{ 437 maximum_number_of_hops: MaxHops, 438 maximum_number_of_retries: MaxRetries, 439 number_of_retries: 1, 440 visited: [Uri] 441 }. 442 443http_open2_(Uri, In2, State1, [Meta|Metas], Options1) :- 444 ( debugging(http(send_request)), 445 options{post: RequestBody} :< Options1 446 -> debug(http(send_request), "REQUEST BODY\n~w\n", [RequestBody]) 447 ; true 448 ), 449 merge_dicts( 450 options{ 451 cert_verify_hook: cert_accept_any, 452 raw_headers: HeaderLines, 453 redirect: false, 454 status_code: Status, 455 timeout: 60, 456 version: Major-Minor 457 }, 458 Options1, 459 Options2 460 ), 461 get_time(Start), 462 dict_terms(Options2, Options3), 463 http_open_cp:http_open(Uri, In1, Options3), 464 ignore(options{status_code: Status} :< Options2), 465 get_time(End), 466 http_lines_pairs(HeaderLines, HeaderPairs), 467 ( memberchk(location-[Location], HeaderPairs) 468 -> State2 = State1.put(_{location: Location}) 469 ; State2 = State1 470 ), 471 dict_pairs(HeadersMeta, HeaderPairs), 472 Meta = http{ 473 headers: HeadersMeta, 474 status: Status, 475 timestamp: Start-End, 476 uri: Uri, 477 version: version{major: Major, minor: Minor} 478 }, 479 State3 = State2.put(_{meta: Meta}), 480 % Print status codes and reply headers as debug messages. 481 % Use curl/0 to show these debug messages. 482 ( debugging(http(receive_reply)) 483 -> debug(http(receive_reply), "", []), 484 http_status_reason(Status, Reason), 485 debug(http(receive_reply), "< ~d (~s)", [Status,Reason]), 486 maplist(debug_header, HeaderPairs), 487 debug(http(receive_reply), "", []) 488 ; true 489 ), 490 ( dict_get('content-type', HeadersMeta, [ContentType|_]), 491 atom_phrase(media_type(MediaType), ContentType), 492 media_type_encoding(MediaType, Encoding) 493 -> set_stream(In1, encoding(Encoding)) 494 ; true 495 ), 496 http_open2_(Uri, In1, Status, State3, In2, Metas, Options1). 497 498debug_header(Key-Values) :- 499 maplist(debug_header(Key), Values). 500 501debug_header(Key, Value) :- 502 debug(http(receive_reply), "< ~a: ~w", [Key,Value]).
508% list of Media Types 509http_open2_accept_(MediaTypes, Atom) :- 510 is_list(MediaTypes), !, 511 http_accept_value(MediaTypes, Atom). 512% file name extension 513http_open2_accept_(Ext, Atom) :- 514 atom(Ext), !, 515 ( media_type_extension(MediaType, Ext) 516 -> http_open2_accept_([MediaType], Atom) 517 ; existence_error(media_type_extension, Ext) 518 ). 519% Media Type 520http_open2_accept_(MediaType, Atom) :- 521 http_open2_accept_([MediaType], Atom). 522 523% succes status code 524http_open2_(Uri, In1, Status, State, In2, [], _) :- 525 between(200, 299, Status), !, 526 http_open2_success_(Uri, In1, State, In2). 527% redirect status code 528http_open2_(Uri1, In1, Status, State1, In2, Metas, Options) :- 529 between(300, 399, Status), !, 530 close(In1), 531 _{location: Location, visited: Visited1} :< State1, 532 uri_resolve(Location, Uri1, Uri2), 533 Visited2 = [Uri2|Visited1], 534 ( length(Visited2, NumVisited), 535 _{maximum_number_of_hops: MaxHops} :< State1, 536 NumVisited >= MaxHops 537 -> Metas = [], 538 reverse(Visited2, Visited3), 539 % Wait until redirect loops have reached the maximum number of 540 % hops. The same URI can sometimes be legitimately requested 541 % more than once, e.g., without and with a cookie. 542 ( memberchk(Uri2, Visited1) 543 -> throw(error(http_error(redirect_loop,Visited3),http_open2_/7)) 544 ; throw(error(http_error(max_redirect,NumVisited,Visited3),http_open2_/7)) 545 ) 546 ; State2 = State1.put(_{visited: Visited2}), 547 http_open2_(Uri2, In2, State2, Metas, Options) 548 ). 549% authentication error status code 550http_open2_(_, In, Status, _, In, [], _) :- 551 Status =:= 401, !. 552% non-authentication error status code 553http_open2_(Uri, In1, Status, State1, In2, Metas, Options) :- 554 between(400, 599, Status), !, 555 _{ 556 maximum_number_of_retries: MaxRetries, 557 number_of_retries: Retries1 558 } :< State1, 559 Retries2 is Retries1 + 1, 560 ( Retries2 >= MaxRetries 561 -> In2 = In1, 562 Metas = [] 563 ; close(In1), 564 State2 = State1.put(_{number_of_retries: Retries2}), 565 http_open2_(Uri, In2, State2, Metas, Options) 566 ). 567% unrecognized status code 568http_open2_(_, In, Status, _, _, [], _) :- 569 close(In), 570 domain_error(http_status, Status). 571 572% Change the input stream encoding based on the value of the 573% `Content-Type' header. 574http_open2_success_(_, In, State, In) :- 575 _{meta: Meta} :< State, 576 http_metadata_content_type([Meta], _), !, 577 ( debugging(http(peek)) 578 -> peek_string(In, 1 000, String), 579 debug(http(peek), "~s", [String]) 580 ; true 581 ). 582% If there is no `Content-Type' header, then there MUST be no content 583% either. 584http_open2_success_(Uri, In, _, In) :- 585 ( at_end_of_stream(In) 586 -> true 587 ; print_message(warning, error(http_error(no_content_type,Uri),http_open2_success_/4)) 588 ). 589 590http_lines_pairs(Lines, Groups) :- 591 aggregate_all( 592 set(Key-Value), 593 ( 594 member(Line, Lines), 595 % HTTP header parsing may fail, e.g., due to obsolete line 596 % folding (where one header is spread over multiple lines). 597 phrase(http_parse_header_simple(Key, Value), Line) 598 ), 599 Pairs 600 ), 601 group_pairs_by_key(Pairs, Groups).
header-field = field-name ":" OWS field-value OWS field-name = token OWS = *( SP | HTAB )
611http_parse_header_simple(Key, Value) --> 612 string_without(":", KeyCodes), 613 ":", 614 { 615 atom_codes(Key0, KeyCodes), 616 downcase_atom(Key0, Key) 617 }, 618 remainder_as_string(String0), 619 { 620 string_strip(String0, "\s\t", String), 621 atom_string(Value, String) 622 }, !. 623 624httppost_data_hook(string(String), Out, HdrExtra) :- 625 atom_string(Atom, String), 626 http_header_cp:http_post_data(atom(Atom), Out, HdrExtra). 627httppost_data_hook(string(MediaType,String), Out, HdrExtra) :- 628 atom_string(Atom, String), 629 http_header_cp:http_post_data(atom(MediaType,Atom), Out, HdrExtra).
638http_status_reason(100, "Continue"). 639http_status_reason(101, "Switching Protocols"). 640http_status_reason(200, "OK"). 641http_status_reason(201, "Created"). 642http_status_reason(202, "Accepted"). 643http_status_reason(203, "Non-Authoritative Information"). 644http_status_reason(204, "No Content"). 645http_status_reason(205, "Reset Content"). 646http_status_reason(206, "Partial Content"). 647http_status_reason(300, "Multiple Choices"). 648http_status_reason(301, "Moved Permanently"). 649http_status_reason(302, "Moved Temporarily"). 650http_status_reason(303, "See Other"). 651http_status_reason(304, "Not Modified"). 652http_status_reason(305, "Use Proxy"). 653http_status_reason(306, "Switch Proxy"). 654http_status_reason(307, "Temporary Redirect"). 655http_status_reason(400, "Bad Request"). 656http_status_reason(401, "Unauthorized"). 657http_status_reason(402, "Payment Required"). 658http_status_reason(403, "Forbidden"). 659http_status_reason(404, "Not Found"). 660http_status_reason(405, "Method Not Allowed"). 661http_status_reason(406, "Not Acceptable"). 662http_status_reason(407, "Proxy Authentication Required"). 663http_status_reason(408, "Request Time-out"). 664http_status_reason(409, "Conflict"). 665http_status_reason(410, "Gone"). 666http_status_reason(411, "Length Required"). 667http_status_reason(412, "Precondition Failed"). 668http_status_reason(413, "Request Entity Too Large"). 669http_status_reason(414, "Request-URI Too Large"). 670http_status_reason(415, "Unsupported Media Type"). 671http_status_reason(500, "Internal Server Error"). 672http_status_reason(501, "Not Implemented"). 673http_status_reason(502, "Bad Gateway"). 674http_status_reason(503, "Service Unavailable"). 675http_status_reason(504, "Gateway Time-out"). 676http_status_reason(505, "HTTP Version not supported"). 677http_status_reason(522, "CloudFlare: Connection timed out"). 678http_status_reason(523, "CloudFlare: Origin is unreachable").
691http_sync(Uri) :- 692 http_sync(Uri, _). 693 694 695http_sync(Uri, File) :- 696 http_sync(Uri, File, options{}). 697 698 699http_sync(Uri0, File, Options) :- 700 ensure_uri_(Uri0, Uri), 701 uri_to_file_(Uri, File), 702 (exists_file(File) -> true ; http_download_(Uri, File, Options)). 703 704 705 706 707 708% DEBUGGING %
714curl :-
715 debug(http(receive_reply)),
716 debug(http(send_request)).
724nocurl :- 725 nodebug(http(receive_reply)), 726 nodebug(http(send_request)). 727 728 729 730 731 732% GENERICS %
738ensure_uri_(uri(Scheme,Authority,Segments,Query,Fragment), Uri) :- !, 739 uri_comps(Uri, uri(Scheme,Authority,Segments,Query,Fragment)). 740ensure_uri_(Uri, Uri) :- 741 must_be(atom, Uri).
747http_download_(Uri, File, Options) :- 748 file_name_extensions(File, Name, Exts), 749 file_name_extensions(TmpFile, Name, [tmp|Exts]), 750 write_to_file( 751 TmpFile, 752 http_download_stream_(Uri, Options), 753 options{type: binary} 754 ), 755 rename_file(TmpFile, File). 756 757http_download_stream_(Uri, Options, Out) :- 758 http_call(Uri, {Out}/[In0]>>copy_stream_data(In0, Out), Options).
765uri_to_file_(_, File) :- 766 ground(File), !, 767 must_be(atom, File). 768uri_to_file_(Uri, File) :- 769 uri_data_file(Uri, data, File)
HTTP Client
Alternative to the HTTP client that is part of the SWI-Prolog standard library.
Debugging
The following debug flags are used:
http(peek)
http(receive_reply)
http(send_request)
*/