1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2016-2022, VU University Amsterdam
    7			      SWI-Prolog Solutions b.v.
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rocksdb,
   37	  [ rocks_open/3,		% +Directory, -RocksDB, +Options
   38	    rocks_close/1,		% +RocksDB
   39	    rocks_alias_lookup/2,	% +Name, -RocksDB
   40
   41	    rocks_put/3,		% +RocksDB, +Key, +Value
   42	    rocks_put/4,		% +RocksDB, +Key, +Value, +Options
   43	    rocks_merge/3,		% +RocksDB, +Key, +Value
   44	    rocks_merge/4,		% +RocksDB, +Key, +Value, +Options
   45	    rocks_delete/2,		% +RocksDB, +Key
   46	    rocks_delete/3,		% +RocksDB, +Key, +Options
   47	    rocks_batch/2,		% +RocksDB, +Actions
   48	    rocks_batch/3,		% +RocksDB, +Actions, +Options
   49
   50	    rocks_get/3,		% +RocksDB, +Key, -Value
   51	    rocks_get/4,		% +RocksDB, +Key, -Value, +Options
   52	    rocks_enum/3,		% +RocksDB, ?Key, ?Value
   53	    rocks_enum/4,		% +RocksDB, ?Key, ?Value, +Options
   54	    rocks_enum_from/4,		% +RocksDB, ?Key, ?Value, +From
   55	    rocks_enum_from/5,		% +RocksDB, ?Key, ?Value, +From, +Options
   56	    rocks_enum_prefix/4,	% +RocksDB, ?Suffix, ?Value, +Prefix
   57	    rocks_enum_prefix/5,	% +RocksDB, ?Suffix, ?Value, +Prefix, +Options
   58
   59            rocks_property/2            % +RocksDB, ?Property
   60	  ]).   61:- use_module(library(option)).   62:- use_module(library(error)).   63:- use_foreign_library(foreign(rocksdb4pl)).   64
   65:- meta_predicate
   66	rocks_open(+, -, :).   67
   68:- predicate_options(rocks_open/3, 3,
   69		     [ alias(atom),
   70		       mode(oneof([read_only,read_write])),
   71		       key(oneof([atom,string,binary,int32,int64,
   72				  float,double,term])),
   73		       value(any),
   74		       merge(callable),
   75                       debug(boolean),
   76                       prepare_for_bulk_load(oneof([true])),
   77                       optimize_for_small_db(oneof([true])),
   78                       increase_parallelism(oneof([true])),
   79                       create_if_missing(boolean),
   80                       create_missing_column_families(boolean),
   81                       error_if_exists(boolean),
   82                       paranoid_checks(boolean),
   83                       track_and_verify_wals_in_manifest(boolean),
   84                       info_log_level(oneof([debug,info,warn,error,fatal,header])), % default: info
   85                       env(boolean),
   86                       max_open_files(integer),
   87                       max_file_opening_threads(integer),
   88                       max_total_wal_size(integer),
   89                       statistics(boolean), % TODO: this only creates a Statistics object
   90                       use_fsync(boolean),
   91                       db_log_dir(string),
   92                       wal_dir(string),
   93                       delete_obsolete_files_period_micros(integer),
   94                       max_background_jobs(integer),
   95                       max_subcompactions(integer),
   96                       max_log_file_size(integer),
   97                       log_file_time_to_roll(integer),
   98                       keep_log_file_num(integer),
   99                       recycle_log_file_num(integer),
  100                       max_manifest_file_size(integer),
  101                       table_cache_numshardbits(integer),
  102                       wal_ttl_seconds(integer),
  103                       wal_size_limit_mb(integer),
  104                       manifest_preallocation_size(integer),
  105                       allow_mmap_reads(boolean),
  106                       allow_mmap_writes(boolean),
  107                       use_direct_reads(boolean),
  108                       use_direct_io_for_flush_and_compaction(boolean),
  109                       allow_fallocate(boolean),
  110                       is_fd_close_on_exec(boolean),
  111                       stats_dump_period_sec(integer),
  112                       stats_persist_period_sec(integer),
  113                       persist_stats_to_disk(boolean),
  114                       stats_history_buffer_size(integer),
  115                       advise_random_on_open(boolean),
  116                       db_write_buffer_size(integer),
  117                       write_buffer_manager(boolean),
  118                       % new_table_reader_for_compaction_inputs(boolean),  % TODO: removed from rocksdb/options.h?
  119                       compaction_readahead_size(integer),
  120                       random_access_max_buffer_size(integer),
  121                       writable_file_max_buffer_size(integer),
  122                       use_adaptive_mutex(boolean),
  123                       bytes_per_sync(integer),
  124                       wal_bytes_per_sync(integer),
  125                       strict_bytes_per_sync(integer),
  126                       enable_thread_tracking(boolean),
  127                       delayed_write_rate(integer),
  128                       enable_pipelined_write(boolean),
  129                       unordered_write(boolean),
  130                       allow_concurrent_memtable_write(boolean),
  131                       enable_write_thread_adaptive_yield(boolean),
  132                       max_write_batch_group_size_bytes(integer),
  133                       write_thread_max_yield_usec(integer),
  134                       write_thread_slow_yield_usec(integer),
  135                       skip_stats_update_on_db_open(boolean),
  136                       skip_checking_sst_file_sizes_on_db_open(boolean),
  137                       allow_2pc(boolean),
  138                       fail_ifoptions_file_error(boolean),
  139                       dump_malloc_stats(boolean),
  140                       avoid_flush_during_recovery(boolean),
  141                       avoid_flush_during_shutdown(boolean),
  142                       allow_ingest_behind(boolean),
  143                       % preserve_deletes(boolean), % TODO: removed: https://github.com/facebook/rocksdb/issues/9090
  144                       two_write_queues(boolean),
  145                       manual_wal_flush(boolean),
  146                       atomic_flush(boolean),
  147                       avoid_unnecessary_blocking_io(boolean),
  148                       write_dbid_to_manifest(boolean),
  149                       log_readahead_size(boolean),
  150                       best_efforts_recovery(boolean),
  151                       max_bgerror_resume_count(integer),
  152                       bgerror_resume_retry_interval(integer),
  153                       allow_data_in_errors(boolean),
  154                       db_host_id(string)
  155		     ]).  156:- predicate_options(rocks_get/4, 4,
  157                     [
  158                      readahead_size(integer),
  159                      max_skippable_internal_keys(integer),
  160                      verify_checksums(boolean),
  161                      fill_cache(boolean),
  162                      tailing(boolean),
  163                      total_order_seek(boolean),
  164                      auto_prefix_mode(boolean),
  165                      prefix_same_as_start(boolean),
  166                      pin_data(boolean),
  167                      background_purge_on_iterator_cleanup(boolean),
  168                      ignore_range_deletions(boolean),
  169                      % iter_start_seqnum(integer), % TODO: removed https://github.com/facebook/rocksdb/issues/9090
  170                      io_timeout(integer),
  171                      value_size_soft_limit(integer)
  172                     ]).  173:- predicate_options(rocks_enum/4, 4,
  174                     [ pass_to(rocks_get/4, 4)
  175                     ]).  176:- predicate_options(rocks_enum_from/5, 5,
  177                     [ pass_to(rocks_get/4, 4)
  178                     ]).  179:- predicate_options(rocks_enum_prefix/5, 5,
  180                     [ pass_to(rocks_get/4, 4)
  181                     ]).  182:- predicate_options(rocks_put/4, 4,
  183                     [ sync(boolean),
  184                       disableWAL(boolean),
  185                       ignore_missing_column_families(boolean),
  186                       no_slowdown(boolean),
  187                       low_pri(boolean),
  188                       memtable_insert_hint_per_batch(boolean)
  189                     ]).  190:- predicate_options(rocks_delete/3, 3,
  191                     [ pass_to(rocks_put/4, 4)
  192                     ]).  193:- predicate_options(rocks_merge/4, 4,
  194                     [ pass_to(rocks_put/4, 4)
  195                     ]).  196:- predicate_options(rocks_batch/4, 4,
  197                     [ pass_to(rocks_put/4, 4)
  198                     ]).

RocksDB interface

RocksDB is an embeddable persistent key-value store for fast storage. The store can be used only from one process at the same time. It may be used from multiple Prolog threads though. This library provides a SWI-Prolog binding for RocksDB. RocksDB just associates byte arrays. This interface defines several mappings between Prolog datastructures and byte arrays that may be configured to store both keys and values. See rocks_open/3 for details.

See also
- http://rocksdb.org/ */
 rocks_open(+Directory, -RocksDB, +Options) is det
Open a RocksDB database in Directory and unify RocksDB with a handle to the opened database. In general, this predicate throws an exception on failure; if an error occurs in the rocksdb library, the error term is of the form rocks_error(Message) or rocks_error(Message,Blob).

Most of the DBOptions in rocksdb/include/rocksdb/options.h are supported. create_if_missing defaults to true. Additional options are:

alias(+Name)
Give the database a name instead of using an anonymous handle. A named database is not subject to GC and must be closed explicitly. When the database is opened, RocksDB is unified with Name (the underlying handle can be obtained using rocks_alias_lookup/2).
  232%	  - key(+Type)
  233%	  - value(+Type)
  234%	  Define the type for the key and value. These must be
  235%	  consistent over multiple invocations. Default is `atom`.
  236%	  Defined types are:
  237%	    - atom
  238%	      Accepts an atom or string.  Unifies the result with an
  239%	      atom.  Data is stored as a UTF-8 string in RocksDB.
  240%	    - string
  241%	      Accepts an atom or string.  Unifies the result with a
  242%	      string.  Data is stored as a UTF-8 string in RocksDB.
  243%	    - binary
  244%	      Accepts an atom or string with codes in the range 0..255.
  245%	      Unifies the result with a string. Data is stored as a
  246%	      sequence of bytes in RocksDB.
  247%	    - int32
  248%	      Maps to a Prolog integer in the range
  249%	      -2,147,483,648...2,147,483,647.  Stored as 4 bytes in
  250%	      native byte order.
  251%	    - int64
  252%	      Maps to a Prolog integer in the range
  253%	      -9223372036854775808..9223372036854775807.  Stored as 8
  254%	      bytes in native byte order.
  255%	    - float
  256%	      Value is mapped to a 32-bit floating point number.
  257%	    - double
  258%	      Value is mapped to a 64-bit floating point number (double).
  259%	    - term
  260%	      Stores any Prolog term. Stored using PL_record_external().
  261%	      The PL_record_external() function serializes the internal
  262%	      structure of a term, including _cycles_, _sharing_ and
  263%	      _attributes_.  This means that if the key is a term, it
  264%	      only matches if the same cycles and sharing are
  265%	      used. For example, `X = f(a), Key = k(X,X)` is a different
  266%	      key from `Key = k(f(a),f(a))` and `X = [a|X]` is a
  267%	      different key from `X = [a,a|X]`. Applications for which
  268%	      such keys should match must first normalize the key.
  269%	      Normalization can be based on term_factorized/3 from
  270%	      library(terms).
  271%	  In addition, `value` accepts one of list(type) or set(type),
  272%	  currently only for the numeric types.  This causes
  273%	  rocks_put/3 and rocks_get/3 to exchange the value as a
  274%	  list and installs a built-in merge function.
  275%	  - merge(:Goal)
  276%	  Define RocksDB value merging.  See rocks_merge/3.
  277%	  - mode(+Mode)
  278%	  One of `read_write` (default) or `read_only`.  The latter
  279%	  uses OpenForReadOnly() to open the database. It is allowed
  280%	  to have multiple `read_only` opens, but only one
  281%	  `read_write` (which also precludes having any `read_only`);
  282%	  however, it is recommended to only open a database once.
  283%         - debug(true) Output more information when displaying
  284%           the rocksdb "blob".
  285% @see https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide
  286% @see http://rocksdb.org/blog/2018/08/01/rocksdb-tuning-advisor.html
  287% @see https://github.com/EighteenZi/rocksdb_wiki/blob/master/RocksDB-Tuning-Guide.md
  288
  289%
  290% @bug You must call rocks_close(RocksDB) to ensure clean shutdown.
  291%	Failure to call rocks_close/1 usually doesn't result in data
  292%	loss because rocksdb can recover, depending on the setting of
  293%	the `sync` option. However, it is recommended that you do a
  294%	clean shutdown if possible, for example by using at_halt/1 or
  295%	setup_call_cleanup/3.
  296
  297% @see https://github.com/facebook/rocksdb/wiki/Known-Issues
  298
  299rocks_open(Dir, DB, Options0) :-
  300	meta_options(is_meta, Options0, Options),
  301        absolute_file_name(Dir, DirAbs),
  302	rocks_open_(DirAbs, DB, Options).
  303
  304is_meta(merge).
 rocks_close(+RocksDB) is det
Destroy the RocksDB handle. Note that anonymous handles are subject to (atom) garbage collection, which will call rocks_close/1 as part of the garbage collection; however, there is no guarantee that an anonymous handle will be garbage collected, so it is suggested that at_halt/1 or setup_call_cleanup/3 is used to ensure that rocks_close/1 is called.

rocks_close/1 throws an existence error if RocksDB isn't a valid handle or alias from rocks_open/3. If RocksDB is an anonymous handle that has been closed, rocks_close/1 silently succeeds; if it's an alias name that's already been closed, an existence error is raised (this behavior may change in future).

If you call rocks_close/1 while there is an iterator open (e.g., from rocks_enum/3 that still has a choicepoint), the results are unpredictable. The code attempts to avoid crashes by reference counting iterators and only allowing a close if there are no active iterators for a database.

 rocks_alias_lookup(+Name, -RocksDB) is semidet
Look up an alias Name (as specified in the alias option of rocks_open/3) and unify RocksDB with the underlying handle; fails if there is no open file with the alias Name.

This predicate has two uses:

 rocks_put(+RocksDB, +Key, +Value) is det
 rocks_put(+RocksDB, +Key, +Value, +Options) is det
Add Key-Value to the RocksDB database. If Key already has a value, the existing value is silently replaced by Value. If the value type is list(Type) or set(Type), Value must be a list. For set(Type) the list is converted into an ordered set.
  355rocks_put(RocksDB, Key, Value) :-
  356    rocks_put(RocksDB, Key, Value, []).
 rocks_merge(+RocksDB, +Key, +Value) is det
 rocks_merge(+RocksDB, +Key, +Value, +Options) is det
Merge Value with the already existing value for Key. Requires the option merge(:Merger) or the value type to be one of list(Type) or set(Type) to be used when opening the database. Using rocks_merge/3 rather than rocks_get/3, update and rocks_put/3 makes the operation atomic and reduces disk accesses.

Options are the same as for rocks_put/4.

Merger is called as below, where two clauses are required: one with How set to partial and one with How set to full. If full, MergeValue is a list of values that need to be merged, if partial, MergeValue is a single value.

call(:Merger, +How, +Key, +Value0, +MergeValue, -Value)

If Key is not in RocksDB, Value0 is unified with a value that depends on the value type. If the value type is an atom, it is unified with the empty atom; if it is string or binary it is unified with an empty string; if it is int32 or int64 it is unified with the integer 0; and finally if the type is term it is unified with the empty list.

For example, if the value is a set of Prolog values we open the database with value(term) to allow for Prolog lists as value and we define merge/5 as below.

merge(partial, _Key, Left, Right, Result) :-
    ord_union(Left, Right, Result).
merge(full, _Key, Initial, Additions, Result) :-
    append([Initial|Additions], List),
    sort(List, Result).

If the merge callback fails or raises an exception the merge operation fails and the error is logged through the RocksDB logging facilities. Note that the merge callback can be called in a different thread or even in a temporary created thread if RocksDB decides to merge remaining values in the background.

Errors
- permission_error(merge, rocksdb, RocksDB) if the database was not opened with the merge(Merger) option.
See also
- https://github.com/facebook/rocksdb/wiki/Merge-Operator for understanding the concept of value merging in RocksDB.
  408rocks_merge(RocksDB, Key, Value) :-
  409    rocks_merge(RocksDB, Key, Value, []).
 rocks_delete(+RocksDB, +Key) is semidet
 rocks_delete(+RocksDB, +Key, +Options) is semidet
Delete Key from RocksDB. Fails if Key is not in the database.

Options are the same as for rocks_put/4.

  418rocks_delete(RocksDB, Key) :-
  419    rocks_delete(RocksDB, Key, []).
 rocks_get(+RocksDB, +Key, -Value) is semidet
 rocks_get(+RocksDB, +Key, -Value, +Options) is semidet
True when Value is the current value associated with Key in RocksDB. If the value type is list(Type) or set(Type) this returns a Prolog list.
  428rocks_get(RocksDB, Key, Value) :-
  429    rocks_get(RocksDB, Key, Value, []).
 rocks_enum(+RocksDB, -Key, -Value) is nondet
 rocks_enum(+RocksDB, -Key, -Value, +Options) is nondet
True when Value is the current value associated with Key in RocksDB. This enumerates all keys in the database. If the value type is list(Type) or set(Type) Value is a list.

Options are the same as for rocks_get/4.

  440rocks_enum(RocksDB, Key, Value) :-
  441    rocks_enum(RocksDB, Key, Value, []).
 rocks_enum_from(+RocksDB, -Key, -Value, +Prefix) is nondet
 rocks_enum_from(+RocksDB, -Key, -Value, +Prefix, +Options) is nondet
As rocks_enum/3, but starts enumerating from Prefix. The key type must be one of atom, string or binary. To only iterate all keys with Prefix, use rocks_enum_prefix/4 or the construct below.

Options are the same as for rocks_get/4.

    rocks_enum_from(DB, Key, Value, Prefix),
    (   sub_atom(Key, 0, _, _, Prefix)
    ->  handle(Key, Value)
    ;   !, fail
    )
  461rocks_enum_from(RocksDB, Key, Value, Prefix) :-
  462    rocks_enum_from(RocksDB, Key, Value, Prefix, []).
 rocks_enum_prefix(+RocksDB, -Suffix, -Value, +Prefix) is nondet
 rocks_enum_prefix(+RocksDB, -Suffix, -Value, +Prefix, +Options) is nondet
True for all keys that start with Prefix. Instead of returning the full key this predicate returns the suffix of the matching key. This predicate succeeds deterministically if no next key exists or the next key does not match Prefix.

Options are the same as for rocks_get/4.

  474rocks_enum_prefix(RocksDB, Suffix, Value, Prefix) :-
  475    rocks_enum_prefix(RocksDB, Suffix, Value, Prefix, []).
 rocks_batch(+RocksDB, +Actions:list) is det
 rocks_batch(+RocksDB, +Actions:list, +Options) is det
Perform a batch of operations on RocksDB as an atomic operation.

Options are the same as for rocks_put/4.

Actions is a list of:

delete(+Key)
As rocks_delete/2.
put(+Key, +Value)
As rocks_put/3.

The following example is translated from the RocksDB documentation:

  rocks_get(RocksDB, key1, Value),
  rocks_batch(RocksDB,
              [ delete(key1),
                put(key2, Value)
              ])
  504rocks_batch(RocksDB, Actions) :-
  505    rocks_batch(RocksDB, Actions, []).
 rocks_property(+RocksDB, ?Property) is nondet
  510rocks_property(RocksDB, Property) :-
  511    var(Property), !,
  512    rocks_property(P),
  513    rocks_property(RocksDB, P, Value),
  514    Property =.. [P,Value].
  515rocks_property(RocksDB, Property) :-
  516    Property =.. [P,Value], !,
  517    rocks_property(RocksDB, P, Value).
  518rocks_property(_RocksDB, Property) :-
  519    type_error(property, Property).
  520
  521rocks_property(estimate_num_keys)