1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2016-2022, VU University Amsterdam 7 SWI-Prolog Solutions b.v. 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 
*/

:- module(rocksdb,
          [ rocks_open/3,           % +Directory, -RocksDB, +Options
            rocks_close/1,          % +RocksDB
            rocks_alias_lookup/2,   % +Name, -RocksDB

            rocks_put/3,            % +RocksDB, +Key, +Value
            rocks_put/4,            % +RocksDB, +Key, +Value, +Options
            rocks_merge/3,          % +RocksDB, +Key, +Value
            rocks_merge/4,          % +RocksDB, +Key, +Value, +Options
            rocks_delete/2,         % +RocksDB, +Key
            rocks_delete/3,         % +RocksDB, +Key, +Options
            rocks_batch/2,          % +RocksDB, +Actions
            rocks_batch/3,          % +RocksDB, +Actions, +Options

            rocks_get/3,            % +RocksDB, +Key, -Value
            rocks_get/4,            % +RocksDB, +Key, -Value, +Options
            rocks_enum/3,           % +RocksDB, ?Key, ?Value
            rocks_enum/4,           % +RocksDB, ?Key, ?Value, +Options
            rocks_enum_from/4,      % +RocksDB, ?Key, ?Value, +From
            rocks_enum_from/5,      % +RocksDB, ?Key, ?Value, +From, +Options
            rocks_enum_prefix/4,    % +RocksDB, ?Suffix, ?Value, +Prefix
            rocks_enum_prefix/5,    % +RocksDB, ?Suffix, ?Value, +Prefix, +Options

            rocks_property/2        % +RocksDB, ?Property
          ]).
:- use_module(library(option)).
:- use_module(library(error)).
:- use_foreign_library(foreign(rocksdb4pl)).

% The option list of rocks_open/3 is module-sensitive (`:`): rocks_open/3
% passes it through meta_options/3 so that the merge(Goal) option is
% qualified with the caller's module.
:- meta_predicate
    rocks_open(+, -, :).

% Option declarations for rocks_open/3.  Apart from the binding-specific
% options at the top (alias, mode, key, value, merge, debug), the names
% follow the DBOptions fields in rocksdb/include/rocksdb/options.h.
:- predicate_options(rocks_open/3, 3,
                     [ alias(atom),
                       mode(oneof([read_only,read_write])),
                       key(oneof([atom,string,binary,int32,int64,
                                  float,double,term])),
                       value(any),
                       merge(callable),
                       debug(boolean),
                       prepare_for_bulk_load(oneof([true])),
                       optimize_for_small_db(oneof([true])),
                       increase_parallelism(oneof([true])),
                       create_if_missing(boolean),
                       create_missing_column_families(boolean),
                       error_if_exists(boolean),
                       paranoid_checks(boolean),
                       track_and_verify_wals_in_manifest(boolean),
                       info_log_level(oneof([debug,info,warn,error,fatal,header])), % default: info
                       env(boolean),
                       max_open_files(integer),
                       max_file_opening_threads(integer),
                       max_total_wal_size(integer),
                       statistics(boolean), % TODO: this only creates a Statistics object
                       use_fsync(boolean),
                       db_log_dir(string),
                       wal_dir(string),
                       delete_obsolete_files_period_micros(integer),
                       max_background_jobs(integer),
                       max_subcompactions(integer),
                       max_log_file_size(integer),
                       log_file_time_to_roll(integer),
                       keep_log_file_num(integer),
                       recycle_log_file_num(integer),
                       max_manifest_file_size(integer),
                       table_cache_numshardbits(integer),
                       wal_ttl_seconds(integer),
                       wal_size_limit_mb(integer),
                       manifest_preallocation_size(integer),
                       allow_mmap_reads(boolean),
                       allow_mmap_writes(boolean),
                       use_direct_reads(boolean),
                       use_direct_io_for_flush_and_compaction(boolean),
                       allow_fallocate(boolean),
                       is_fd_close_on_exec(boolean),
                       stats_dump_period_sec(integer),
                       stats_persist_period_sec(integer),
                       persist_stats_to_disk(boolean),
                       stats_history_buffer_size(integer),
                       advise_random_on_open(boolean),
                       db_write_buffer_size(integer),
                       write_buffer_manager(boolean),
                       % new_table_reader_for_compaction_inputs(boolean), % TODO: removed from rocksdb/options.h?
                       compaction_readahead_size(integer),
                       random_access_max_buffer_size(integer),
                       writable_file_max_buffer_size(integer),
                       use_adaptive_mutex(boolean),
                       bytes_per_sync(integer),
                       wal_bytes_per_sync(integer),
                       strict_bytes_per_sync(boolean), % a bool in DBOptions
                       enable_thread_tracking(boolean),
                       delayed_write_rate(integer),
                       enable_pipelined_write(boolean),
                       unordered_write(boolean),
                       allow_concurrent_memtable_write(boolean),
                       enable_write_thread_adaptive_yield(boolean),
                       max_write_batch_group_size_bytes(integer),
                       write_thread_max_yield_usec(integer),
                       write_thread_slow_yield_usec(integer),
                       skip_stats_update_on_db_open(boolean),
                       skip_checking_sst_file_sizes_on_db_open(boolean),
                       allow_2pc(boolean),
                       % NOTE(review): RocksDB spells this field
                       % fail_if_options_file_error; confirm which name the
                       % foreign library accepts before renaming it here.
                       fail_ifoptions_file_error(boolean),
                       dump_malloc_stats(boolean),
                       avoid_flush_during_recovery(boolean),
                       avoid_flush_during_shutdown(boolean),
                       allow_ingest_behind(boolean),
                       % preserve_deletes(boolean), % TODO: removed: https://github.com/facebook/rocksdb/issues/9090
                       two_write_queues(boolean),
                       manual_wal_flush(boolean),
                       atomic_flush(boolean),
                       avoid_unnecessary_blocking_io(boolean),
                       write_dbid_to_manifest(boolean),
                       log_readahead_size(integer), % a size_t in DBOptions
                       best_efforts_recovery(boolean),
                       max_bgerror_resume_count(integer),
                       bgerror_resume_retry_interval(integer),
                       allow_data_in_errors(boolean),
                       db_host_id(string)
                     ]).
% Read options accepted by rocks_get/4; the enumeration predicates
% forward their options to this declaration.
:- predicate_options(rocks_get/4, 4,
                     [
                       readahead_size(integer),
                       max_skippable_internal_keys(integer),
                       verify_checksums(boolean),
                       fill_cache(boolean),
                       tailing(boolean),
                       total_order_seek(boolean),
                       auto_prefix_mode(boolean),
                       prefix_same_as_start(boolean),
                       pin_data(boolean),
                       background_purge_on_iterator_cleanup(boolean),
                       ignore_range_deletions(boolean),
                       % iter_start_seqnum(integer), % TODO: removed https://github.com/facebook/rocksdb/issues/9090
                       io_timeout(integer),
                       value_size_soft_limit(integer)
                     ]).
% The enumeration predicates accept the same read options as rocks_get/4.
:- predicate_options(rocks_enum/4, 4,
                     [ pass_to(rocks_get/4, 4)
                     ]).
:- predicate_options(rocks_enum_from/5, 5,
                     [ pass_to(rocks_get/4, 4)
                     ]).
:- predicate_options(rocks_enum_prefix/5, 5,
                     [ pass_to(rocks_get/4, 4)
                     ]).
% Write options accepted by rocks_put/4; the other modifying predicates
% forward their options to this declaration.
:- predicate_options(rocks_put/4, 4,
                     [ sync(boolean),
                       disableWAL(boolean),
                       ignore_missing_column_families(boolean),
                       no_slowdown(boolean),
                       low_pri(boolean),
                       memtable_insert_hint_per_batch(boolean)
                     ]).
:- predicate_options(rocks_delete/3, 3,
                     [ pass_to(rocks_put/4, 4)
                     ]).
:- predicate_options(rocks_merge/4, 4,
                     [ pass_to(rocks_put/4, 4)
                     ]).
% The module exports rocks_batch/2 and rocks_batch/3 only; the option
% list is the third argument of rocks_batch/3.
:- predicate_options(rocks_batch/3, 3,
                     [ pass_to(rocks_put/4, 4)
                     ]).
rocks_error(Message)
or rocks_error(Message,Blob)
.
Most of the DBOptions in rocksdb/include/rocksdb/options.h
are supported. create_if_missing
defaults to true
.
Additional options are:
%     - key(+Type)
%     - value(+Type)
%       Define the type for the key and value. These must be
%       consistent over multiple invocations.  Default is `atom`.
%       Defined types are:
%       - atom
%         Accepts an atom or string.  Unifies the result with an
%         atom.  Data is stored as a UTF-8 string in RocksDB.
%       - string
%         Accepts an atom or string.  Unifies the result with a
%         string.  Data is stored as a UTF-8 string in RocksDB.
%       - binary
%         Accepts an atom or string with codes in the range 0..255.
%         Unifies the result with a string. Data is stored as a
%         sequence of bytes in RocksDB.
%       - int32
%         Maps to a Prolog integer in the range
%         -2,147,483,648..2,147,483,647.  Stored as 4 bytes in
%         native byte order.
%       - int64
%         Maps to a Prolog integer in the range
%         -9223372036854775808..9223372036854775807.  Stored as 8
%         bytes in native byte order.
%       - float
%         Value is mapped to a 32-bit floating point number.
%       - double
%         Value is mapped to a 64-bit floating point number (double).
%       - term
%         Stores any Prolog term. Stored using PL_record_external().
%         The PL_record_external() function serializes the internal
%         structure of a term, including _cycles_, _sharing_ and
%         _attributes_.  This means that if the key is a term, it
%         only matches if the same cycles and sharing are
%         used. For example, `X = f(a), Key = k(X,X)` is a different
%         key from `Key = k(f(a),f(a))` and `X = [a|X]` is a
%         different key from `X = [a,a|X]`. Applications for which
%         such keys should match must first normalize the key.
%         Normalization can be based on term_factorized/3 from
%         library(terms).
%       In addition, `value` accepts one of list(type) or set(type),
%       currently only for the numeric types.  This causes
%       rocks_put/3 and rocks_get/3 to exchange the value as a
%       list and installs a built-in merge function.
%     - merge(:Goal)
%       Define RocksDB value merging.  See rocks_merge/3.
%     - mode(+Mode)
%       One of `read_write` (default) or `read_only`.  The latter
%       uses OpenForReadOnly() to open the database.  It is allowed
%       to have multiple `read_only` opens, but only one
%       `read_write` (which also precludes having any `read_only`);
%       however, it is recommended to only open a database once.
%     - debug(true)
%       Output more information when displaying the rocksdb "blob".
%
%   @see https://github.com/facebook/rocksdb/wiki/RocksDB-Tuning-Guide
%   @see http://rocksdb.org/blog/2018/08/01/rocksdb-tuning-advisor.html
%   @see https://github.com/EighteenZi/rocksdb_wiki/blob/master/RocksDB-Tuning-Guide.md
%
%   @bug You must call rocks_close(RocksDB) to ensure clean shutdown.
%        Failure to call rocks_close/1 usually doesn't result in data
%        loss because rocksdb can recover, depending on the setting of
%        the `sync` option.  However, it is recommended that you do a
%        clean shutdown if possible, such as using at_halt/1 or
%        setup_call_cleanup/3 to ensure clean shutdown.
%
%   @see https://github.com/facebook/rocksdb/wiki/Known-Issues

rocks_open(Directory, RocksDB, QOptions) :-
    % Qualify the options selected by is_meta/1 with the caller's module.
    meta_options(is_meta, QOptions, Options),
    % Hand an absolute path to the opener.
    absolute_file_name(Directory, AbsDirectory),
    % rocks_open_/3 is not defined in this file; presumably it comes from
    % the rocksdb4pl foreign library -- TODO confirm.
    rocks_open_(AbsDirectory, RocksDB, Options).

% The only module-sensitive option of rocks_open/3 is merge(Goal).
is_meta(merge).
rocks_close/1 throws an existence error if RocksDB isn't a valid handle or alias from rocks_open/3. If RocksDB is an anonymous handle that has been closed, rocks_close/1 silently succeeds; if it's an alias name that's already been closed, an existence error is raised (this behavior may change in future).
If you call rocks_close/1 while there is an iterator open (e.g., from rocks_enum/3 that still has a choicepoint), the results are unpredictable. The code attempts to avoid crashes by reference counting iterators and only allowing a close if there are no active iterators for a database.
alias
option and unify RocksDb with the underlying handle; fails if
there is no open file with the alias Name.
This predicate has two uses:
rocks_open(...,RocksDB,[alias(Name)])
unifies
RocksDB with Name; if alias(Name)
is not specified, RocksDB
is unified with the underlying handle.list(Type)
or set(Type)
, Value must be a list. For
set(Type)
the list is converted into an ordered set.
% rocks_put(+RocksDB, +Key, +Value)
%
% Shorthand for rocks_put/4 with an empty option list.

rocks_put(DB, K, V) :-
    NoOptions = [],
    rocks_put(DB, K, V, NoOptions).
list(Type)
or set(Type)
to be used when opening the database.
Using rocks_merge/3 rather than rocks_get/3, update and
rocks_put/3 makes the operation atomic and reduces disk
accesses.
Options are the same as for rocks_put/4.
Merger is called as below, where two clauses are required:
one with How set to partial
and one with How set to
full
. If full
, MergeValue is a list of values that need
to be merged, if partial
, MergeValue is a single value.
call(:Merger, +How, +Key, +Value0, +MergeValue, -Value)
If Key is not in RocksDB, Value0 is unified with a value that
depends on the value type. If the value type is an atom, it is
unified with the empty atom; if it is string
or binary
it is
unified with an empty string; if it is int32
or int64
it is
unified with the integer 0; and finally if the type is term
it
is unified with the empty list.
For example, if the value is a set of Prolog values we open the
database with value(term)
to allow for Prolog lists as value and
we define merge/5 as below.
merge(partial, _Key, Left, Right, Result) :-
    ord_union(Left, Right, Result).
merge(full, _Key, Initial, Additions, Result) :-
    append([Initial|Additions], List),
    sort(List, Result).
If the merge callback fails or raises an exception, the merge operation fails and the error is logged through the RocksDB logging facilities. Note that the merge callback can be called in a different thread or even in a temporarily created thread if RocksDB decides to merge remaining values in the background.
% rocks_merge(+RocksDB, +Key, +Value)
%
% Shorthand for rocks_merge/4 with an empty option list.

rocks_merge(DB, K, V) :-
    NoOptions = [],
    rocks_merge(DB, K, V, NoOptions).
Options are the same as for rocks_put/4.
% rocks_delete(+RocksDB, +Key)
%
% Shorthand for rocks_delete/3 with an empty option list.

rocks_delete(DB, K) :-
    NoOptions = [],
    rocks_delete(DB, K, NoOptions).
list(Type)
or set(Type)
this
returns a Prolog list.
% rocks_get(+RocksDB, +Key, -Value)
%
% Shorthand for rocks_get/4 with an empty option list.

rocks_get(DB, K, V) :-
    NoOptions = [],
    rocks_get(DB, K, V, NoOptions).
list(Type)
or set(Type)
Value is a list.
Options are the same as for rocks_get/4.
% rocks_enum(+RocksDB, ?Key, ?Value)
%
% Shorthand for rocks_enum/4 with an empty option list.

rocks_enum(DB, K, V) :-
    NoOptions = [],
    rocks_enum(DB, K, V, NoOptions).
atom
, string
or binary
. To only
iterate all keys with Prefix, use rocks_enum_prefix/4 or the
construct below.
Options are the same as for rocks_get/4.
rocks_enum_from(DB, Key, Value, Prefix),
(   sub_atom(Key, 0, _, _, Prefix)
->  handle(Key, Value)
;   !, fail
)
% rocks_enum_from(+RocksDB, ?Key, ?Value, +From)
%
% Shorthand for rocks_enum_from/5 with an empty option list.

rocks_enum_from(DB, K, V, From) :-
    NoOptions = [],
    rocks_enum_from(DB, K, V, From, NoOptions).
Options are the same as for rocks_get/4.
% rocks_enum_prefix(+RocksDB, ?Suffix, ?Value, +Prefix)
%
% Shorthand for rocks_enum_prefix/5 with an empty option list.

rocks_enum_prefix(DB, Rest, V, Pfx) :-
    NoOptions = [],
    rocks_enum_prefix(DB, Rest, V, Pfx, NoOptions).
Options are the same as for rocks_put/4.
Actions is a list of:
The following example is translated from the RocksDB documentation:
rocks_get(RocksDB, key1, Value),
rocks_batch(RocksDB,
            [ delete(key1),
              put(key2, Value)
            ])
% rocks_batch(+RocksDB, +Actions)
%
% Shorthand for rocks_batch/3 with an empty option list.

rocks_batch(DB, Steps) :-
    NoOptions = [],
    rocks_batch(DB, Steps, NoOptions).
% rocks_property(+RocksDB, ?Property)
%
% Property is a term Name(Value).
%
%   - If Property is unbound, the names listed by rocks_property/1 are
%     enumerated on backtracking and Property is unified with
%     Name(Value) for each value produced by rocks_property/3.
%   - If Property is a term with exactly one argument, rocks_property/3
%     is called for its functor name and argument.
%   - Any other term raises type_error(property, Property).
%
% rocks_property/3 is not defined in this file; presumably it comes from
% the rocksdb4pl foreign library -- TODO confirm.

rocks_property(RocksDB, Property) :-
    var(Property), !,
    rocks_property(P),
    rocks_property(RocksDB, P, Value),
    Property =.. [P,Value].
rocks_property(RocksDB, Property) :-
    % =../2 only succeeds here for a term with exactly one argument;
    % atoms, numbers and terms of any other arity fall through.
    Property =.. [P,Value], !,
    rocks_property(RocksDB, P, Value).
rocks_property(_RocksDB, Property) :-
    type_error(property, Property).

% rocks_property(?Name)
%
% Property names enumerated when rocks_property/2 is called with an
% unbound Property.

rocks_property(estimate_num_keys).
RocksDB interface
RocksDB is an embeddable persistent key-value store for fast storage. The store can be used only from one process at the same time. It may be used from multiple Prolog threads though. This library provides a SWI-Prolog binding for RocksDB. RocksDB just associates byte arrays. This interface defines several mappings between Prolog data structures and byte arrays that may be configured to store both keys and values. See rocks_open/3 for details.