diff --git a/NEWS b/NEWS index bde2bcd..57ae1eb 100644 --- a/NEWS +++ b/NEWS @@ -1,9 +1,77 @@ -GNU dbm NEWS -- history of user-visible changes. 2021-06-17 +GNU dbm NEWS -- history of user-visible changes. 2021-09-02 Copyright (C) 1990-2021 Free Software Foundation, Inc. See the end of file for copying conditions. Please send gdbm bug reports to . +Version 1.21, 2021-09-02 + +* Crash tolerance + +By default it is possible for an abrupt crash (e.g., power failure, +OS kernel panic, or application process crash) to corrupt the gdbm +database file. A new Linux-only mechanism enables applications to +recover the database state corresponding to the most recent +successful gdbm_sync() call before the crash. See the chapter 17 +"Crash Tolerance" in the GDBM manual. + +* New database file format: numsync + +The new "numsync" database format is designed to better support +crash tolerance. To create a database in numsync format, the gdbm_open +(or gdbm_fd_open) function must be given the GDBM_NEWDB|GDBM_NUMSYNC +flags. The GDBM_NUMSYNC flag also takes effect when used together +with GDBM_WRCREAT, provided that the new file is created. + +New function gdbm_convert() is provided for converting the databases +from standard GDBM format to numsync and vice versa. + +The gdbmtool tool can also be used for converting databases between +these two formats. + +* Changes in gdbmtool + +** Fix string output in non-ASCII encodings + +Printable multi-byte sequences are correctly represented on output. +This also fixes octal representation of unprintable characters. + +** The filename variable + +This variable supplies the name of database file for use in "open" +command, if the latter is called without arguments. If "open" is +called with the file name argument, the "filename" variable is +initialized to this value. + +** The fd variable + +If set, its value must be an open file descriptor referring to a +GDBM database file. 
The "open" command will use gdbm_fd_open +function to use this file. Upon closing the database, this +descriptor will be closed and the variable will be unset. + +The file descriptor to use can also be supplied using the +-d (--db-descriptor) command line option. + +** The format variable + +Defines the format in which new databases will be created. Allowed +values are: "standard" (default) and "numsync". + +** New commands: upgrade and downgrade + +The "upgrade" command converts current database to the numsync +(extended) format. The "downgrade" command converts current database +to the standard format. + +** New command: snapshot + +The "snapshot" command is part of the new crash tolerance support. +Given the names of two snapshot files, it analyzes them and selects +the one to be used for database recovery. See the GDBM manual, +section 17.5 "Manual crash recovery" for a detailed discussion of its +use. + Version 1.20, 2021-06-17 * New bucket cache diff --git a/README b/README index cfac716..14c97a9 100644 --- a/README +++ b/README @@ -21,7 +21,7 @@ the documentation can be accessed by running `man gdbm' and * Overview -GNU dbm is a set of database routines that use extendible hashing and +GNU dbm is a set of database routines that use extendable hashing and works similar to the standard UNIX dbm routines. The library provides also an optional compatibility layer for UNIX-like dbm and ndbm calls. diff --git a/THANKS b/THANKS index 081f753..df48524 100644 --- a/THANKS +++ b/THANKS @@ -11,4 +11,5 @@ Jakub Bogusz Lionel Debroux Matthew Burgess Tanaka Akira +Terence Kelly Thomas Klausner diff --git a/configure.ac b/configure.ac index 8e4e1de..d174978 100644 --- a/configure.ac +++ b/configure.ac @@ -15,7 +15,7 @@ # along with GDBM. If not, see . 
*/ m4_define([_GDBM_VERSION_MAJOR], 1) -m4_define([_GDBM_VERSION_MINOR], 20) +m4_define([_GDBM_VERSION_MINOR], 21) AC_INIT([gdbm], _GDBM_VERSION_MAJOR._GDBM_VERSION_MINOR[]m4_ifdef([_GDBM_VERSION_PATCH],._GDBM_VERSION_PATCH), @@ -160,6 +160,30 @@ fi AM_CONDITIONAL([GDBM_COND_READLINE], [test "$status_readline" = "yes"]) +# CoW crash tolerance support +AC_ARG_ENABLE([crash-tolerance], + AC_HELP_STRING( + [--enable-crash-tolerance], + [Enable crash tolerance, based on clone/copy-on-write (needs support for FICLONE)]), + [status_ficlone=$enableval], + [status_ficlone=probe]) + +if test $status_ficlone = probe; then + AC_CHECK_HEADER([linux/fs.h], + [AC_MSG_CHECKING([for FICLONE ioctl support]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include <linux/fs.h> + #include <sys/ioctl.h>], + [ioctl(0, FICLONE, 1);])], + [status_ficlone=yes], + [status_ficlone=no]) + AC_MSG_RESULT($status_ficlone)], + [status_ficlone=no]) +fi +if test $status_ficlone = yes; then + AC_DEFINE([GDBM_FAILURE_ATOMIC], 1, [Define if support for atomic failures is enabled]) +fi + # Additional debugging AC_ARG_ENABLE([debug], AC_HELP_STRING([--enable-debug], @@ -205,6 +229,7 @@ Compatibility library ......................... $status_compat Memory mapped I/O ............................. $mapped_io GNU Readline .................................. $status_readline Debugging support ............................. $status_debug +Reflink crash tolerance .......................
$status_ficlone ******************************************************************* EOF @@ -223,7 +248,8 @@ fi mapped_io=$mapped_io status_readline=$status_readline status_debug=$status_debug -want_gdbmtool_debug=$want_gdbmtool_debug]) +want_gdbmtool_debug=$want_gdbmtool_debug +status_ficlone=$status_ficlone]) AC_CONFIG_FILES([Makefile src/Makefile diff --git a/doc/Makefile.am b/doc/Makefile.am index 674087d..dfb1a2a 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -23,7 +23,7 @@ gdbm_TEXINFOS=\ dist_man_MANS = gdbm.3 gdbm_dump.1 gdbm_load.1 gdbmtool.1 GENDOCS = perl gendocs.pl -EXTRA_DIST = gendocs.pl webdoc.init +EXTRA_DIST = gendocs.pl webdoc.init htmlxref.cnf TEXI2DVI=texi2dvi -t '@set $(RENDITION)' diff --git a/doc/gdbm.3 b/doc/gdbm.3 index cfef634..6f569dc 100644 --- a/doc/gdbm.3 +++ b/doc/gdbm.3 @@ -13,7 +13,7 @@ .\" .\" You should have received a copy of the GNU General Public License .\" along with GDBM. If not, see . */ -.TH GDBM 3 "March 21, 2021" "GDBM" "GDBM User Reference" +.TH GDBM 3 "July 31, 2021" "GDBM" "GDBM User Reference" .SH NAME GDBM \- The GNU database manager. Includes \fBdbm\fR and \fBndbm\fR compatibility. @@ -63,6 +63,12 @@ compatibility. .br .BI "int gdbm_avail_verify (GDBM_FILE " dbf ");" .PP +.SS Crash Tolerance (see below): +.PP +.BI "int gdbm_failure_atomic (GDBM_FILE " dbf ", const char *" even ", const char *" odd ");" +.br +.BI "int gdbm_latest_snapshot (const char *" even ", const char *" odd ", const char **" result ");" +.PP .SS DBM Compatibility routines: .PP .B #include @@ -425,6 +431,25 @@ All users in compatibility mode are assumed to be writers. If the also try to open it as a reader. All returned pointers in datum structures point to data that \fBgdbm\fR WILL free. They should be treated as static pointers (as standard UNIX \fBdbm\fR does). 
+ +.SH CRASH TOLERANCE + +By default \fBGNU dbm\fR does not protect the integrity of its +databases from corruption or destruction due to failures such as +power outages, operating system kernel panics, or application process +crashes. Such failures could damage or destroy the underlying +database. + +Starting with release 1.21 \fBGNU dbm\fR includes a mechanism that, +if used correctly, enables post-crash recovery to a consistent state +of the underlying database. This mechanism requires OS and +filesystem support and must be requested when \fBgdbm\fR is compiled. +The crash-tolerance mechanism is a "pure opt-in" feature, in the +sense that it has no effects whatsoever except on those applications +that explicitly request it. For details, see the chapter +.B "Crash Tolerance" +in the +.BR "GDBM manual" . .SH LINKING This library is accessed by specifying \fI\-lgdbm\fR as the last parameter to the compile line, e.g.: @@ -452,7 +477,8 @@ Send bug reports to . .BR gdbm_load (1), .BR gdbmtool (1). .SH AUTHORS -by Philip A. Nelson, Jason Downs and Sergey Poznyakoff. +by Philip A. Nelson, Jason Downs and Sergey Poznyakoff; +crash tolerance by Terence Kelly. .SH COPYRIGHT Copyright \(co 1990 - 2021 Free Software Foundation, Inc. @@ -488,6 +514,11 @@ You may contact the current maintainers by: and e-mail: gray@gnu.org +For questions and feedback regarding crash tolerance, you may contact +Terence Kelly at: +.br + e-mail: tpkelly @ { acm.org, cs.princeton.edu, eecs.umich.edu } + .\" Local variables: .\" eval: (add-hook 'write-file-hooks 'time-stamp) .\" time-stamp-start: ".TH GDBM 3 \"" diff --git a/doc/gdbm.texi b/doc/gdbm.texi index 1aec85a..ee24b48 100644 --- a/doc/gdbm.texi +++ b/doc/gdbm.texi @@ -1,5 +1,4 @@ \input texinfo @c -*- Texinfo -*- -@comment $Id$ @comment %**start of header (This is for running Texinfo on a region.) @setfilename gdbm.info @include version.texi @@ -63,8 +62,8 @@ Documentation License.'' @sp 2 @center by Philip A. 
Nelson, Jason Downs and Sergey Poznyakoff @sp 4 -@center Manual by Pierre Gaumond, Philip A. Nelson, Jason Downs -@center and Sergey Poznyakoff +@center Manual by Pierre Gaumond, Philip A. Nelson, Jason Downs, +@center Sergey Poznyakoff, and Terence Kelly @sp 1 @center Edition @value{EDITION} @sp 1 @@ -83,18 +82,16 @@ Documentation License.'' @ifnottex @node Top -@top The GNU database manager. +@top The GNU database manager GNU @command{dbm} is a library of functions implementing a hashed database -on a disk file. This manual documents GNU @command{dbm} Version @value{VERSION} -(@code{gdbm}). The software was originally written by Philip A.@: -Nelson. This document was originally written by Pierre Gaumond from -texts written by Phil. +on a disk file. This manual documents GNU @command{dbm} Version +@value{VERSION} (@code{gdbm}). The software was originally written by +Philip A.@: Nelson. This document was originally written by Pierre +Gaumond from texts written by Phil. @end ifnottex @menu -Introduction: - * Copying:: Your rights. * Intro:: Introduction to GNU dbm. @@ -109,20 +106,23 @@ Functions: * Sequential:: Sequential access to records. * Reorganization:: Database reorganization. * Sync:: Ensure all writes to disk have completed. +* Database format:: GDBM database formats. * Flat files:: Export and import to Flat file format. * Errors:: Error handling. +* Database consistency:: Structural and logical consistency. * Recovery:: Recovery from fatal errors. +* Crash Tolerance:: Ensuring recovery to a consistent state. * Options:: Setting internal options. * Locking:: File locking. * Variables:: Useful global variables. -* Additional functions:: +* Additional functions:: Functions for verifying internal structures. * Error codes:: Error codes returned by @code{gdbm} calls. * Compatibility:: Compatibility with UNIX dbm and ndbm. Programs * gdbmtool:: Examine and modify a GDBM database. -* gdbm_dump:: Dump the database into a flat file.
+* gdbm_dump:: Dump the database into a flat file. * gdbm_load:: Load the database from a flat file. * Exit codes:: Exit codes returned by GDBM utilities. @@ -138,10 +138,32 @@ Other topics: * This Manual in Other Formats:: @end ifhtml @end ifset + +@detailmenu + --- The Detailed Node Listing --- + +Compatibility with standard @command{dbm} and @command{ndbm} + +* ndbm:: NDBM interface functions. +* dbm:: DBM interface functions. + +Examine and modify a GDBM database + +* invocation:: +* shell:: + +gdbmtool interactive mode + +* variables:: shell variables. +* commands:: shell commands. +* definitions:: how to define structured data. +* startup files:: + +@end detailmenu @end menu @node Copying -@chapter Copying Conditions. +@chapter Copying Conditions This library is @dfn{free}; this means that everyone is free to use it and free to redistribute it on a free basis. GNU @command{dbm} (@code{gdbm}) is not in the public domain; it is copyrighted and there @@ -176,7 +198,7 @@ Public License.) A copy of the GNU General Public License is included with the distribution of @code{gdbm}. @node Intro -@chapter Introduction to GNU @command{dbm}. +@chapter Introduction to GNU @command{dbm} GNU @command{dbm} (@code{gdbm}) is a library of database functions that use extensible hashing and works similar to the standard UNIX @command{dbm} @@ -198,7 +220,10 @@ typedef struct @} datum; @end example -This structure allows for arbitrary sized keys and data items. +This structure allows for arbitrary sized keys and data items. In +particular, zero-length keys or data (@code{dsize = 0}) are allowed. +However, the @code{dptr} field is required to point to a valid memory +location. In other words, @code{dptr} cannot be NULL. The key/data pairs are stored in a @code{gdbm} disk file, called a @dfn{gdbm database}. An application must open a @code{gdbm} database @@ -206,9 +231,14 @@ to be able to manipulate the keys and data contained in the database.
@code{gdbm} allows an application to have multiple databases open at the same time. When an application opens a @code{gdbm} database, it is designated as a @code{reader} or a @code{writer}. A @code{gdbm} -database can be opened by at most one writer at a time. However, many readers -may open the database simultaneously. Readers and writers can not -open the @code{gdbm} database at the same time. +database can be opened by at most one writer at a time. However, many +readers may open the database simultaneously. Readers and writers can +not open the @code{gdbm} database at the same time. + +Speaking about @dfn{application} we usually mean a separate process. +However, it is entirely normal for a multi-thread program to operate +as a @code{gdbm} reader in one thread and writer in another, provided, +of course, that the two threads don't operate on the same database. @flindex gdbm.h To use the @code{gdbm} functions, the programmer must first include @@ -216,7 +246,7 @@ the header file @file{gdbm.h}. @tpindex GDBM_FILE This file defines, among others, the @code{GDBM_FILE} data type, an -opaque pointer to the structure that represents the opened gdbm +opaque pointer to the structure that represents the opened @code{gdbm} database. To access the database, the programmer must first open it using the @code{gdbm_open} function. The function takes several arguments, the name of the database file being one of them, and @@ -241,7 +271,7 @@ main (int argc, char **argv) datum key, content; /* Key and content data */ int status = 0; /* Exit status of the program: 0 - OK, 1 - key not found, 2 - error. */ - + /* * Validate arguments. */ @@ -254,7 +284,7 @@ main (int argc, char **argv) /* * Open the database. The GDBM_READER flag indicates that we only intend * to read from it. - */ + */ gdbf = gdbm_open (argv[1], 0, GDBM_READER, 0, NULL); if (gdbf == NULL) @{ @@ -339,7 +369,7 @@ no such key @end example @node Open -@chapter Opening the database. 
+@chapter Opening the database @cindex opening the database @cindex database, opening or creating @@ -362,7 +392,7 @@ memory. This parameter is ignored if the file has been previously initialized. If the value is less than 512, the file system block size is used instead. The size is adjusted so that the block can hold an exact number of directory entries, so that the effective block size -can be slightly greater than requested. However, if the +can be slightly greater than requested. However, if the @code{GDBM_BSEXACT} flag is set and the size needs to be adjusted, the function will return with error status, setting the @code{gdbm_errno} variable to @code{GDBM_BLOCK_SIZE_ERROR}. @@ -381,20 +411,22 @@ to @code{GDBM_WRCREAT}, the user wants both read and write access to the database and wants it created if it does not already exist. If @code{flags} is set to @code{GDBM_NEWDB}, the user wants a new database created, regardless of whether one existed, and wants read and write -access to the new database. +access to the new database. If an existing database file is opened with +the @code{GDBM_NEWDB} flag, the existing data are destroyed, and an +empty database structure is created in its place. -@kwindex GDBM_SYNC -@kwindex GDBM_NOLOCK -@kwindex GDBM_NOMMAP The following constants may also be logically or'd into the database flags: @table @code @kwindex GDBM_SYNC @item GDBM_SYNC -Synchronize all database operations to disk immediately. This -provides for the best database consistency at the expense of severe -performance degradation. +Synchronize all database operations to disk immediately. Note that +this option entails severe performance degradation and does not +necessarily ensure that the resulting database state is consistent. +In general, we discourage its use (@pxref{Sync}). +@xref{Crash Tolerance}, for a discussion of how to ensure database +consistency with minimal performance overhead.
@kwindex GDBM_FAST @item GDBM_FAST @@ -402,6 +434,14 @@ A reverse of @code{GDBM_SYNC}. Synchronize writes only when needed. This is the default. The flag is provided for compatibility with previous versions of @command{GDBM}. +@kwindex GDBM_NUMSYNC +@item GDBM_NUMSYNC +Useful only together with @code{GDBM_NEWDB}, this bit instructs +@code{gdbm_open} to create new database in @dfn{extended database +format}, suitable for effective crash recovery. @xref{Numsync}, for a +detailed discussion of this format, and @ref{Crash Tolerance}, for a +discussion of crash recovery. + @kwindex GDBM_NOLOCK @item GDBM_NOLOCK Don't lock the database file. Use this flag if you intend to do @@ -417,7 +457,7 @@ When mapping GDBM file to memory, read its contents immediately, instead of when needed (@dfn{prefault reading}). This can be advantageous if you open a @emph{read-only} database and are going to do a lot of look-ups on it. In this case entire database will be -pre-read and look-ups will operate on an in-memory copy. In the +pre-read and look-ups will operate on an in-memory copy. In contrast, @code{GDBM_PREREAD} should not be used if you open a database (even in read-only mode) only to do a couple of look-ups. Finally, never use @code{GDBM_PREREAD} when opening a database for @@ -438,14 +478,8 @@ variable to @code{GDBM_BLOCK_SIZE_ERROR} and return @code{NULL}. @cindex close-on-exec @item GDBM_CLOEXEC Set the close-on-exec flag on the database file descriptor. The -@code{libc} must support the @code{O_CLOEXEC} flag@footnote{ -@ifhtml -(@uref{http://www.manpagez.com/man/2/open, open(2)}) -@end ifhtml -@ifnothtml -@xref{open,,,open(2),open(2) man page} -@end ifnothtml -} +@code{libc} must support the @code{O_CLOEXEC} flag +(@pxref{O_CLOEXEC,,,open(2),open(2) man page}). @kwindex GDBM_XVERIFY @item GDBM_XVERIFY @@ -458,21 +492,8 @@ on large databases, it can slow down the opening process. 
@end table @item mode -File mode@footnote{See -@ifhtml -@uref{http://www.manpagez.com/man/2/chmod, chmod(2)}, -@end ifhtml -@ifnothtml -@xref{chmod,,change permissions of a file,chmod(2), -chmod(2) man page}, -@end ifnothtml -and -@ifhtml -@uref{http://www.manpagez.com/man/2/open, open(2)}), -@end ifhtml -@ifnothtml -@ref{open,,open a file,open(2), open(2) man page}.}, -@end ifnothtml +File mode@footnote{@xref{chmod,,,chmod(2),chmod(2) man page}, +and @xref{open,,open a file,open(2), open(2) man page}.}, which is used if the file is created. @item fatal_func A function for @code{gdbm} to call if it detects a fatal error. The only @@ -490,6 +511,7 @@ In all of the following calls, the parameter @var{dbf} refers to the pointer returned from @code{gdbm_open}. @end deftypefn +@anchor{gdbm_fd_open} @deftypefn {gdbm interface} GDBM_FILE gdbm_fd_open (int @var{fd},@ const char *@var{name}, int @var{block_size}, @ int @var{flags}, int @var{mode}, void (*@var{fatal_func})(const char *)) @@ -507,12 +529,13 @@ Copy file ownership and mode from @var{src} to @var{dst}. @end deftypefn @node Close -@chapter Closing the database. +@chapter Closing the database @cindex closing database @cindex database, closing -It is important that every file opened is also closed. This is needed to -update the reader/writer count on the file: +It is important that every file opened is also closed. This is needed +to properly update its disk structure and maintain a consistent +locking state on the file. @deftypefn {gdbm interface} int gdbm_close (GDBM_FILE @var{dbf}) This function closes the @code{gdbm} file and frees all memory @@ -548,7 +571,7 @@ and returns -1. @end deftypefn @node Store -@chapter Inserting and replacing records in the database. +@chapter Inserting and replacing records in the database @cindex storing records @cindex records, storing @@ -581,18 +604,58 @@ This function can return the following values: @item 0 Success. 
The value of @var{content} is keyed by @var{key} in the database. + @item -1 -The item was not stored in the database because the caller was not an -official writer or either @var{key} or @var{content} have a -@code{NULL} @code{dptr} field. +An error occurred which prevented the item from being stored in the +database. Examine the @code{gdbm_errno} variable to determine the +actual cause of the error. -Both @var{key} and @var{content} must have the @code{dptr} field be a -non-@code{NULL} value. Since a @code{NULL} @code{dptr} field is used by -other functions to indicate an error, it cannot be valid data. @item +1 The item was not stored because the argument @var{flag} was @code{GDBM_INSERT} and the @var{key} was already in the database. +The @code{gdbm_errno} variable is set to @code{GDBM_CANNOT_REPLACE}. +@end table + +If the function returns -1, @code{gdbm_errno} can have the following +values: + +@table @code +@item GDBM_READER_CANT_STORE +Database was opened in read-only mode, i.e. with the @code{GDBM_READER} +flag. @xref{Open}. + +@item GDBM_MALFORMED_DATA +Either @var{key} or @var{content} had their @code{dptr} field set to +@code{NULL}. + +It is OK to have a @dfn{zero-length} key or content, i.e. a datum with +@code{dsize} set to 0, but the @code{dptr} field must always be a +non-NULL value. + +@item GDBM_BAD_HASH_TABLE +Database hash table is malformed. This usually means that some error +in the application or the library caused memory overrun. The database +is marked as needing recovery. All further calls on this database +will return with @code{gdbm_errno} set to @code{GDBM_NEED_RECOVERY}. +@xref{Recovery}, for a discussion of the database recovery process. + +@item GDBM_BAD_DIR_ENTRY +Database directory entry is corrupted. The database is marked as +needing recovery. @xref{Recovery}. + +@item GDBM_BAD_BUCKET +Database bucket is corrupted. The database is marked as +needing recovery. @xref{Recovery}.
+ +@item GDBM_BAD_AVAIL +Database available storage index is corrupted. The database is marked as +needing recovery. @xref{Recovery}. + +@item GDBM_FILE_SEEK_ERROR +A seek error occurred on the underlying disk file. Examine the system +@code{errno} variable for more detail. @end table + @end deftypefn If you store data for a @var{key} that is already in the data base, @@ -605,7 +668,7 @@ value for an object of type @code{int} (type of the @code{dsize} member of @code{datum}). @node Fetch -@chapter Searching for records in the database. +@chapter Searching for records in the database @cindex fetching records @cindex looking up records @cindex record, fetching @@ -670,7 +733,7 @@ The search key. @end deftypefn @node Delete -@chapter Removing records from the database. +@chapter Removing records from the database @cindex deleting records @cindex record, deleting @@ -695,7 +758,7 @@ requester is a reader. The return of @code{0} marks a successful delete. @end deftypefn @node Sequential -@chapter Sequential access to records. +@chapter Sequential access to records @cindex sequential access @cindex iterating over records @cindex records, iterating over @@ -791,7 +854,7 @@ will not be visited if a loop like the following is executed: @end example @node Reorganization -@chapter Database reorganization. +@chapter Database reorganization @cindex database reorganization @cindex reorganization, database @@ -820,19 +883,23 @@ correct information about the new file. If an error is detected, the return value is negative. The value zero is returned after a successful reorganization. +@emph{Notice}, that calling this function disables crash tolerance, +reverting the effect of the recent @code{gdbm_failure_atomic} call. +You will have to call @code{gdbm_failure_atomic} again after +@code{gdbm_reorganize} returns successfully. This will be fixed in +future releases. 
+ @node Sync @chapter Database Synchronization @cindex database synchronization @cindex synchronization, database -@kwindex GDBM_SYNC -Unless your database was opened with the @code{GDBM_SYNC} flag, -@code{gdbm} does not wait for writes to be flushed to the disk before -continuing. This allows for faster writing of databases at the risk -of having a corrupted database if the application terminates in an -abnormal fashion. The following function allows the programmer to -make sure the disk version of the database has been completely updated -with all changes to the current time. +Normally, @command{GDBM} functions don't flush changed data to the +disk immediately after a change. This allows for faster writing of +databases at the risk of having a corrupted database if the +application terminates in an abnormal fashion. The following function +allows the programmer to make sure the disk version of the database +has been completely updated with all changes to the current time. @deftypefn {gdbm interface} int gdbm_sync (GDBM_FILE @var{dbf}) Synchronizes the changes in @var{dbf} with its disk file. The @@ -840,6 +907,15 @@ parameter is a pointer returned by @code{gdbm_open}. This function would usually be called after a complete set of changes have been made to the database and before some long waiting time. +This set of changes should preserve application-level invariants. In +other words, call @code{gdbm_sync} only when the database is in a +consistent state with regard to the application logic, a state from +which you are willing and able to recover. You can think about all +database operations between two consecutive @code{gdbm_sync} calls as +constituting a single @dfn{transaction}. @xref{Synchronizing the +Database}, for a detailed discussion about how to properly select +the synchronization points. 
+ The @code{gdbm_close} function automatically calls the equivalent of @code{gdbm_sync} so no call is needed if the database is to be closed immediately after the set of changes have been made. @@ -849,6 +925,57 @@ immediately after the set of changes have been made. describing the error and returns -1. @end deftypefn +@kwindex GDBM_SYNC +Opening the database with @code{GDBM_SYNC} flag ensures that +@code{gdbm_sync} function will be called after each change, thereby +flushing the changes to disk immediately. You are advised against +using this flag, however, because it incurs a severe performance +penalty, while giving only a moderate guarantee that the +@emph{structural} consistency of the database will be preserved in case +of failure, and that only unless the failure occurs while being in the +@code{fsync} call. For the ways to ensure proper @emph{logical} consistency +of the database, see @ref{Crash Tolerance}. + +@node Database format +@chapter Changing database format +As of version @value{VERSION}, @command{GDBM} supports databases in +two formats: @dfn{standard} and @dfn{extended}. The standard format +is used most often. The @dfn{extended} database format is used to +provide additional crash resistance (@pxref{Crash Tolerance}). + +Depending on the value of the @var{flags} parameter in a call to +@code{gdbm_open} (@pxref{Open}), a database can be created in either +format. + +The format of an existing database can be changed using the +@code{gdbm_convert} function: + +@deftypefn {gdbm interface} int gdbm_convert (GDBM_FILE @var{dbf}, @ + int @var{flag}) +Changes the format of the database file @var{dbf}. Allowed values for +@var{flag} are: + +@table @code +@item 0 +Convert database to the standard format. + +@kwindex GDBM_NUMSYNC +@item GDBM_NUMSYNC +Convert database to the extended @dfn{numsync} format (@pxref{Numsync}). +@end table + +On success, the function returns 0. 
In this case, it should be +followed by a call to @code{gdbm_sync} (@pxref{Sync}) or +@code{gdbm_close} (@pxref{Close}) to ensure the changes are written to +the disk. + +On error, returns -1 and sets the @code{gdbm_errno} variable +(@pxref{Variables, gdbm_errno}). + +If the database is already in the requested format, the function +returns success (0) without doing anything. +@end deftypefn + @node Flat files @chapter Export and Import @cindex Flat file format @@ -863,18 +990,27 @@ stored. Both formats can be used, for example, to migrate between the different versions of @code{gdbm} databases. Generally speaking, flat files are safe to send over the network, and can be used to recreate the database on another machine. The recreated database is -guaranteed to be a byte-to-byte equivalent of the database from which -the flat file was created. This does not necessarily mean, however, -that this file can be used in the same way as the original one. For -example, if the original database contained non-@acronym{ASCII} data -(e.g.@: @acronym{C} structures, integers etc.), the recreated database -can be of any use only if the target machine has the same integer -size and byte ordering as the source one and if its @acronym{C} -compiler uses the same packing conventions as the one which generated -@acronym{C} which populated the original database. In general, such -binary databases are not portable between machines, unless you follow -some stringent rules on what data is written to them and how it is -interpreted. +guaranteed to have the same format and contain the same set of +key/value pairs as the database from which the flat file was created. +However, it will not constitute a byte-to-byte equivalent of the latter. +Various internal structures in the database can differ. In +particular, ordering of key/value pairs can be different and the table +of available file space will most probably differ, too. 
For databases +in extended format, the @code{numsync} counter will be reset to 0 +(@pxref{Numsync}). These details are not visible to the application +programmer, and are mentioned here only for completeness sake. + +The fact that the restored database contains the same set of key/value +pairs does not necessarily mean, however, that it can be used in the +same way as the original one. For example, if the original database +contained non-@acronym{ASCII} data (e.g.@: @acronym{C} structures, +integers etc.), the recreated database can be of any use only if the +target machine has the same integer size and byte ordering as the +source one and if its @acronym{C} compiler uses the same packing +conventions as the one which generated @acronym{C} which populated the +original database. In general, such binary databases are not portable +between machines, unless you follow some stringent rules on what data +is written to them and how it is interpreted. The GDBM version @value{VERSION} supports two flat file formats. The @dfn{binary} flat file format was first implemented in GDBM version @@ -924,14 +1060,7 @@ If @var{flag} is @code{GDBM_NEWDB}, the function will create a new output file, replacing it if it already exists. @item mode -The permissions to use when creating the output file. -@ifhtml -See @uref{http://www.manpagez.com/man/2/open, open(2)}, -@end ifhtml -@ifnothtml -See @ref{open,,open a file,open(2), open(2) man page}, -@end ifnothtml -for a detailed discussion. +The permissions to use when creating the output file (@pxref{open,,open a file,open(2), open(2) man page}). @end table @end deftypefn @@ -941,10 +1070,10 @@ for a detailed discussion. int @var{meta_mask}, @ unsigned long *@var{errline}) Loads data from the dump file @var{filename} into the database pointed -to by @var{pdbf}. The latter can point to @code{NULL}, in which case +to by @var{pdbf}. The latter can point to @code{NULL}, in which case the function will try to create a new database. 
If it succeeds, the function will return, in the memory location pointed to by @var{pdbf}, -a pointer to the newly created database. If the dump file carries no +a pointer to the newly created database. If the dump file carries no information about the original database file name, the function will set @code{gdbm_errno} to @code{GDBM_NO_DBNAME} and return @code{-1}, indicating failure. @@ -982,14 +1111,20 @@ Not enough memory to load data. Reading from @var{filename} failed. The @code{errno} variable can be used to get more detail about the failure. -@item GDBM_ILLEGAL_DATA -Input contained some illegal data. +@item GDBM_MALFORMED_DATA +@itemx GDBM_ILLEGAL_DATA +Input contained malformed data, i.e. it is not a valid @code{gdbm} +dump file. This often means that the dump file got corrupted +during the transfer. + +The @code{GDBM_ILLEGAL_DATA} is an alias for this error code, +maintained for backward compatibility. @item GDBM_ITEM_NOT_FOUND This error can occur only when the input file is in ASCII format. It indicates that the data part of the record about to be read lacked length specification. Application developers are advised to treat -this error equally as @code{GDBM_ILLEGAL_DATA}. +this error equally as @code{GDBM_MALFORMED_DATA}. @end table Mild errors mean that the function was able to successfully load and @@ -1034,7 +1169,7 @@ Format of the dump file. See the @var{format} argument to the @code{gdbm_dump} function. @end table @end deftypefn - + @deftypefn {gdbm interface} int gdbm_load_from_file (GDBM_FILE *@var{pdbf}, @ FILE *@var{fp}, int @var{replace}, int @var{meta_mask}, @ unsigned long *@var{line}) @@ -1095,7 +1230,7 @@ gdbm_load_from_file (@var{dbf}, @var{fp}, @var{flag}, 0, NULL); @end deftypefn @node Errors -@chapter Error handling. 
+@chapter Error handling @cindex gdbm_errno @cindex error strings @cindex global error state @@ -1108,7 +1243,7 @@ To convert this code to human-readable string, use the following function: @deftypefn {gdbm interface} {const char *} gdbm_strerror (gdbm_error @var{errno}) Converts @var{errno} (which is an integer value) into a human-readable descriptive text. Returns a pointer to a static string. The caller -must not alter or free the returned pointer. +must not free the returned pointer or alter the string it points to. @end deftypefn Detailed information about the most recent error that occurred while @@ -1169,30 +1304,71 @@ function is called upon the entry to any GDBM function. @end deftypefn Certain errors (such as write error when saving stored key) can leave -database file in inconsistent state. When such a critical error -occurs, the database file is marked as needing recovery. Subsequent -calls to any GDBM functions for that database file (except -@code{gdbm_recover}), will return immediately with GDBM error value -@code{GDBM_NEED_RECOVERY}. Additionally, the following -function can be used to check the state of the database file: +database file in inconsistent state (@pxref{Database consistency}). +When such a critical error occurs, the database file is marked as +needing recovery. Subsequent calls to any GDBM functions for that +database file (except @code{gdbm_recover}), will return immediately +with GDBM error value @code{GDBM_NEED_RECOVERY}. Additionally, the +following function can be used to check the state of the database file: @deftypefn {gdbm interface} int gdbm_needs_recovery (GDBM_FILE @var{dbf}) Returns @code{1} if the database file @var{dbf} is in inconsistent state and needs recovery. @end deftypefn -The only way to bring the database back to operational state is to -call the @code{gdbm_recover} function (@pxref{Recovery}). +To restore structural consistency of the database, use the +@code{gdbm_recover} function (@pxref{Recovery}). 
+ +Crash tolerance provides a better way of recovery, because it restores +both structural and logical consistency. @xref{Crash Tolerance}, for +a detailed discussion. + +@node Database consistency +@chapter Database consistency + +@cindex consistency, database +In the chapters that follow we will cover different aspects of +@dfn{database consistency} and ways to maintain it. Speaking +about consistency, it is important to distinguish between two different +aspects of it: structural and logical consistency. + +@cindex structural consistency +@dfn{Structural consistency} means that all internal structures of the +database are in good order, contain valid data and are coherent with +one another. Structural consistency means that the database is in +good shape @dfn{technically}, but it does not imply that the data it +contains are in any way meaningful. + +@cindex logical consistency +@dfn{Logical consistency} means that the data stored in the +database are coherent with respect to the application logic. +Usually this implies that structural consistency is observed as well. + +For as long as the program is free from memory management errors and +each opened database is properly closed before the program terminates, +structural consistency is maintained. Maintaining logical consistency +is a more complex task and its maintenance is entirely the +responsibility of the application programmer. @xref{Crash Tolerance}, +for a detailed discussion. + +Both consistency aspects can suffer as a result of both application +errors that cause the program to terminate prematurely without properly +saving the database, and hardware errors, such as disk failures or +power outages. When such situations occur, it becomes necessary to +@dfn{recover the database}. + +In the next chapter we will discuss how to recover structural +consistency of a database.
@node Recovery -@chapter Recovery +@chapter Recovering structural consistency Certain errors (such as write error when saving stored key) can leave -database file in @dfn{inconsistent state}. When such a critical error -occurs, the database file is marked as needing recovery. Subsequent -calls to any GDBM functions for that database file (except -@code{gdbm_recover}), will return immediately with GDBM error value -@code{GDBM_NEED_RECOVERY}. +database file in @dfn{structurally inconsistent state}. When such a +critical error occurs, the database file is marked as needing +recovery. Subsequent calls to any GDBM functions for that database +file (except @code{gdbm_recover}), will return immediately with GDBM +error value @code{GDBM_NEED_RECOVERY}. To escape from this state and bring the database back to operational state, use the following function: @@ -1208,7 +1384,7 @@ additional statistics about the recovery process (@var{rcvr} can be Each input member has a corresponding flag bit, which must be set in @var{flags}, in order to instruct the function to use it. - + The @code{gdbm_recover} type is defined as: @example @@ -1242,14 +1418,8 @@ If the @code{GDBM_RCVR_ERRFUN} flag bit is set, @code{errfun} points to a function that will be called upon each recoverable or non-fatal error that occurred during the recovery. The @code{data} field of @code{gdbm_recovery} will be passed to it as its first argument. The -@var{fmt} argument is a -@ifhtml -@uref{http://www.manpagez.com/man/3/printf, printf(3)}-like -@end ifhtml -@ifnothtml -@code{printf}-like (@pxref{printf,,format output,printf(3), printf(2) man page}), -@end ifnothtml -format string. The rest of arguments supply parameters for that format. +@var{fmt} argument is a @code{printf}-like (@pxref{Format of the format string,,,printf(3), printf(3) man page}), format string. The rest of +arguments supply parameters for that format. 
@end deftypecv @deftypecv {input member} gdbm_recovery {void *} data @@ -1315,6 +1485,539 @@ The special flag bit @code{GDBM_RCVR_FORCE} instructs @code{gdbm_recovery} to omit this check and to perform database recovery unconditionally. +@node Crash Tolerance +@chapter Crash Tolerance + +Crash tolerance is a new (as of release 1.21) feature that can be +enabled at compile time, and used in environments with appropriate +support from the OS and the filesystem. As of version +@value{VERSION}, this means a Linux kernel 5.12.12 or later and +a filesystem that supports reflink copying, such as XFS, BtrFS, or +OCFS2. If these prerequisites are met, crash tolerance code will +be enabled automatically by the @command{configure} script when +building the package. + +The crash-tolerance mechanism, when used correctly, guarantees that a +logically consistent (@pxref{Database consistency}) recent state of +application data can be recovered following a crash. Specifically, it +guarantees that the state of the database file corresponding to the +most recent successful @code{gdbm_sync} call can be recovered. + +If the new mechanism is used correctly, crashes such as power +outages, OS kernel panics, and (some) application process crashes +will be tolerated. Non-tolerated failures include physical +destruction of storage devices and corruption due to bugs in +application logic. For example, the new mechanism won't help if a +pointer bug in your application corrupts @command{GDBM}'s private in-memory +data which in turn corrupts the database file. + +In the following sections we will describe how to enable crash +tolerance in your application and what to do if a crash occurs. + +The design rationale of the crash tolerance mechanism is described in +detail in the article, @cite{Crashproofing the Original NoSQL Key-Value +Store}, by Terence Kelly, @cite{ACM Queue magazine}, July/August 2021, +available from the @uref{https://queue.acm.org/DrillBits5/, ACM Digital Library}. 
+If you have difficulty retrieving this paper, please contact the +author at @email{tpkelly@@acm.org}, @email{tpkelly@@cs.princeton.edu}, +or @email{tpkelly@@eecs.umich.edu}. + +@node Filesystems supporting crash tolerance +@section Using Proper Filesystem + +Use a filesystem that supports reflink copying. Currently XFS, BtrFS, +and OCFS2 support reflink. You can create such a filesystem if you +don't have one already. (Note that reflink support may require that +special options be specified at the time of filesystem creation; this +is true of XFS.) The most conventional way to create a filesystem is +on a dedicated storage device. However it is also possible to create +a filesystem @emph{within an ordinary file} on some other filesystem. + +For example, the following commands, executed as root, will create a +smallish XFS filesystem inside a file on another filesystem: + +@example +mkdir XFS +cd XFS +truncate --size 512m XFSfile +mkfs -t xfs -m crc=1 -m reflink=1 XFSfile +mkdir XFSmountpoint +mount -o loop XFSfile XFSmountpoint +@end example + +The XFS filesystem is now available in directory +@file{XFSmountpoint}. Now, create a directory where your +unprivileged user account may create and delete files: + +@example +cd XFSmountpoint +mkdir test +chown @var{user}:@var{group} test +@end example + +@noindent +(where @var{user} and @var{group} are the user and group names of the +unprivileged account the application uses). + +Reflink copying via @code{ioctl(FICLONE)} should work for files in and +below this directory. You can test reflink copying using the GNU +@command{cp} program: + +@example +cp --reflink=always file1 file2 +@end example + +@xref{cp invocation, reflink, reflink, coreutils, @sc{gnu} Coreutils}. + +Your GNU dbm database file and two @dfn{snapshot} files described below must +all reside on the same reflink-capable filesystem. + +@node Enabling crash tolerance +@section Enabling crash tolerance + +Open a GNU dbm database with @code{gdbm_open}. 
Whenever possible, use +the extended @command{GDBM} format (@pxref{Numsync}). Generally +speaking, this means using the @code{GDBM_NUMSYNC} flag when creating +the database. Unless you know what you are doing, do not specify +the @code{GDBM_SYNC} flag when opening the database. The reason is that +you want your application to explicitly control when @code{gdbm_sync} +is called; you don't want an implicit sync on every database +operation (@pxref{Sync}). + +Request crash tolerance by invoking the following interface: + +@example +int gdbm_failure_atomic (GDBM_FILE @var{dbf}, const char *@var{even}, + const char *@var{odd}); +@end example + +The @var{even} and @var{odd} arguments are the pathnames of two files that +will be created and filled with @dfn{snapshots} of the database file. +These two files must not exist when @code{gdbm_failure_atomic} is +called and must reside on the same reflink-capable filesystem as the +database file. + +After you call @code{gdbm_failure_atomic}, every call to +@code{gdbm_sync} will make an efficient reflink snapshot of the +database file in either the @var{even} or the @var{odd} snapshot file; +consecutive @code{gdbm_sync} calls alternate between the two, hence +the names. The permission bits and @code{mtime} timestamps on the +snapshot files determine which one contains the state of the database +file corresponding to the most recent successful @code{gdbm_sync}. +@xref{Crash recovery}, for discussion of crash recovery. + +@node Synchronizing the Database +@section Synchronizing the Database + +When your application knows that the state of the database is +consistent (i.e., all relevant application-level invariants hold), +you may call @code{gdbm_sync}. 
For example, if your application +manages bank accounts, transferring money from one account to another +should maintain the invariant that the sum of the two accounts is the +same before and after the transfer: It is correct to decrement account +@samp{A} by $7, increment account @samp{B} by $7, and then call +@code{gdbm_sync}. However it is @emph{not} correct to call +@code{gdbm_sync} @emph{between} the decrement of @samp{A} and the +increment of @samp{B}, because a crash immediately after that call +would destroy money. The general rule is simple, sensible, and +memorable: Call @code{gdbm_sync} only when the database is in a state +from which you are willing and able to recover following a crash. (If +you think about it you'll realize that there's never any other moment +when you'd really want to call @code{gdbm_sync}, regardless of whether +crash-tolerance is enabled. Why on earth would you push the state of +an inconsistent unrecoverable database down to durable media?). + +@node Crash recovery +@section Crash recovery + +If a crash occurs, the snapshot file (@var{even} or @var{odd}) +containing the database state reflecting the most recent successful +@code{gdbm_sync} call is the snapshot file whose permission bits are +read-only and whose last-modification timestamp is greatest. If both +snapshot files are readable, we choose the one with the most recent +last-modification timestamp. Modern operating systems record +timestamps in nanoseconds, which gives sufficient confidence that the +timestamps of the two snapshots will differ. However, one can't rule +out the possibility that the two snapshot files will both be readable +and have identical timestamps@footnote{This can happen, for example, +if the storage is very fast and the system clock is low-resolution, or +if the system administrator sets the system clock backwards. 
In the +latter case one can end up with the most recent snapshot file having +a modification time earlier than that of the obsolete snapshot.}. To +cope with this, @command{GDBM} version 1.21 introduced the new +@dfn{extended database format}, which stores in the database file +header the number of synchronizations performed so far. This number +can reliably be used to select the most recent snapshot, independently +of its timestamp. We strongly suggest using this new format when +writing crash-tolerant applications. @xref{Numsync}, for a detailed +discussion. + +The @code{gdbm_latest_snapshot} function is provided to select the +right snapshot of the two. Invoke it as: + +@example +@group +const char *recovery_file = NULL; +result = gdbm_latest_snapshot (even, odd, &recovery_file); +@end group +@end example + +@noindent +where @var{even} and @var{odd} are names of the snapshot files. On +success, it stores the pointer to the most recent snapshot file name +in @var{recovery_file} and returns @code{GDBM_SNAPSHOT_OK}. To +finalize the recovery, rename this file to the name of your database +file and re-open it using @code{gdbm_open}. You should discard the +remaining snapshot. + +If an error occurs, @code{gdbm_latest_snapshot} returns one of the +following error codes. + +@defvr {gdbm_latest_snapshot} GDBM_SNAPSHOT_BAD +Neither snapshot file is readable. This means that the crash has occurred +before @code{gdbm_failure_atomic} completed. In this case, it is best +to fall back on a safe backup copy of the data file. +@end defvr + +@defvr {gdbm_latest_snapshot} GDBM_SNAPSHOT_ERR +System error occurred in @code{gdbm_latest_snapshot}. Examine the +system @code{errno} variable for details. Its possible values are: + +@table @code +@item EACCES +The file mode of one of the snapshot files was incorrect. Each snapshot +file can be either readable (0400) or writable (0200), but not both.
+This probably means that someone touched one or both snapshot files +after the crash and before your attempt to recover from it. This case +needs additional investigation. If you're sure that the only change +someone made to the files is altering their modes, and your database +is in @dfn{numsync} format (@pxref{Numsync}), you can reset the modes +to 0400 and retry the recovery. + +This error can also be returned by the underlying @code{stat} call, +meaning that search permission was denied for one of the directories +in the path prefix of a snapshot file name. That again means that +someone has messed with permissions after the crash. + +@item EINVAL +Some arguments passed to @code{gdbm_latest_snapshot} were not valid. +It is a programmer's error which means that your application needs to be +fixed. + +@item ENOSYS +Function is not implemented. This means @code{GDBM} was built without +crash-tolerance support. + +@item Other value (@code{EBADF}, @code{EFAULT}, etc) +An error occurred when trying to @code{stat} the snapshot file. +@xref{ERRORS,,,stat(2),stat(2) man page}, for a discussion of +possible @code{errno} values. +@end table +@end defvr + +@defvr {gdbm_latest_snapshot} GDBM_SNAPSHOT_SAME +File modes and modification dates of both snapshot files are exactly +the same. This can happen only if numsync is not available +(@pxref{Numsync}). +@end defvr + +@defvr {gdbm_latest_snapshot} GDBM_SNAPSHOT_SUSPICIOUS +For the database in extended @dfn{numsync} format (@pxref{Numsync}): +the @code{numsync} values of the two snapshots differ by more than +one. Check the arguments to the @code{gdbm_latest_snapshot} function. +The most probable reason for such an error is that the @var{even} and +@var{odd} parameters point to snapshot files belonging to different +database files. +@end defvr + +If you get any of these errors, we strongly suggest undertaking +@dfn{manual recovery}.
+ +@node Manual crash recovery +@section Manual crash recovery + +@dfn{Manual recovery} is usually performed with the help of the +@command{gdbmtool} utility. Start @command{gdbmtool} in read-only +mode (the @option{-r} option). Once in the command shell, issue the +following command: + +@example +snapshot @var{a} @var{b} +@end example + +@noindent +where @var{a} and @var{b} are names of the two snapshot files you +configured using the @code{gdbm_failure_atomic} function. This +command investigates both files and prints out detailed +diagnostics. + +Its output begins with a line listing one of the error codes above, +followed by a colon and a textual description of the error. The lines +that follow show details for each snapshot file. + +Each snapshot description begins with the snapshot file name followed +by a colon and four fields, in this order: + +@enumerate 1 +@item File permission bits in octal. +@item File permission bits in @command{ls -l} notation. +@item Modification timestamp. +@item Numsync counter. +For databases in standard GDBM format, this field is @samp{N/A}. If +the counter cannot be obtained because of error, this field is @samp{?}. +@end enumerate + +Any errors or inconsistencies discovered are reported in the lines +that follow, one error per line. Here's an example of the +@command{snapshot} command output, describing the +@code{GDBM_SNAPSHOT_ERR} condition: + +@example +@group +gdbmtool> snapshot even.dbf odd.dbf +GDBM_SNAPSHOT_ERR: Error selecting snapshot. +even.dbf: 200 -w------- 1627820627.485681330 ? +odd.dbf: 600 rw------- 1627820627.689503918 301 +odd.dbf: ERROR: bad file mode +@end group +@end example + +Line 2 lists the meta-data of the snapshot @file{even.dbf}. The +@code{numsync} field contains a question mark because the file +permissions (write-only) prevented @command{gdbmtool} from opening it. + +The lines for @file{odd.dbf} show the actual reason for the error: bad +file mode (read-write).
Apparently, the file mode has been changed +manually after the crash. The timestamp of the file, which is more +recent than that of @file{even.dbf}, suggests that it might be used for +recovery. To confirm this guess, change the mode of the +@file{even.dbf} to read-only and repeat the @command{snapshot} command: + +@example +@group +gdbmtool> ! chmod 400 even.dbf +gdbmtool> snapshot even.dbf odd.dbf +GDBM_SNAPSHOT_ERR: Error selecting snapshot. +even.dbf: 400 r-------- 1627820627.485681330 300 +odd.dbf: 600 rw------- 1627820627.689503918 301 +odd.dbf: ERROR: bad file mode +@end group +@end example + +This shows the numsync value of the @file{even.dbf} file, which is +exactly one less than that of @file{odd.dbf}. This means that the +latter should be selected for recovery. + +For completeness sake, you can change the mode of @file{odd.dbf} to +read-only as well and repeat the @command{snapshot} command. In this +case you will see: + +@example +@group +gdbmtool> ! chmod 400 odd.dbf +gdbmtool> snapshot even.dbf odd.dbf +GDBM_SNAPSHOT_OK: Selected the most recent snapshot. +odd.dbf: 400 r-------- 1627820627.689503918 301 +@end group +@end example + +@node Performance Impact +@section Performance Impact + +The purpose of a parachute is not to hasten descent. Crash tolerance +is a safety mechanism, not a performance accelerator. Reflink +copying is designed to be as efficient as possible, but making +snapshots of the GNU dbm database file on every @code{gdbm_sync} call +entails overheads. The performance impact of GDBM crash tolerance +will depend on many factors including the type and configuration of +the underlying storage system, how often the application calls +@code{gdbm_sync}, and the extent of changes to the database file +between consecutive calls to @code{gdbm_sync}. 
+ +@node Availability +@section Availability + +To ensure that application data can survive the failure of one or +more storage devices, replicated storage (e.g., RAID) may be used +beneath the reflink-capable filesystem. Some cloud providers offer +block storage services that mimic the interface of individual storage +devices but that are implemented as high-availability fault-tolerant +replicated distributed storage systems. Installing a reflink-capable +filesystem atop a high-availability storage system is a good starting +point for a high-availability crash-tolerant GDBM. + +@node Numsync +@section Numsync Extension + +In @ref{Crash recovery}, we have shown that for database recovery, +one should select the snapshot whose permission bits are read-only and +whose last-modification timestamp is greatest. However, there may be +cases when a crash occurs at such a time that both snapshot files +remain readable. It may also happen, that their permissions had +been reset to read-only and/or modification times inadvertently +changed before recovery. To make it possible to select the right +snapshot in such cases, a new @dfn{extended database format} was +introduced in @command{GDBM} version 1.21. This format adds to the +database header the @code{numsync} field, which holds the number of +synchronizations the database underwent before being closed or +abandoned due to a crash. + +A readable snapshot is a consistent copy of the database at a given point of +time. Thus, if both snapshots of a database in extended format are +readable, it will suffice to examine their @code{numsync} counters +and select the one whose @code{numsync} is greater. That's what +the @code{gdbm_latest_snapshot} function does in this case. + +It is worth noticing, that the two counters should differ exactly by +one. If the difference is greater than that, @code{gdbm_latest_snapshot} +will return a special status code, @code{GDBM_SNAPSHOT_SUSPICIOUS}. 
+If, during a recovery attempt, you get this status code, we recommend +proceeding with manual recovery (@pxref{Manual crash recovery}). + +To create a database in extended format, call @code{gdbm_open} with +both @code{GDBM_NEWDB} and @code{GDBM_NUMSYNC} flags: + +@example +dbf = gdbm_open(dbfile, 0, GDBM_NEWDB|GDBM_NUMSYNC, 0600, NULL); +@end example + +@noindent +Notice that this flag must always be used together with +@code{GDBM_NEWDB} (@pxref{Open}). It is silently ignored when used +together with another opening flag. + +A standard @command{GDBM} database can be converted to the extended +format and vice versa. To convert an existing database to the +extended format, use the @code{gdbm_convert} function (@pxref{Database +format}): + +@example + rc = gdbm_convert(dbf, GDBM_NUMSYNC); +@end example + +You can do the same using the @command{gdbmtool} utility +(@pxref{commands, upgrade}): + +@example +gdbmtool @var{dbname} upgrade +@end example + +To convert a database from extended format back to the standard +@command{GDBM} format, do: + +@example + rc = gdbm_convert(dbf, 0); +@end example + +To do the same from the command line, run: + +@example +gdbmtool @var{dbname} downgrade +@end example + +@node Crash Tolerance API +@section Crash Tolerance API + +@deftypefn {gdbm interface} int gdbm_failure_atomic (GDBM_FILE @var{dbf}, @ + const char *@var{even}, const char *@var{odd}) +Enables crash tolerance for the database file @var{dbf}. The +@var{even} and @var{odd} arguments are the pathnames of two files that +will be created and filled with snapshots of the database file. +These two files must not exist when @code{gdbm_failure_atomic} is +called and must reside on the same reflink-capable filesystem as the +database file. + +Returns 0 on success. On failure, returns -1 and sets +@code{gdbm_errno} to one of the following values: + +@table @code +@item GDBM_ERR_USAGE +Improper function usage.
Either @var{even} or @var{odd} is +@code{NULL}, or they point to the same string. + +@item GDBM_NEED_RECOVERY +The database needs recovery. @xref{Recovery}. + +@item GDBM_ERR_SNAPSHOT_CLONE +Failed to clone the database file into a snapshot. Examine the system +@code{errno} variable for details. +@end table + +If one of the following error codes is returned, examine the system +@code{errno} variable for details: + +@table @code +@item GDBM_ERR_REALPATH +Call to @code{realpath} function failed. @code{realpath} is used to +determine actual path names of the snapshot files. + +@item GDBM_FILE_OPEN_ERROR +Unable to create snapshot file. + +@item GDBM_FILE_SYNC_ERROR +Failed to sync a snapshot file or one of directories in its pathname, +during initial synchronization. + +@item GDBM_FILE_CLOSE_ERROR +Failed to close a snapshot file or one of directories in its pathname, +during initial synchronization. + +@item GDBM_ERR_FILE_MODE +The @code{fchmod} call on one of the snapshot files failed. +@end table + +Notes: + +@itemize @bullet +@item It is not an error to call @code{gdbm_failure_atomic} several times. +Each subsequent call closes the previously configured snapshot files +and installs new ones instead. + +@item Crash tolerance settings are cleared by functions +@code{gdbm_recover} (@pxref{Recovery}) and @code{gdbm_reorganize} +(@pxref{Reorganization}). In case of @code{gdbm_recover}, it should +not be a problem, because if you enabled crash tolerance, the +procedure described in @ref{Crash recovery} is the preferred way of +recovering the database. If, however, you decided to call either +function even though you had enabled crash tolerance previously, be +sure to call @code{gdbm_failure_atomic} again with the same arguments +as before (provided that the call returns successfully). 
+@end itemize +@end deftypefn + +@deftypefn {gdbm interface} int gdbm_latest_snapshot (const char *@var{even}, @ + const char *@var{odd}, const char **@var{retval}) +@kwindex GDBM_SNAPSHOT_OK +@kwindex GDBM_SNAPSHOT_BAD +@kwindex GDBM_SNAPSHOT_ERR +@kwindex GDBM_SNAPSHOT_SAME +Selects between two snapshots, @var{even} and @var{odd}, the one to be +used for crash recovery. On success, stores a pointer to the selected +filename in the memory location pointed to by @var{retval} and returns +@code{GDBM_SNAPSHOT_OK}. If neither snapshot file is usable, the +function returns @code{GDBM_SNAPSHOT_BAD}. If a system error occurs, it +returns @code{GDBM_SNAPSHOT_ERR} and sets @code{errno} to the error code +describing the problem. Finally, in the unlikely case that it cannot +select between the two snapshots (this means they are both readable +and have exactly the same @code{mtime} timestamp), the function returns +@code{GDBM_SNAPSHOT_SAME}. + +@kwindex GDBM_SNAPSHOT_SUSPICIOUS +If the @samp{numsync} extension is enabled (@pxref{Numsync}), the +function can also return the @code{GDBM_SNAPSHOT_SUSPICIOUS} status +code. This happens when the @code{numsync} counters in the two +snapshots differ by more than one. + +@xref{Crash recovery}, for a detailed description of possible return +codes and their interpretation. + +If any value other than @code{GDBM_SNAPSHOT_OK} is returned, it is +guaranteed that the function did not touch @var{retval}. In this case +it is recommended to switch to manual recovery procedure, letting the +user examine the snapshots and take the appropriate action. +@xref{Manual crash recovery}, for details. +@end deftypefn + @node Options @chapter Setting options @cindex database options @@ -1515,7 +2218,7 @@ Return the block size in bytes. The @var{value} should point to @code{int}. @end table @node Locking -@chapter File Locking. +@chapter File Locking @cindex locking @kwindex GDBM_NOLOCK @@ -1533,7 +2236,7 @@ calls.
@end deftypefn @node Variables -@chapter Useful global variables. +@chapter Useful global variables The following global variables and constants are available: @@ -1646,157 +2349,168 @@ needing recovery (@pxref{Recovery}) and return -1. This chapter summarizes error codes which can be set by the functions in @code{gdbm} library. -@table @asis -@kwindex GDBM_NO_ERROR -@item GDBM_NO_ERROR +@defvr {Error Code} GDBM_NO_ERROR No error occurred. +@end defvr -@kwindex GDBM_MALLOC_ERROR -@item GDBM_MALLOC_ERROR +@defvr {Error Code} GDBM_MALLOC_ERROR Memory allocation failed. Not enough memory. +@end defvr -@kwindex GDBM_BLOCK_SIZE_ERROR +@defvr {Error Code} GDBM_BLOCK_SIZE_ERROR @kwindex GDBM_BSEXACT -@item GDBM_BLOCK_SIZE_ERROR This error is set by the @code{gdbm_open} function (@pxref{Open}), if the value of its @var{block_size} argument is incorrect and the @code{GDBM_BSEXACT} flag is set. +@end defvr -@kwindex GDBM_FILE_OPEN_ERROR -@item GDBM_FILE_OPEN_ERROR +@defvr {Error Code} GDBM_FILE_OPEN_ERROR The library was not able to open a disk file. This can be set by @code{gdbm_open} (@pxref{Open}), @code{gdbm_export} and @code{gdbm_import} functions (@pxref{Flat files}). Inspect the value of the system @code{errno} variable to get more detailed diagnostics. +@end defvr -@kwindex GDBM_FILE_WRITE_ERROR -@item GDBM_FILE_