From 371bb85fe378ffd0ed6ddc81985d450cef5835a3 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Thu, 24 May 2018 11:35:24 +0300 Subject: More error checking; improve gdbm_recover * Makefile.am (set-dist-date): New rule (dist-hook): Catch FIXMEs in NEWS. * NEWS: Updated. * src/findkey.c (gdbm_bucket_element_valid_p): New function. (_gdbm_read_entry): Validate the retrieved bucket element. * src/gdbm.h.in (gdbm_recovery): New member: duplicate_keys. (GDBM_BAD_HASH_TABLE): New error code. * src/gdbmdefs.h (TYPE_WIDTH,SIGNED_TYPE_MAXIMUM) (OFF_T_MAX): New defines. (off_t_sum_ok): New function. (gdbm_bucket_element_valid_p): New prototype. * src/gdbmerrno.c: Support for GDBM_BAD_HASH_TABLE code. * src/gdbmtool.c (recover_handler): Fix argument counting. New argument 'summary' prints statistics summary at the end of the run. (export_handler,import_handler): Fix argument counting. * src/mmap.c (SUM_FILE_SIZE): Rewrite as inlined function. Add error checking. (_gdbm_mapped_remap): More error checking. * src/recover.c (run_recovery): Don't bail out on GDBM_CANNOT_REPLACE. (gdbm_recover): Initialize duplicate_keys * src/systems.h: Include limits.h --- Makefile.am | 16 +++++++++++++++- NEWS | 31 ++++++++++++++++++++++++++++++- src/findkey.c | 24 ++++++++++++++++++++++-- src/gdbm.h.in | 4 +++- src/gdbmdefs.h | 17 +++++++++++++++++ src/gdbmerrno.c | 3 ++- src/gdbmtool.c | 27 +++++++++++++++++++++++---- src/mmap.c | 22 ++++++++++++++++++++-- src/recover.c | 32 ++++++++++++++++++++++++-------- src/systems.h | 1 + 10 files changed, 157 insertions(+), 20 deletions(-) diff --git a/Makefile.am b/Makefile.am index 6e00dec..ecc03df 100644 --- a/Makefile.am +++ b/Makefile.am @@ -24,12 +24,26 @@ SUBDIRS = po src doc $(MAYBE_COMPAT) tests AM_DISTCHECK_CONFIGURE_FLAGS = --enable-libgdbm-compat -dist-hook: +.PHONY: set-dist-date +set-dist-date: rm -f $(distdir)/src/version.c; \ d=`date '+%d/%m/%Y'`; \ sed 's|/\*@DIST_DATE@\*/|"'"$$d"'"|' $(srcdir)/src/version.c > \ $(distdir)/src/version.c +dist-hook: ChangeLog set-dist-date + @if test -f ChangeLog && test -f NEWS; then \ + PATCHLEV=`echo "$(PACKAGE_VERSION)" | sed -r "s/[0-9]+\.[0-9]+\.?//"`;\ + if test $${PATCHLEV:-0} -lt 50; then \ + if grep -q FIXME NEWS; then \ + echo >&2 "*** NEWS file contains FIXMEs"; \ + echo >&2 "*** Aborting"; \ + exit 1; \ + fi; \ + fi; \ + fi + + gen_start_date = 2016-07-08 prev_change_log = ChangeLog.cvs diff --git a/NEWS b/NEWS index e004eec..24180d8 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU dbm NEWS -- history of user-visible changes. 2018-05-19 +GNU dbm NEWS -- history of user-visible changes. 2018-05-24 Copyright (C) 1990-2018 Free Software Foundation, Inc. See the end of file for copying conditions. @@ -6,12 +6,41 @@ Please send gdbm bug reports to . Version 1.14.90 +FIXME: BUMP VI_MAJOR + * Implement database consistency checks * Improved error checking * Removed gdbm-1.8.3 compatibility layer +* Commands can be given to gdbmtool in the command line + +The syntax is: + + gdbmtool DBNAME COMMAND [ARGS...] + +Multiple commands are separated by semicolon (take care to escape it), +e.g.: + + gdbmtool t.db count\; avail + +* Fixed data conversion bugs in storing structured keys or content + +* New member in the gdbm_recovery structure: duplicate_keys. + +Upon return from gdbm_recover, this member holds the number of keys +that were not recovered, because the same key has already been stored +in the database. The actual number of stored keys is thus +recovered_keys - duplicate_keys. + +* New error codes. + + GDBM_BAD_BUCKET "Malformed bucket header" + GDBM_BAD_HEADER "Malformed database file header" + GDBM_BAD_AVAIL "Malformed avail_block" + GDBM_BAD_HASH_TABLE "Malformed hash table" + Version 1.14.1 - 2018-01-03 diff --git a/src/findkey.c b/src/findkey.c index 7638b04..bd9fd83 100644 --- a/src/findkey.c +++ b/src/findkey.c @@ -22,7 +22,21 @@ #include "gdbmdefs.h" - +int +gdbm_bucket_element_valid_p (GDBM_FILE dbf, int elem_loc) +{ + return + elem_loc < dbf->header->bucket_elems + && dbf->bucket->h_table[elem_loc].hash_value != -1 + && dbf->bucket->h_table[elem_loc].key_size >= 0 + && off_t_sum_ok (dbf->bucket->h_table[elem_loc].data_pointer, + dbf->bucket->h_table[elem_loc].key_size) + && dbf->bucket->h_table[elem_loc].data_size >= 0 + && off_t_sum_ok (dbf->bucket->h_table[elem_loc].data_pointer + + dbf->bucket->h_table[elem_loc].key_size, + dbf->bucket->h_table[elem_loc].data_size); +} + /* Read the data found in bucket entry ELEM_LOC in file DBF and return a pointer to it. Also, cache the read value. */ @@ -34,11 +48,17 @@ _gdbm_read_entry (GDBM_FILE dbf, int elem_loc) int data_size; off_t file_pos; data_cache_elem *data_ca; - + /* Is it already in the cache? */ if (dbf->cache_entry->ca_data.elem_loc == elem_loc) return dbf->cache_entry->ca_data.dptr; + if (!gdbm_bucket_element_valid_p (dbf, elem_loc)) + { + GDBM_SET_ERRNO (dbf, GDBM_BAD_HASH_TABLE, TRUE); + return NULL; + } + /* Set sizes and pointers. */ key_size = dbf->bucket->h_table[elem_loc].key_size; data_size = dbf->bucket->h_table[elem_loc].data_size; diff --git a/src/gdbm.h.in b/src/gdbm.h.in index 61d5707..e576c69 100644 --- a/src/gdbm.h.in +++ b/src/gdbm.h.in @@ -152,6 +152,7 @@ typedef struct gdbm_recovery_s size_t recovered_buckets; size_t failed_keys; size_t failed_buckets; + size_t duplicate_keys; char *backup_name; } gdbm_recovery; @@ -222,9 +223,10 @@ extern int gdbm_copy_meta (GDBM_FILE dst, GDBM_FILE src); # define GDBM_BAD_BUCKET 32 # define GDBM_BAD_HEADER 33 # define GDBM_BAD_AVAIL 34 +# define GDBM_BAD_HASH_TABLE 35 # define _GDBM_MIN_ERRNO 0 -# define _GDBM_MAX_ERRNO GDBM_BAD_AVAIL +# define _GDBM_MAX_ERRNO GDBM_BAD_HASH_TABLE /* This one was never used and will be removed in the future */ # define GDBM_UNKNOWN_UPDATE GDBM_UNKNOWN_ERROR diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h index 5305b0d..1bb519b 100644 --- a/src/gdbmdefs.h +++ b/src/gdbmdefs.h @@ -26,6 +26,22 @@ #define _(s) gettext (s) #define N_(s) s +/* The width in bits of the integer type or expression T. */ +#define TYPE_WIDTH(t) (sizeof (t) * CHAR_BIT) + +#define SIGNED_TYPE_MAXIMUM(t) \ + ((t) ((((t) 1 << (TYPE_WIDTH (t) - 2)) - 1) * 2 + 1)) + +/* Maximum value for off_t */ +#define OFF_T_MAX SIGNED_TYPE_MAXIMUM (off_t) + +/* Return true if A can be added to B without integer overflow */ +static inline off_t +off_t_sum_ok (off_t a, off_t b) +{ + return OFF_T_MAX - a >= b; +} + /* The type definitions are next. */ /* The available file space is stored in an "avail" table. The one with @@ -93,6 +109,7 @@ typedef struct int data_size; /* Size of associated data in the file. */ } bucket_element; +extern int gdbm_bucket_element_valid_p (GDBM_FILE dbf, int elem_loc); /* A bucket is a small hash table. This one consists of a number of bucket elements plus some bookkeeping fields. The number of elements diff --git a/src/gdbmerrno.c b/src/gdbmerrno.c index 896bf70..52cfe30 100644 --- a/src/gdbmerrno.c +++ b/src/gdbmerrno.c @@ -134,7 +134,8 @@ const char * const gdbm_errlist[_GDBM_MAX_ERRNO+1] = { [GDBM_DIR_OVERFLOW] = N_("Bucket directory overflow"), [GDBM_BAD_BUCKET] = N_("Malformed bucket header"), [GDBM_BAD_HEADER] = N_("Malformed database file header"), - [GDBM_BAD_AVAIL] = N_("Malforemd avail_block") + [GDBM_BAD_AVAIL] = N_("Malformed avail_block"), + [GDBM_BAD_HASH_TABLE] = N_("Malformed hash table") }; const char * diff --git a/src/gdbmtool.c b/src/gdbmtool.c index 33bdf93..9c6eebe 100644 --- a/src/gdbmtool.c +++ b/src/gdbmtool.c @@ -552,7 +552,7 @@ err_printer (void *data GDBM_ARG_UNUSED, char const *fmt, ...) fprintf (stderr, "\n"); } -/* recover verbose backup max-failed-keys=N max-failed-buckets=N max-failures=N */ +/* recover sumamry verbose backup max-failed-keys=N max-failed-buckets=N max-failures=N */ void recover_handler (struct handler_param *param) { @@ -561,8 +561,9 @@ recover_handler (struct handler_param *param) int rc; int i; char *p; + int summary = 0; - for (i = 1; i < param->argc; i++) + for (i = 0; i < param->argc; i++) { char *arg = PARAM_STRING (param, i); if (strcmp (arg, "verbose") == 0) @@ -570,6 +571,10 @@ recover_handler (struct handler_param *param) rcvr.errfun = err_printer; flags |= GDBM_RCVR_ERRFUN; } + else if (strcmp (arg, "summary") == 0) + { + summary = 1; + } else if (strcmp (arg, "backup") == 0) { rcvr.errfun = err_printer; @@ -617,6 +622,19 @@ recover_handler (struct handler_param *param) if (rc == 0) { fprintf (param->fp, _("Recovery succeeded.\n")); + if (summary) + { + fprintf (param->fp, + _("Keys recovered: %lu, failed: %lu, duplicate: %lu\n"), + (unsigned long) rcvr.recovered_keys, + (unsigned long) rcvr.failed_keys, + (unsigned long) rcvr.duplicate_keys); + fprintf (param->fp, + _("Buckets recovered: %lu, failed: %lu\n"), + (unsigned long) rcvr.recovered_buckets, + (unsigned long) rcvr.failed_buckets); + } + if (rcvr.backup_name) { fprintf (param->fp, @@ -925,7 +943,7 @@ export_handler (struct handler_param *param) int i; int filemode; - for (i = 1; i < param->argc; i++) + for (i = 0; i < param->argc; i++) { if (strcmp (PARAM_STRING (param, i), "truncate") == 0) flags = GDBM_NEWDB; @@ -959,7 +977,7 @@ import_handler (struct handler_param *param) int i; int rc; - for (i = 1; i < param->argc; i++) + for (i = 0; i < param->argc; i++) { if (strcmp (PARAM_STRING (param, i), "replace") == 0) flag = GDBM_REPLACE; @@ -1220,6 +1238,7 @@ struct command command_tab[] = { { S(recover), T_CMD, checkdb, recover_handler, NULL, { { "[verbose]", GDBM_ARG_STRING }, + { "[summary]", GDBM_ARG_STRING }, { "[backup]", GDBM_ARG_STRING }, { "[max-failed-keys=N]", GDBM_ARG_STRING }, { "[max-failed-buckets=N]", GDBM_ARG_STRING }, diff --git a/src/mmap.c b/src/mmap.c index 114d8b2..24ede29 100644 --- a/src/mmap.c +++ b/src/mmap.c @@ -44,8 +44,15 @@ # define _GDBM_NEED_REMAP(dbf) \ (!(dbf)->mapped_region || (dbf)->mapped_pos == (dbf)->mapped_size) /* Return the sum of the currently mapped size and DELTA */ -# define SUM_FILE_SIZE(dbf, delta) \ - ((dbf)->mapped_off + (dbf)->mapped_size + (delta)) +static inline off_t +SUM_FILE_SIZE (GDBM_FILE dbf, off_t delta) +{ + if (delta >= 0 + && off_t_sum_ok (dbf->mapped_off, dbf->mapped_size) + && off_t_sum_ok (dbf->mapped_off + dbf->mapped_size, delta)) + return dbf->mapped_off + dbf->mapped_size + delta; + return -1; +} /* Store the size of the GDBM file DBF in *PSIZE. Return 0 on success and -1 on failure. */ @@ -182,6 +189,17 @@ _gdbm_mapped_remap (GDBM_FILE dbf, off_t size, int flag) { off_t file_size, pos; + if (size < 0) + { + errno = EINVAL; + GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE); + return -1; + } + + if (size < dbf->mapped_size) + /* Nothing to do */ + return 0; + if (_gdbm_file_size (dbf, &file_size)) { SAVE_ERRNO (_gdbm_mapped_unmap (dbf)); diff --git a/src/recover.c b/src/recover.c index d6d4ff9..721c23f 100644 --- a/src/recover.c +++ b/src/recover.c @@ -312,15 +312,30 @@ run_recovery (GDBM_FILE dbf, GDBM_FILE new_dbf, gdbm_recovery *rcvr, int flags) if (gdbm_store (new_dbf, key, data, GDBM_INSERT) != 0) { - if (flags & GDBM_RCVR_ERRFUN) - rcvr->errfun (rcvr->data, - _("fatal: can't store element %d:%d (%lu:%d): %s"), - bucket_dir, i, - (unsigned long) dbf->bucket->h_table[i].data_pointer, - dbf->bucket->h_table[i].key_size + switch (gdbm_last_errno (new_dbf)) + { + case GDBM_CANNOT_REPLACE: + rcvr->duplicate_keys++; + if (flags & GDBM_RCVR_ERRFUN) + rcvr->errfun (rcvr->data, + _("ignoring duplicate key %d:%d (%lu:%d)"), + bucket_dir, i, + (unsigned long) dbf->bucket->h_table[i].data_pointer, + dbf->bucket->h_table[i].key_size + + dbf->bucket->h_table[i].data_size); + break; + + default: + if (flags & GDBM_RCVR_ERRFUN) + rcvr->errfun (rcvr->data, + _("fatal: can't store element %d:%d (%lu:%d): %s"), + bucket_dir, i, + (unsigned long) dbf->bucket->h_table[i].data_pointer, + dbf->bucket->h_table[i].key_size + dbf->bucket->h_table[i].data_size, - gdbm_db_strerror (new_dbf)); - return -1; + gdbm_db_strerror (new_dbf)); + return -1; + } } } } @@ -356,6 +371,7 @@ gdbm_recover (GDBM_FILE dbf, gdbm_recovery *rcvr, int flags) rcvr->recovered_buckets = 0; rcvr->failed_keys = 0; rcvr->failed_buckets = 0; + rcvr->duplicate_keys = 0; rcvr->backup_name = NULL; rc = 0; diff --git a/src/systems.h b/src/systems.h index 66955dd..c678573 100644 --- a/src/systems.h +++ b/src/systems.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifndef SEEK_SET # define SEEK_SET 0 -- cgit v1.2.1