diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-05-25 14:17:11 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-05-25 15:00:01 +0300 |
commit | 655cd193549e20ea8a8e77125adec7c5909c067e (patch) | |
tree | d59f997331bfcec8c0c17afb7bc8438215f09102 | |
parent | 8d2f483b28f8418703982658b3e7dda7a96ad335 (diff) | |
download | gdbm-655cd193549e20ea8a8e77125adec7c5909c067e.tar.gz gdbm-655cd193549e20ea8a8e77125adec7c5909c067e.tar.bz2 |
More database consistency checks
* NEWS: Update.
* THANKS: Update.
* src/bucket.c (_gdbm_get_bucket): Check if directory entry is
valid. Don't cache invalid buckets.
* src/gdbm.h.in (GDBM_BAD_DIR_ENTRY): New error code.
* src/gdbmerrno.c: Likewise.
* src/gdbmopen.c (validate_header): Compute expected
number of bucket elements based on the bucket size, not on
the block size.
(_gdbm_init_cache_entry): New function.
* src/proto.h (_gdbm_init_cache_entry): New proto.
* src/recover.c (gdbm_recover): Clear error state after return
from check_db indicating failure.
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | THANKS | 2 | ||||
-rw-r--r-- | src/bucket.c | 67 | ||||
-rw-r--r-- | src/gdbm.h.in | 3 | ||||
-rw-r--r-- | src/gdbmdefs.h | 2 | ||||
-rw-r--r-- | src/gdbmerrno.c | 3 | ||||
-rw-r--r-- | src/gdbmopen.c | 25 | ||||
-rw-r--r-- | src/gdbmtool.c | 2 | ||||
-rw-r--r-- | src/mmap.c | 1 | ||||
-rw-r--r-- | src/proto.h | 1 | ||||
-rw-r--r-- | src/recover.c | 1 |
11 files changed, 79 insertions, 34 deletions
@@ -1,18 +1,21 @@ -GNU dbm NEWS -- history of user-visible changes. 2018-05-24 +GNU dbm NEWS -- history of user-visible changes. 2018-05-25 Copyright (C) 1990-2018 Free Software Foundation, Inc. See the end of file for copying conditions. Please send gdbm bug reports to <bug-gdbm@gnu.org>. Version 1.14.90 FIXME: BUMP VI_MAJOR * Implement database consistency checks +Special thanks to Lionel Debroux and Craig Young for investing +their time and efforts in testing and providing valuable feedback. + * Improved error checking * Removed gdbm-1.8.3 compatibility layer * Commands can be given to gdbmtool in the command line @@ -37,12 +40,13 @@ recovered_keys - duplicate_keys. * New error codes. GDBM_BAD_BUCKET "Malformed bucket header" GDBM_BAD_HEADER "Malformed database file header" GDBM_BAD_AVAIL "Malformed avail_block" GDBM_BAD_HASH_TABLE "Malformed hash table" + GDBM_BAD_DIR_ENTRY "Invalid directory entry" Version 1.14.1 - 2018-01-03 * Increment soname current version number. @@ -2,10 +2,12 @@ GDBM THANKS file. Many people further contributed to GDBM by reporting problems, suggesting various improvements or submitting actual code. Here is a list of these people. Help us keep it complete and exempt of errors. Bill Jones <rj7252@att.com> +Craig Young <csy@ecraig.com> Jakub Bogusz <qboosh@pld-linux.org> +Lionel Debroux <lionel_debroux@yahoo.fr> Matthew Burgess <matthew@linuxfromscratch.org> Tanaka Akira <akr@fsij.org> Thomas Klausner <tk@giga.or.at> diff --git a/src/bucket.c b/src/bucket.c index ce1915a..1d961e2 100644 --- a/src/bucket.c +++ b/src/bucket.c @@ -39,26 +39,49 @@ _gdbm_new_bucket (GDBM_FILE dbf, hash_bucket *bucket, int bits) /* Initialize all bucket elements. */ for (index = 0; index < dbf->header->bucket_elems; index++) bucket->h_table[index].hash_value = -1; } +/* Return true if the directory entry at DIR_INDEX can be considered + valid. This means that DIR_INDEX is in the valid range for addressing + the dir array, and the offset stored in dir[DIR_INDEX] points past + first two blocks in file. This does not necessarily mean that there's + a valid bucket or data block at that offset. All this implies is that + it is safe to use the offset for look up in the bucket cache and to + attempt to read a block at that offset. */ +int +gdbm_dir_entry_valid_p (GDBM_FILE dbf, int dir_index) +{ + return dir_index >= 0 + && dir_index < GDBM_DIR_COUNT (dbf) + && dbf->dir[dir_index] >= 2*dbf->header->block_size; +} + /* Find a bucket for DBF that is pointed to by the bucket directory from location DIR_INDEX. The bucket cache is first checked to see if it is already in memory. If not, a bucket may be tossed to read the new - bucket. In any case, the requested bucket becomes the "current" bucket - and dbf->bucket points to the correct bucket. */ + bucket. On success, the requested bucket becomes the "current" bucket + and dbf->bucket points to the correct bucket. On error, the current + bucket remains unchanged. */ int _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) { int rc; off_t bucket_adr; /* The address of the correct hash bucket. */ off_t file_pos; /* The return address for lseek. */ int index; /* Loop index. */ + if (!gdbm_dir_entry_valid_p (dbf, dir_index)) + { + /* FIXME: negative caching? */ + GDBM_SET_ERRNO (dbf, GDBM_BAD_DIR_ENTRY, TRUE); + return -1; + } + /* Initial set up. */ dbf->bucket_dir = dir_index; bucket_adr = dbf->dir[dir_index]; if (dbf->bucket_cache == NULL) { @@ -66,51 +89,53 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) { _gdbm_fatal (dbf, _("couldn't init cache")); return -1; } } - /* Is that one is not already current, we must find it. */ + /* If that one is not already current, we must find it. */ if (dbf->cache_entry->ca_adr != bucket_adr) { + size_t lru; + /* Look in the cache. */ for (index = 0; index < dbf->cache_size; index++) { if (dbf->bucket_cache[index].ca_adr == bucket_adr) { dbf->bucket = dbf->bucket_cache[index].ca_bucket; dbf->cache_entry = &dbf->bucket_cache[index]; return 0; } } /* It is not in the cache, read it from the disk. */ - dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; - if (dbf->bucket_cache[dbf->last_read].ca_changed) - { - if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read])) - return -1; - } - dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr; - dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket; - dbf->cache_entry = &dbf->bucket_cache[dbf->last_read]; - dbf->cache_entry->ca_data.elem_loc = -1; - dbf->cache_entry->ca_changed = FALSE; - /* Read the bucket. */ + /* Position the file pointer */ file_pos = GDBM_DEBUG_OVERRIDE ("_gdbm_get_bucket:seek-failure", __lseek (dbf, bucket_adr, SEEK_SET)); if (file_pos != bucket_adr) { - _gdbm_fatal (dbf, _("lseek error")); GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE); + _gdbm_fatal (dbf, _("lseek error")); return -1; } + /* Flush and drop the last recently used cache entry */ + lru = (dbf->last_read + 1) % dbf->cache_size; + if (dbf->bucket_cache[lru].ca_changed) + { + if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[lru])) + return -1; + } + _gdbm_init_cache_entry (dbf, lru); + + /* Read the bucket. */ rc = GDBM_DEBUG_OVERRIDE ("_gdbm_get_bucket:read-failure", - _gdbm_full_read (dbf, dbf->bucket, dbf->header->bucket_size)); + _gdbm_full_read (dbf, dbf->bucket_cache[lru].ca_bucket, + dbf->header->bucket_size)); if (rc) { GDBM_DEBUG (GDBM_DEBUG_ERR, "%s: error reading bucket: %s", dbf->name, gdbm_db_strerror (dbf)); dbf->need_recovery = TRUE; @@ -123,14 +148,20 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) && dbf->bucket->count <= dbf->header->bucket_elems && dbf->bucket->av_count <= dbf->header->bucket_elems)) { GDBM_SET_ERRNO (dbf, GDBM_BAD_BUCKET, TRUE); return -1; } + /* Finally, store it in cache */ + dbf->last_read = lru; + dbf->bucket_cache[lru].ca_adr = bucket_adr; + dbf->bucket = dbf->bucket_cache[lru].ca_bucket; + dbf->cache_entry = &dbf->bucket_cache[lru]; + dbf->cache_entry->ca_data.elem_loc = -1; + dbf->cache_entry->ca_changed = FALSE; } - return 0; } int _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket, size_t size) diff --git a/src/gdbm.h.in b/src/gdbm.h.in index e576c69..e6bdc58 100644 --- a/src/gdbm.h.in +++ b/src/gdbm.h.in @@ -221,15 +221,16 @@ extern int gdbm_copy_meta (GDBM_FILE dst, GDBM_FILE src); # define GDBM_BACKUP_FAILED 30 # define GDBM_DIR_OVERFLOW 31 # define GDBM_BAD_BUCKET 32 # define GDBM_BAD_HEADER 33 # define GDBM_BAD_AVAIL 34 # define GDBM_BAD_HASH_TABLE 35 +# define GDBM_BAD_DIR_ENTRY 36 # define _GDBM_MIN_ERRNO 0 -# define _GDBM_MAX_ERRNO GDBM_BAD_HASH_TABLE +# define _GDBM_MAX_ERRNO GDBM_BAD_DIR_ENTRY /* This one was never used and will be removed in the future */ # define GDBM_UNKNOWN_UPDATE GDBM_UNKNOWN_ERROR typedef int gdbm_error; extern int *gdbm_errno_location (void); diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h index 37a5637..a53f637 100644 --- a/src/gdbmdefs.h +++ b/src/gdbmdefs.h @@ -222,13 +222,13 @@ struct gdbm_file_info ACM Trans on Database Systems, Vol 4, No 3. Sept 1979, 315-344 */ off_t *dir; /* The bucket cache. */ cache_elem *bucket_cache; size_t cache_size; - int last_read; + size_t last_read; /* Points to the current hash bucket in the cache. */ hash_bucket *bucket; /* The directory entry used to get the current hash bucket. */ int bucket_dir; diff --git a/src/gdbmerrno.c b/src/gdbmerrno.c index 52cfe30..8e24b82 100644 --- a/src/gdbmerrno.c +++ b/src/gdbmerrno.c @@ -132,13 +132,14 @@ const char * const gdbm_errlist[_GDBM_MAX_ERRNO+1] = { [GDBM_NEED_RECOVERY] = N_("Database needs recovery"), [GDBM_BACKUP_FAILED] = N_("Failed to create backup copy"), [GDBM_DIR_OVERFLOW] = N_("Bucket directory overflow"), [GDBM_BAD_BUCKET] = N_("Malformed bucket header"), [GDBM_BAD_HEADER] = N_("Malformed database file header"), [GDBM_BAD_AVAIL] = N_("Malformed avail_block"), - [GDBM_BAD_HASH_TABLE] = N_("Malformed hash table") + [GDBM_BAD_HASH_TABLE] = N_("Malformed hash table"), + [GDBM_BAD_DIR_ENTRY] = N_("Invalid directory entry") }; const char * gdbm_strerror (gdbm_error error) { if (error < _GDBM_MIN_ERRNO || error > _GDBM_MAX_ERRNO) diff --git a/src/gdbmopen.c b/src/gdbmopen.c index 52b2739..974e9e2 100644 --- a/src/gdbmopen.c +++ b/src/gdbmopen.c @@ -47,16 +47,15 @@ compute_directory_size (blksize_t block_size, *ret_dir_size = dir_size; *ret_dir_bits = dir_bits; } static inline int -bucket_element_count (gdbm_file_header const *hdr) +bucket_element_count (size_t bucket_size) { - return (hdr->block_size - sizeof (hash_bucket)) - / sizeof (bucket_element) + 1; + return (bucket_size - sizeof (hash_bucket)) / sizeof (bucket_element) + 1; } int gdbm_avail_table_valid_p (GDBM_FILE dbf, avail_block const *av) { return gdbm_avail_block_valid_p (av) && av->size <= dbf->header->avail.size; @@ -112,13 +111,13 @@ validate_header (gdbm_file_header const *hdr, struct stat const *st) if (hdr->dir_bits != dir_bits) return GDBM_BAD_HEADER; if (!(hdr->bucket_size > 0 && hdr->bucket_size > sizeof(hash_bucket))) return GDBM_BAD_HEADER; - if (hdr->bucket_elems != bucket_element_count (hdr)) + if (hdr->bucket_elems != bucket_element_count (hdr->bucket_size)) return GDBM_BAD_HEADER; /* Validate the avail block */ if (!gdbm_avail_block_valid_p (&hdr->avail)) return GDBM_BAD_HEADER; @@ -322,13 +321,13 @@ gdbm_fd_open (int fd, const char *file_name, int block_size, GDBM_SET_ERRNO2 (NULL, GDBM_MALLOC_ERROR, FALSE, GDBM_DEBUG_OPEN); return NULL; } dbf->header->dir = dbf->header->block_size; /* Create the first and only hash bucket. */ - dbf->header->bucket_elems = bucket_element_count (dbf->header); + dbf->header->bucket_elems = bucket_element_count (dbf->header->block_size); dbf->header->bucket_size = dbf->header->block_size; dbf->bucket = calloc (1, dbf->header->bucket_size); if (dbf->bucket == NULL) { if (!(flags & GDBM_CLOERROR)) dbf->desc = -1; @@ -604,17 +603,23 @@ _gdbm_init_cache (GDBM_FILE dbf, size_t size) malloc (dbf->header->bucket_size)); if ((dbf->bucket_cache[index]).ca_bucket == NULL) { GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, TRUE); return -1; } - (dbf->bucket_cache[index]).ca_adr = 0; - (dbf->bucket_cache[index]).ca_changed = FALSE; - (dbf->bucket_cache[index]).ca_data.hash_val = -1; - (dbf->bucket_cache[index]).ca_data.elem_loc = -1; - (dbf->bucket_cache[index]).ca_data.dptr = NULL; + _gdbm_init_cache_entry (dbf, index); } dbf->bucket = dbf->bucket_cache[0].ca_bucket; dbf->cache_entry = &dbf->bucket_cache[0]; } return 0; } + +void +_gdbm_init_cache_entry (GDBM_FILE dbf, int index) +{ + dbf->bucket_cache[index].ca_adr = 0; + dbf->bucket_cache[index].ca_changed = FALSE; + dbf->bucket_cache[index].ca_data.hash_val = -1; + dbf->bucket_cache[index].ca_data.elem_loc = -1; + dbf->bucket_cache[index].ca_data.dptr = NULL; +} diff --git a/src/gdbmtool.c b/src/gdbmtool.c index b3055a6..06cd593 100644 --- a/src/gdbmtool.c +++ b/src/gdbmtool.c @@ -535,13 +535,13 @@ nextkey_handler (struct handler_param *param) void reorganize_handler (struct handler_param *param GDBM_ARG_UNUSED) { if (gdbm_reorganize (gdbm_file)) terror ("%s", _("Reorganization failed.")); else - fprintf (param->fp, _("Reorganization succeeded.")); + fprintf (param->fp, "%s\n", _("Reorganization succeeded.")); } static void err_printer (void *data GDBM_ARG_UNUSED, char const *fmt, ...) { va_list ap; @@ -220,13 +220,12 @@ _gdbm_mapped_remap (GDBM_FILE dbf, off_t size, int flag) if (_gdbm_file_extend (dbf, size)) return -1; file_size = size; } else { - size = file_size; return 0; } } } else { diff --git a/src/proto.h b/src/proto.h index 46cd716..114221e 100644 --- a/src/proto.h +++ b/src/proto.h @@ -45,12 +45,13 @@ int _gdbm_bucket_dir (GDBM_FILE dbf, int hash); /* From update.c */ int _gdbm_end_update (GDBM_FILE); void _gdbm_fatal (GDBM_FILE, const char *); /* From gdbmopen.c */ int _gdbm_init_cache (GDBM_FILE, size_t); +void _gdbm_init_cache_entry (GDBM_FILE, int); /* From mmap.c */ int _gdbm_mapped_init (GDBM_FILE); void _gdbm_mapped_unmap (GDBM_FILE); ssize_t _gdbm_mapped_read (GDBM_FILE, void *, size_t); ssize_t _gdbm_mapped_write (GDBM_FILE, void *, size_t); diff --git a/src/recover.c b/src/recover.c index 721c23f..8f05f71 100644 --- a/src/recover.c +++ b/src/recover.c @@ -374,12 +374,13 @@ gdbm_recover (GDBM_FILE dbf, gdbm_recovery *rcvr, int flags) rcvr->duplicate_keys = 0; rcvr->backup_name = NULL; rc = 0; if ((flags & GDBM_RCVR_FORCE) || check_db (dbf)) { + gdbm_clear_error (dbf); len = strlen (dbf->name); new_name = malloc (len + sizeof (TMPSUF)); if (!new_name) { GDBM_SET_ERRNO (NULL, GDBM_MALLOC_ERROR, FALSE); return -1; |