From cdef488b4ae700136a2a6227cf25e4393e1fe993 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Wed, 17 Mar 2021 15:51:06 +0200 Subject: New functions for traversing the available space stack * src/Makefile.am: Add avail.c * src/avail.c: New file. * src/gdbm.h.in (gdbm_avail_verify): New proto. * src/gdbmdefs.h (GDBM_HEADER_AVAIL_SIZE): New macro. * src/gdbmopen.c (gdbm_avail_table_valid_p) (gdbm_avail_block_validate) (gdbm_bucket_avail_table_validate): Move to avail.c * src/gdbmtool.c (_gdbm_avail_list_size) (_gdbm_print_avail_list): Rewrite using gdbm_avail_traverse. * src/proto.h (gdbm_avail_traverse): New proto. * src/systems.h: Include stddef.h. --- src/Makefile.am | 1 + src/avail.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/gdbm.h.in | 2 + src/gdbmdefs.h | 3 + src/gdbmopen.c | 85 +---------------- src/gdbmtool.c | 104 +++++++------------- src/proto.h | 11 ++- src/systems.h | 1 + 8 files changed, 338 insertions(+), 158 deletions(-) create mode 100644 src/avail.c diff --git a/src/Makefile.am b/src/Makefile.am index 25b0a1f..0d10f23 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -59,6 +59,7 @@ libgdbm_la_SOURCES = \ gdbmsetopt.c\ gdbmstore.c\ gdbmsync.c\ + avail.c\ base64.c\ bucket.c\ falloc.c\ diff --git a/src/avail.c b/src/avail.c new file mode 100644 index 0000000..50c3a44 --- /dev/null +++ b/src/avail.c @@ -0,0 +1,289 @@ +/* avail.c - avail block and stack handling functions. */ + +/* This file is part of GDBM, the GNU data base manager. + Copyright (C) 2021 Free Software Foundation, Inc. + + GDBM is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GDBM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GDBM. If not, see . */ + +#include "autoconf.h" +#include "gdbmdefs.h" + +static int +avail_comp (void const *a, void const *b) +{ + avail_elem const *ava = a; + avail_elem const *avb = b; + return ava->av_size - avb->av_size; +} + +/* Returns true if the avail array AV[0]@COUNT is valid. + + As a side effect, ensures the array is sorted by element size + in increasing order and restores the ordering if necessary. + + The proper ordering could have been clobbered in versions of GDBM<=1.15, + by a call to _gdbm_put_av_elem with the can_merge parameter set to + TRUE. This happened in two cases: either because the GDBM_COALESCEBLKS + was set, and (quite unfortunately) when _gdbm_put_av_elem was called + from pop_avail_block in falloc.c. The latter case is quite common, + which means that there can be lots of existing databases with broken + ordering of avail arrays. Thus, restoring of the proper ordering + is essential for people to be able to use their existing databases. +*/ +static int +gdbm_avail_table_valid_p (GDBM_FILE dbf, avail_elem *av, int count) +{ + off_t prev = 0; + int i; + int needs_sorting = 0; + avail_elem *p = av; + + prev = 0; + for (i = 0; i < count; i++, p++) + { + if (!(p->av_adr >= dbf->header->bucket_size + && p->av_adr + p->av_size <= dbf->header->next_block)) + return 0; + if (p->av_size < prev) + needs_sorting = 1; + prev = p->av_size; + } + + if (needs_sorting && dbf->read_write) + { + GDBM_DEBUG (GDBM_DEBUG_ERR, "%s", "restoring sort order"); + qsort (av, count, sizeof av[0], avail_comp); + } + + return 1; +} + +int +gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk, size_t size) +{ + if (!(size > sizeof (avail_block) + && (avblk->size > 1 && avblk->count >= 0 && avblk->count <= avblk->size) + && ((size - sizeof (avail_block)) / sizeof (avail_elem) + 1) >= avblk->count + && gdbm_avail_table_valid_p (dbf, avblk->av_table, avblk->count))) + { + GDBM_SET_ERRNO (dbf, GDBM_BAD_AVAIL, TRUE); + return -1; + } + return 0; +} + +int +gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket) +{ + if (!(bucket->av_count >= 0 + && bucket->av_count <= BUCKET_AVAIL + && gdbm_avail_table_valid_p (dbf, bucket->bucket_avail, + bucket->av_count))) + { + GDBM_SET_ERRNO (dbf, GDBM_BAD_AVAIL, TRUE); + return -1; + } + return 0; +} + +struct off_map +{ + off_t *map_base; + size_t map_size; + size_t map_max; +}; + +#define OFF_MAP_INITIALIZER { NULL, 0, 0 } + +static void +off_map_free (struct off_map *map) +{ + free (map->map_base); +} + +static int +off_map_expand (struct off_map *map) +{ + size_t n = map->map_max; + void *p; + + if (!map->map_base) + { + if (!n) + n = 64; + } + else + { + if (SIZE_T_MAX / 3 * 2 / sizeof (map->map_base[0]) <= n) + { + errno = ENOMEM; + return -1; + } + n += (n + 1) / 2; + } + + p = realloc (map->map_base, n * sizeof (map->map_base[0])); + if (!p) + return -1; + map->map_base = p; + map->map_max = n; + return 0; +} + +static int +off_map_lookup (struct off_map *map, off_t n) +{ + ssize_t lo, hi, mid; + + if (map->map_size) + { + lo = 0; + hi = map->map_size - 1; + while (lo <= hi) + { + mid = (lo + hi) / 2; + if (map->map_base[mid] > n) + hi = mid - 1; + else if (map->map_base[mid] < n) + lo = mid + 1; + else + return GDBM_CANNOT_REPLACE; + } + } + else + hi = -1; + + if (map->map_size == map->map_max) + { + if (off_map_expand (map)) + return GDBM_MALLOC_ERROR; + } + + hi++; + if (map->map_size > hi) + memmove (map->map_base + hi + 1, map->map_base + hi, + (map->map_size - hi) * sizeof (map->map_base[0])); + map->map_base[hi] = n; + map->map_size++; + return GDBM_NO_ERROR; +} + +/* + * gdbm_avail_traverse - traverse the stack of available space blocks. + * + * Starting from the header, reads in and verifies each avail block. + * If the block is valid and callback function CB is given, calls it + * with the current avail block, its offset in file and user-supplied + * data as arguments. + * + * Traversal stops when one of the following occurs: + * 1) entire stack has been traversed; + * 2) an already traversed block is encountered; + * 3) a block fails validation; + * 4) callback function (if given) returned non-zero. + * + * Returns 0 (success) in cases (1) and (4). Otherwise, sets the + * appropriate GDBM error code and returns -1. + * The case (2) makes this function useful for detecting loops in the + * avail stack. + */ +int +gdbm_avail_traverse (GDBM_FILE dbf, + int (*cb) (avail_block *, off_t, void *), void *data) +{ + avail_block *blk; + size_t size; + off_t n; + struct off_map map = OFF_MAP_INITIALIZER; + int rc; + + GDBM_ASSERT_CONSISTENCY (dbf, -1); + if (gdbm_avail_block_validate (dbf, &dbf->header->avail, + GDBM_HEADER_AVAIL_SIZE (dbf))) + return -1; + + if (off_map_lookup (&map, offsetof (gdbm_file_header, avail))) + { + GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, FALSE); + return -1; + } + + size = ((((size_t)dbf->header->avail.size * sizeof (avail_elem)) >> 1) + + sizeof (avail_block)); + blk = malloc (size); + if (!blk) + { + GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, FALSE); + off_map_free (&map); + return -1; + } + + rc = 0; + n = dbf->header->avail.next_block; + while (n) + { + rc = off_map_lookup (&map, n); + if (rc != GDBM_NO_ERROR) + { + if (rc == GDBM_CANNOT_REPLACE) + GDBM_SET_ERRNO (dbf, GDBM_BAD_AVAIL, TRUE); + else + GDBM_SET_ERRNO (dbf, rc, FALSE); + rc = -1; + break; + } + + if (gdbm_file_seek (dbf, n, SEEK_SET) != n) + { + GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, FALSE); + rc = -1; + break; + } + + if (_gdbm_avail_block_read (dbf, blk, size)) + { + rc = -1; + break; + } + + if (cb && cb (blk, n, data)) + break; + + n = blk->next_block; + } + + free (blk); + off_map_free (&map); + + return rc; +} + +/* + * gdbm_avail_verify - verify the avail stack consistency. + * + * Traverses the avail stack, verifying each avail block and keeping track + * of visited block offsets to discover eventual loops. + * + * On success, returns 0. On error, sets GDBM errno and returns -1. + */ +int +gdbm_avail_verify (GDBM_FILE dbf) +{ + return gdbm_avail_traverse (dbf, NULL, NULL); +} + + + + + diff --git a/src/gdbm.h.in b/src/gdbm.h.in index 1a6cc0d..87f620f 100644 --- a/src/gdbm.h.in +++ b/src/gdbm.h.in @@ -134,6 +134,8 @@ extern int gdbm_import_from_file (GDBM_FILE dbf, FILE *fp, int flag); extern int gdbm_count (GDBM_FILE dbf, gdbm_count_t *pcount); extern int gdbm_bucket_count (GDBM_FILE dbf, size_t *pcount); +extern int gdbm_avail_verify (GDBM_FILE dbf); + typedef struct gdbm_recovery_s { diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h index c3ff353..dfb6d60 100644 --- a/src/gdbmdefs.h +++ b/src/gdbmdefs.h @@ -278,6 +278,9 @@ struct gdbm_file_info #define GDBM_DIR_COUNT(db) ((db)->header->dir_size / sizeof (off_t)) +#define GDBM_HEADER_AVAIL_SIZE(dbf) \ + ((dbf)->header->block_size - offsetof (gdbm_file_header, avail)) + /* Execute CODE without clobbering errno */ #define SAVE_ERRNO(code) \ do \ diff --git a/src/gdbmopen.c b/src/gdbmopen.c index 97654c7..440acfc 100644 --- a/src/gdbmopen.c +++ b/src/gdbmopen.c @@ -55,84 +55,6 @@ bucket_element_count (size_t bucket_size) return (bucket_size - sizeof (hash_bucket)) / sizeof (bucket_element) + 1; } -static int -avail_comp (void const *a, void const *b) -{ - avail_elem const *ava = a; - avail_elem const *avb = b; - return ava->av_size - avb->av_size; -} - -/* Returns true if the avail array AV[0]@COUNT is valid. - - As a side effect, ensures the array is sorted by element size - in increasing order and restores the ordering if necessary. - - The proper ordering could have been clobbered in versions of GDBM<=1.15, - by a call to _gdbm_put_av_elem with the can_merge parameter set to - TRUE. This happened in two cases: either because the GDBM_COALESCEBLKS - was set, and (quite unfortunately) when _gdbm_put_av_elem was called - from pop_avail_block in falloc.c. The latter case is quite common, - which means that there can be lots of existing databases with broken - ordering of avail arrays. Thus, restoring of the proper ordering - is essential for people to be able to use their existing databases. -*/ -int -gdbm_avail_table_valid_p (GDBM_FILE dbf, avail_elem *av, int count) -{ - off_t prev = 0; - int i; - int needs_sorting = 0; - avail_elem *p = av; - - prev = 0; - for (i = 0; i < count; i++, p++) - { - if (!(p->av_adr >= dbf->header->bucket_size - && p->av_adr + p->av_size <= dbf->header->next_block)) - return 0; - if (p->av_size < prev) - needs_sorting = 1; - prev = p->av_size; - } - - if (needs_sorting && dbf->read_write) - { - GDBM_DEBUG (GDBM_DEBUG_ERR, "%s", "restoring sort order"); - qsort (av, count, sizeof av[0], avail_comp); - } - - return 1; -} - -int -gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk, size_t size) -{ - if (!(size > sizeof (avail_block) - && (avblk->size > 1 && avblk->count >= 0 && avblk->count <= avblk->size) - && ((size - sizeof (avail_block)) / sizeof (avail_elem) + 1) >= avblk->count - && gdbm_avail_table_valid_p (dbf, avblk->av_table, avblk->count))) - { - GDBM_SET_ERRNO (dbf, GDBM_BAD_AVAIL, TRUE); - return -1; - } - return 0; -} - -int -gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket) -{ - if (!(bucket->av_count >= 0 - && bucket->av_count <= BUCKET_AVAIL - && gdbm_avail_table_valid_p (dbf, bucket->bucket_avail, - bucket->av_count))) - { - GDBM_SET_ERRNO (dbf, GDBM_BAD_AVAIL, TRUE); - return -1; - } - return 0; -} - static int validate_header (gdbm_file_header const *hdr, struct stat const *st) { @@ -206,9 +128,6 @@ validate_header (gdbm_file_header const *hdr, struct stat const *st) return result; } -#define HEADER_AVAIL_SIZE(dbf) \ - ((dbf)->header->block_size - offsetof (gdbm_file_header, avail)) - int _gdbm_validate_header (GDBM_FILE dbf) { @@ -222,7 +141,7 @@ _gdbm_validate_header (GDBM_FILE dbf) if (rc == 0) { if (gdbm_avail_block_validate (dbf, &dbf->header->avail, - HEADER_AVAIL_SIZE (dbf))) + GDBM_HEADER_AVAIL_SIZE (dbf))) rc = GDBM_BAD_AVAIL; } return rc; @@ -588,7 +507,7 @@ gdbm_fd_open (int fd, const char *file_name, int block_size, } if (gdbm_avail_block_validate (dbf, &dbf->header->avail, - HEADER_AVAIL_SIZE (dbf))) + GDBM_HEADER_AVAIL_SIZE (dbf))) { if (!(flags & GDBM_CLOERROR)) dbf->desc = -1; diff --git a/src/gdbmtool.c b/src/gdbmtool.c index 0293647..7924a40 100644 --- a/src/gdbmtool.c +++ b/src/gdbmtool.c @@ -215,47 +215,29 @@ print_bucket (FILE *fp, hash_bucket *bucket, const char *mesg, ...) bucket->bucket_avail[index].av_size); } -size_t -_gdbm_avail_list_size (GDBM_FILE dbf, size_t min_size) +struct avail_list_counter { - int temp; - int size; - avail_block *av_stk; - size_t lines; - - lines = 4 + dbf->header->avail.count; - if (lines > min_size) - return lines; - /* Initialize the variables for a pass throught the avail stack. */ - temp = dbf->header->avail.next_block; - size = (((dbf->header->avail.size * sizeof (avail_elem)) >> 1) - + sizeof (avail_block)); - av_stk = emalloc (size); - - /* Traverse the stack. */ - while (temp) - { - if (gdbm_file_seek (dbf, temp, SEEK_SET) != temp) - { - terror ("lseek: %s", strerror (errno)); - break; - } - - if (_gdbm_avail_block_read (dbf, av_stk, size)) - { - terror ("read: %s", gdbm_db_strerror (dbf)); - break; - } - - lines += av_stk->count; - if (lines > min_size) - break; + size_t min_size; + size_t lines; +}; - temp = av_stk->next_block; - } - free (av_stk); +static int +avail_list_count (avail_block *avblk, off_t off, void *data) +{ + struct avail_list_counter *ctr = data; - return lines; + ctr->lines += avblk->count; + return ctr->lines > ctr->min_size; +} + +size_t +_gdbm_avail_list_size (GDBM_FILE dbf, size_t min_size) +{ + struct avail_list_counter ctr; + ctr.min_size = 0; + ctr.lines = 0; + gdbm_avail_traverse (dbf, avail_list_count, &ctr); + return ctr.lines; } static void @@ -270,47 +252,25 @@ av_table_display (avail_elem *av_table, int count, FILE *fp) } } +static int +avail_list_print (avail_block *avblk, off_t n, void *data) +{ + FILE *fp = data; + fprintf (fp, _("\nblock = %lu\nsize = %d\ncount = %d\n"), + (unsigned long) n, avblk->size, avblk->count); + av_table_display (avblk->av_table, avblk->count, fp); + return 0; +} + void _gdbm_print_avail_list (FILE *fp, GDBM_FILE dbf) { - int temp; - size_t size; - avail_block *av_stk; - /* Print the the header avail block. */ fprintf (fp, _("\nheader block\nsize = %d\ncount = %d\n"), dbf->header->avail.size, dbf->header->avail.count); av_table_display (dbf->header->avail.av_table, dbf->header->avail.count, fp); - - /* Initialize the variables for a pass throught the avail stack. */ - temp = dbf->header->avail.next_block; - size = (((size_t)dbf->header->avail.size - 1) * sizeof (avail_elem)) - + sizeof (avail_block); - av_stk = emalloc (size); - - /* Print the stack. */ - while (temp) - { - if (gdbm_file_seek (dbf, temp, SEEK_SET) != temp) - { - terror ("lseek: %s", strerror (errno)); - break; - } - - if (_gdbm_avail_block_read (dbf, av_stk, size)) - { - terror ("read: %s", gdbm_db_strerror (dbf)); - break; - } - - /* Print the block! */ - fprintf (fp, _("\nblock = %d\nsize = %d\ncount = %d\n"), temp, - av_stk->size, av_stk->count); - av_table_display (av_stk->av_table, av_stk->count, fp); - - temp = av_stk->next_block; - } - free (av_stk); + if (gdbm_avail_traverse (dbf, avail_list_print, fp)) + terror ("%s", gdbm_strerror (gdbm_errno)); } void diff --git a/src/proto.h b/src/proto.h index 2b34ae3..7c03b9d 100644 --- a/src/proto.h +++ b/src/proto.h @@ -49,9 +49,6 @@ int _gdbm_end_update (GDBM_FILE); void _gdbm_fatal (GDBM_FILE, const char *); /* From gdbmopen.c */ -int gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk, size_t size); -int gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket); - int _gdbm_validate_header (GDBM_FILE dbf); int _gdbm_file_size (GDBM_FILE dbf, off_t *psize); @@ -92,6 +89,14 @@ cache_tree *_gdbm_cache_tree_alloc (void); void _gdbm_cache_tree_destroy (cache_tree *tree); void _gdbm_cache_tree_delete (cache_tree *tree, struct cache_node *n); +/* avail.c */ +int gdbm_avail_block_validate (GDBM_FILE dbf, avail_block *avblk, size_t size); +int gdbm_bucket_avail_table_validate (GDBM_FILE dbf, hash_bucket *bucket); +int gdbm_avail_traverse (GDBM_FILE dbf, + int (*cb) (avail_block *, off_t, void *), + void *data); + + /* Return codes for _gdbm_cache_tree_lookup. */ enum { diff --git a/src/systems.h b/src/systems.h index 13b5ea7..1ca9caa 100644 --- a/src/systems.h +++ b/src/systems.h @@ -19,6 +19,7 @@ /* Include all system headers first. */ #include #include +#include #if HAVE_SYS_FILE_H # include #endif -- cgit v1.2.1