diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2021-11-07 20:18:31 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2021-11-14 13:29:42 +0200 |
commit | 322f6e9c3c13d3079306628b96864f728f87be62 (patch) | |
tree | 997592aae23da27552e9cbc3549dd58b2e044e5b /src | |
parent | 45d2ce49d88bc59bbc84dc738a55142205a6e2ce (diff) | |
download | gdbm-322f6e9c3c13d3079306628b96864f728f87be62.tar.gz gdbm-322f6e9c3c13d3079306628b96864f728f87be62.tar.bz2 |
Switch to hash table cache implementation
* src/cachetree.c: Remove.
* src/Makefile.am: Remove cachetree.c
* doc/gdbm.texi: Document the changes.
* src/bucket.c (cache_tab_lookup_slot)
(cache_tab_resize): New function.
(cache_elem_new): Initialize ca_coll.
(cache_elem_free, cache_lookup)
(_gdbm_cache_init,_gdbm_cache_free): Rewrite with hash-based cache lookup.
(_gdbm_fetch_data): Remove unused function.
* src/gdbm.h.in (GDBM_GETDBFORMAT, GDBM_GETDIRDEPTH)
(GDBM_GETBUCKETSIZE, GDBM_GETCACHEAUTO, GDBM_SETCACHEAUTO): New option codes.
* src/gdbmdefs.h (cache_node): Remove.
(cache_elem): Remove ca_node. Add ca_coll (collision resolution pointer).
(gdbm_file_info): New members: cache_auto, cache_bits, cache.
* src/gdbmopen.c (gdbm_fd_open): Change cache initialization.
* src/gdbmsetopt.c (GDBM_GETDBFORMAT,GDBM_GETDIRDEPTH)
(GDBM_GETBUCKETSIZE,GDBM_GETCACHEAUTO)
(GDBM_SETCACHEAUTO): Implement new options.
(setopt_gdbm_getflags): Reflect the state of GDBM_CLOEXEC and GDBM_NUMSYNC.
* src/proto.h (_gdbm_fetch_data,_gdbm_cache_tree_alloc)
(_gdbm_cache_tree_destroy,_gdbm_cache_tree_delete)
(_gdbm_cache_tree_lookup): Remove protos.
* src/recover.c (_gdbm_finish_transfer): Restore original cache settings.
* tests/Makefile.am: Add new test.
* tests/testsuite.at: Likewise.
* tests/gtcacheopt.c: New file.
* tests/setopt02.at: New test case.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/bucket.c | 343 | ||||
-rw-r--r-- | src/cachetree.c | 484 | ||||
-rw-r--r-- | src/gdbm.h.in | 11 | ||||
-rw-r--r-- | src/gdbmdefs.h | 27 | ||||
-rw-r--r-- | src/gdbmopen.c | 15 | ||||
-rw-r--r-- | src/gdbmsetopt.c | 88 | ||||
-rw-r--r-- | src/proto.h | 15 | ||||
-rw-r--r-- | src/recover.c | 9 |
9 files changed, 328 insertions, 665 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 74103ee..afd9abd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -42,7 +42,6 @@ lib_LTLIBRARIES = libgdbm.la libgdbm_la_LIBADD = @LTLIBINTL@ libgdbm_la_SOURCES = \ - cachetree.c\ gdbmclose.c\ gdbmcount.c\ gdbmdelete.c\ diff --git a/src/bucket.c b/src/bucket.c index 4a5a04c..2165ce1 100644 --- a/src/bucket.c +++ b/src/bucket.c @@ -18,9 +18,10 @@ #include "autoconf.h" #include "gdbmdefs.h" +#include <stdint.h> #include <limits.h> -#define GDBM_MAX_DIR_SIZE INT_MAX +#define GDBM_MAX_DIR_SIZE INT32_MAX #define GDBM_MAX_DIR_HALF (GDBM_MAX_DIR_SIZE / 2) /* Initializing a new hash buckets sets all bucket entries to -1 hash value. */ @@ -49,6 +50,44 @@ set_cache_entry (GDBM_FILE dbf, cache_elem *elem) } +/* Bucket cache table functions */ + +/* Hash an off_t word into an index of width NBITS. */ +static size_t +adrhash (off_t adr, size_t nbits) +{ + adr ^= adr >> (GDBM_HASH_BITS + 1 - nbits); + return ((265443576910ul * adr) & 0xffffffff) >> (GDBM_HASH_BITS + 1 - nbits); +} + +/* + * Return a pointer to the cache table slot for bucket address ADR. + * Never returns NULL. + */ +static cache_elem ** +cache_tab_lookup_slot (GDBM_FILE dbf, off_t adr) +{ + cache_elem **cache = dbf->cache; + size_t h = adrhash (adr, dbf->cache_bits); + + if (cache[h]) + { + if (cache[h]->ca_adr != adr) + { + cache_elem *prev = cache[h], *p = prev->ca_coll; + while (p) + { + if (p->ca_adr == adr) + break; + prev = p; + p = prev->ca_coll; + } + return &prev->ca_coll; + } + } + return &cache[h]; +} + /* LRU list management */ /* Link ELEM after REF in DBF cache. If REF is NULL, link at head */ @@ -126,11 +165,8 @@ cache_elem_new (GDBM_FILE dbf, off_t adr) elem->ca_data.hash_val = -1; elem->ca_data.elem_loc = -1; - elem->ca_prev = elem->ca_next = NULL; + elem->ca_prev = elem->ca_next = elem->ca_coll = NULL; elem->ca_hits = 0; - elem->ca_node = NULL; - - dbf->cache_num++; return elem; } @@ -139,11 +175,25 @@ cache_elem_new (GDBM_FILE dbf, off_t adr) static void cache_elem_free (GDBM_FILE dbf, cache_elem *elem) { - _gdbm_cache_tree_delete (dbf->cache_tree, elem->ca_node); + size_t h = adrhash (elem->ca_adr, dbf->cache_bits); + cache_elem **pp; + lru_unlink_elem (dbf, elem); + elem->ca_next = dbf->cache_avail; dbf->cache_avail = elem; dbf->cache_num--; + + pp = &dbf->cache[h]; + while (*pp) + { + if (*pp == elem) + { + *pp = (*pp)->ca_coll; + break; + } + pp = &(*pp)->ca_coll; + } } /* Free the least recently used cache entry. */ @@ -159,66 +209,134 @@ cache_lru_free (GDBM_FILE dbf) cache_elem_free (dbf, last); return 0; } - + +/* + * Round up V to the next highest power of 2 and compute log2 of + * it using De Brujin sequences. + * See http://supertech.csail.mit.edu/papers/debruijn.pdf + */ +static unsigned +log2i (unsigned v) +{ + static const int dbp[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return dbp[(uint32_t)(v * 0x077CB531U) >> 27]; +} + +static int +cache_tab_resize (GDBM_FILE dbf, int bits) +{ + size_t size = 1 << bits; + + if (!dbf->cache || size != dbf->cache_size) + { + size_t n = size * sizeof (dbf->cache[0]); + cache_elem **p, *elem; + + /* Flush existing cache */ + if (_gdbm_cache_flush (dbf)) + return -1; + + /* Reallocate it */ + p = realloc (dbf->cache, n); + if (!p) + { + GDBM_SET_ERRNO (dbf, GDBM_MALLOC_ERROR, FALSE); + return -1; + } + dbf->cache = p; + dbf->cache_size = size; + dbf->cache_bits = bits; + + memset (dbf->cache, 0, n); + + /* Rehash and free surplus elements */ + for (elem = dbf->cache_lru; elem; ) + { + cache_elem *prev = elem->ca_prev; + elem->ca_coll = NULL; + if (size < dbf->cache_num) + { + cache_elem_free (dbf, elem); + } + else + { + p = cache_tab_lookup_slot (dbf, elem->ca_adr); + if (*p) + abort ();// shouldn't happen + *p = elem; + } + elem = prev; + } + } + return 0; +} + +enum + { + cache_found, + cache_new, + cache_failure + }; + static int cache_lookup (GDBM_FILE dbf, off_t adr, cache_elem *ref, cache_elem **ret_elem) { int rc; - cache_node *node; - cache_elem *elem; - int retrying = 0; + cache_elem **elp, *elem; dbf->cache_access_count++; - retry: - rc = _gdbm_cache_tree_lookup (dbf->cache_tree, adr, &node); - switch (rc) + + elp = cache_tab_lookup_slot (dbf, adr); + + if (*elp != NULL) { - case node_found: - elem = node->elem; + elem = *elp; elem->ca_hits++; dbf->cache_hits++; lru_unlink_elem (dbf, elem); - break; - - case node_new: - elem = cache_elem_new (dbf, adr); - if (!elem) - { - _gdbm_cache_tree_delete (dbf->cache_tree, node); - return node_failure; - } - elem->ca_node = node; - node->elem = elem; - - if (dbf->cache_size != GDBM_CACHE_AUTO - && dbf->cache_num > dbf->cache_size - && cache_lru_free (dbf)) + rc = cache_found; + } + else if ((elem = cache_elem_new (dbf, adr)) == NULL) + return cache_failure; + else + { + rc = cache_new; + + if (dbf->cache_num == dbf->cache_size) { - cache_elem_free (dbf, elem); - return node_failure; + if (dbf->cache_auto && dbf->cache_bits < dbf->header->dir_bits && + cache_tab_resize (dbf, dbf->cache_bits + 1) == 0) + { + /* Table has been reallocated, recompute the slot. */ + elp = cache_tab_lookup_slot (dbf, adr); + } + else if (cache_lru_free (dbf)) + { + rc = cache_failure; + } } - break; - case node_failure: - if (!retrying) + if (rc == cache_new) { - if (errno == ENOMEM) - { - /* Release the last recently used element and retry. */ - if (cache_lru_free (dbf)) - return node_failure; - retrying = 1; - goto retry; - } + *elp = elem; + dbf->cache_num++; } - return node_failure; - - default: - abort (); } - lru_link_elem (dbf, elem, ref); - *ret_elem = elem; + if (rc != cache_failure) + *ret_elem = elem; return rc; } @@ -254,10 +372,10 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) switch (cache_lookup (dbf, bucket_adr, NULL, &elem)) { - case node_found: + case cache_found: break; - case node_new: + case cache_new: /* Position the file pointer */ file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET); if (file_pos != bucket_adr) @@ -306,7 +424,7 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index) break; - case node_failure: + case cache_failure: return -1; } set_cache_entry (dbf, elem); @@ -343,14 +461,15 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) off_t dir_end; new_bits = dbf->bucket->bucket_bits + 1; + /* Allocate two new buckets */ adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size); switch (cache_lookup (dbf, adr_0, dbf->cache_mru, &newcache[0])) { - case node_new: + case cache_new: break; - case node_found: + case cache_found: /* should not happen */ GDBM_DEBUG (GDBM_DEBUG_ERR, "%s: bucket found where it should not", @@ -358,7 +477,7 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE); return -1; - case node_failure: + case cache_failure: return -1; } _gdbm_new_bucket (dbf, newcache[0]->ca_bucket, new_bits); @@ -366,10 +485,10 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size); switch (cache_lookup (dbf, adr_1, newcache[0], &newcache[1])) { - case node_new: + case cache_new: break; - case node_found: + case cache_found: /* should not happen */ GDBM_DEBUG (GDBM_DEBUG_ERR, "%s: bucket found where it should not", @@ -377,7 +496,7 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE); return -1; - case node_failure: + case cache_failure: return -1; } _gdbm_new_bucket (dbf, newcache[1]->ca_bucket, new_bits); @@ -508,6 +627,7 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert) newcache[1]->ca_bucket->bucket_avail, &newcache[1]->ca_bucket->av_count, dbf->coalesce_blocks); + lru_unlink_elem (dbf, newcache[0]); lru_link_elem (dbf, newcache[0], NULL); set_cache_entry (dbf, newcache[0]); @@ -554,33 +674,37 @@ _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry) return 0; } -/* Cache manipulation functions. */ +/* Cache manipulation interface functions. */ + +#define INIT_CACHE_BITS 9 /* Initialize the bucket cache. */ int _gdbm_cache_init (GDBM_FILE dbf, size_t size) { - if (size == dbf->cache_size) - return 0; - - if (size != GDBM_CACHE_AUTO) + int bits; + int cache_auto; + + if (size == GDBM_CACHE_AUTO) { - while (size < dbf->cache_num) - { - /* Flush the least recently used element */ - cache_elem *elem = dbf->cache_lru; - if (elem->ca_changed) - { - if (_gdbm_write_bucket (dbf, elem)) - return -1; - } - cache_elem_free (dbf, elem); - } + cache_auto = TRUE; + bits = dbf->cache ? dbf->cache_bits : INIT_CACHE_BITS; + } + else if (size > GDBM_DIR_COUNT (dbf) || + size > SIZE_T_MAX / sizeof (dbf->cache[0])) + { + GDBM_SET_ERRNO (dbf, GDBM_OPT_BADVAL, FALSE); + return -1; + } + else + { + cache_auto = FALSE; + bits = log2i (size < 4 ? 4 : size); } - - dbf->cache_size = size; - return 0; + dbf->cache_auto = cache_auto; + + return cache_tab_resize (dbf, bits); } /* Free the bucket cache */ @@ -591,7 +715,8 @@ _gdbm_cache_free (GDBM_FILE dbf) while (dbf->cache_lru) cache_elem_free (dbf, dbf->cache_lru); - _gdbm_cache_tree_destroy (dbf->cache_tree); + free (dbf->cache); + dbf->cache = NULL; while ((elem = dbf->cache_avail) != NULL) { dbf->cache_avail = elem->ca_next; @@ -615,69 +740,7 @@ _gdbm_cache_flush (GDBM_FILE dbf) } return 0; } - -int -_gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf) -{ - off_t bucket_adr; - off_t file_pos; - int rc; - cache_elem *elem; - char *dst = buf; - - bucket_adr = (off / dbf->header->bucket_size) - * dbf->header->bucket_size; - off -= bucket_adr; - while (size) - { - size_t nbytes; - - switch (cache_lookup (dbf, bucket_adr, dbf->cache_mru, &elem)) - { - case node_found: - break; - - case node_new: - /* Position the file pointer */ - file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET); - if (file_pos != bucket_adr) - { - GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE); - cache_elem_free (dbf, elem); - _gdbm_fatal (dbf, _("lseek error")); - return -1; - } - /* Read the bucket. */ - rc = _gdbm_full_read (dbf, elem->ca_bucket, - dbf->header->bucket_size); - if (rc) - { - GDBM_DEBUG (GDBM_DEBUG_ERR, - "%s: error reading data bucket: %s", - dbf->name, gdbm_db_strerror (dbf)); - dbf->need_recovery = TRUE; - cache_elem_free (dbf, elem); - _gdbm_fatal (dbf, gdbm_db_strerror (dbf)); - return -1; - } - break; - - case node_failure: - return -1; - } - - nbytes = dbf->header->bucket_size - off; - if (nbytes > size) - nbytes = size; - memcpy (dst, (char*) elem->ca_bucket + off, nbytes); - dst += nbytes; - size -= nbytes; - bucket_adr++; - off = 0; - } - return 0; -} void gdbm_get_cache_stats (GDBM_FILE dbf, diff --git a/src/cachetree.c b/src/cachetree.c deleted file mode 100644 index de5a5c1..0000000 --- a/src/cachetree.c +++ /dev/null @@ -1,484 +0,0 @@ -/* cachetree.c - Implementation of the red-black tree for cache lookups. */ - -/* This file is part of GDBM, the GNU data base manager. - Copyright (C) 2019-2021 Free Software Foundation, Inc. - - GDBM is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GDBM is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with GDBM. If not, see <http://www.gnu.org/licenses/>. */ - -#include "autoconf.h" -#include "gdbmdefs.h" - -enum cache_node_color { RED, BLACK }; - -struct cache_tree -{ - cache_node *root; /* Root of the tree */ - cache_node *avail; /* List of available nodes, linked by parent field */ -}; - -/* Allocate and return a new node. Pick the head item from the avail - list and update the avail pointer. If the list is empty, malloc - a new node. - All members in the returned node are filled with 0. -*/ -static cache_node * -rbt_node_alloc (cache_tree *tree) -{ - cache_node *n; - - n = tree->avail; - if (n) - tree->avail = n->parent; - else - { - n = malloc (sizeof (*n)); - if (!n) - return NULL; - } - memset (n, 0, sizeof (*n)); - return n; -} - -/* Return the node N to the avail list in TREE. */ -static void -rbt_node_dealloc (cache_tree *tree, cache_node *n) -{ - n->parent = tree->avail; - tree->avail = n; -} - -/* Red-black tree properties: - 1. Each node is either red or black. - 2. The root node is black. - 3. All leaves are black and contain no data. - 4. Every red node has two children, and both are black. - IOW, the parent of every red node is black. - 5. All paths from any given node to its leaf nodes contain the same - number of black nodes. - */ - -/* Auxiliary functions for accessing nodes. */ - -/* Return the grandparent node of N. - Prerequisite: N may not be root. -*/ -static inline cache_node * -grandparent (cache_node *n) -{ - return n->parent->parent; -} - -/* Return the sibling node of N. - Prerequisite: N may not be root. -*/ -static inline cache_node * -sibling (cache_node *n) -{ - return (n == n->parent->left) ? n->parent->right : n->parent->left; -} - -/* Return the uncle node of N. - Prerequisite: N must be at least 2 nodes away from root. -*/ -static inline cache_node * -uncle (cache_node *n) -{ - return sibling (n->parent); -} - -/* Returns the color of the node N. - Empty leaves are represented by NULL, therefore NULL is assumed to - be black (see property 3). -*/ -static inline enum cache_node_color -node_color (cache_node *n) -{ - return n == NULL ? BLACK : n->color; -} - -/* Replace the OLDN with NEWN. - Does not modify OLDN. */ -static inline void -replace_node (cache_tree *tree, cache_node *oldn, cache_node *newn) -{ - if (oldn->parent == NULL) - tree->root = newn; - else if (oldn == oldn->parent->left) - oldn->parent->left = newn; - else - oldn->parent->right = newn; - - if (newn != NULL) - newn->parent = oldn->parent; -} - -/* Rotate the TREE left over the node N. */ -static inline void -rotate_left (cache_tree *tree, cache_node *n) -{ - cache_node *right = n->right; - replace_node (tree, n, right); - n->right = right->left; - if (right->left != NULL) - right->left->parent = n; - right->left = n; - n->parent = right; -} - -/* Rotate the TREE right over the node N. */ -static inline void -rotate_right (cache_tree *tree, cache_node *n) -{ - cache_node *left = n->left; - replace_node (tree, n, left); - n->left = left->right; - if (left->right != NULL) - left->right->parent = n; - left->right = n; - n->parent = left; -} - -/* Node deletion */ -static inline void -rbt_delete_fixup (cache_tree *tree, cache_node *n) -{ - while (1) - { - if (n->parent == NULL) - { - /* If N has become the root node, deletion resulted in removing - one black node (prior root) from every path, so all properties - still hold. - */ - return; - } - else - { - /* If N has a red sibling, change the colors of the parent and - sibling and rotate about the parent. Thus, the sibling becomes - grandparent and we can proceed to the next case. - */ - if (node_color (sibling (n)) == RED) - { - n->parent->color = RED; - sibling (n)->color = BLACK; - if (n == n->parent->left) - rotate_left (tree, n->parent); - else - rotate_right (tree, n->parent); - } - - /* If the parent, sibling and nephews are all black, paint the - sibling red. This means one black node was removed from all - paths passing through the parent, so we recurse to the beginning - of the loop with parent as the argument to restore the properties. - This is the only branch that loops. - */ - if (node_color (n->parent) == BLACK - && node_color (sibling (n)) == BLACK - && node_color (sibling (n)->left) == BLACK - && node_color (sibling (n)->right) == BLACK) - { - sibling (n)->color = RED; - n = n->parent; - continue; - } - else - { - /* If the sibling and nephews are black but the parent is red, - swap the colors of the sibling and parent. The properties - are then restored. - */ - if (node_color (n->parent) == RED - && node_color (sibling (n)) == BLACK - && node_color (sibling (n)->left) == BLACK - && node_color (sibling (n)->right) == BLACK) - { - sibling (n)->color = RED; - n->parent->color = BLACK; - } - else - { - /* N is the left child of its parent, its sibling is black, - and the sibling's right child is black. Swap the colors - of the sibling and its left sibling and rotate right - over the sibling. - */ - if (n == n->parent->left - && node_color (sibling (n)) == BLACK - && node_color (sibling (n)->left) == RED - && node_color (sibling (n)->right) == BLACK) - { - sibling (n)->color = RED; - sibling (n)->left->color = BLACK; - rotate_right (tree, sibling (n)); - } - else if (n == n->parent->right - && node_color (sibling (n)) == BLACK - && node_color (sibling (n)->right) == RED - && node_color (sibling (n)->left) == BLACK) - { - /* The mirror case is handled similarly. */ - sibling (n)->color = RED; - sibling (n)->right->color = BLACK; - rotate_left (tree, sibling (n)); - } - /* N is the left child of its parent, its sibling is black - and the sibling's right child is red. Swap the colors - of the parent and sibling, paint the sibling's right - child black and rotate left at the parent. Similarly - for the mirror case. This achieves the following: - - . A black node is added to all paths passing through N; - . A black node is removed from all paths through the - sibling's red child. - . The latter is painted black which restores missing - black node in all paths through the sibling's red child. - - Another sibling's child becomes a child of N's parent - during the rotation and is therefore not affected. - */ - sibling (n)->color = node_color (n->parent); - n->parent->color = BLACK; - if (n == n->parent->left) - { - sibling (n)->right->color = BLACK; - rotate_left (tree, n->parent); - } - else - { - sibling (n)->left->color = BLACK; - rotate_right (tree, n->parent); - } - } - } - } - break; - } -} - -/* Remove N from the TREE. */ -void -_gdbm_cache_tree_delete (cache_tree *tree, cache_node *n) -{ - cache_node *child; - - /* If N has both left and right children, reduce the problem to - the node with only one child. To do so, find the in-order - predecessor of N, copy its value (elem) to N and then delete - the predecessor. */ - if (n->left != NULL && n->right != NULL) - { - cache_node *p; - for (p = n->left; p->right; p = p->right) - ; - n->adr = p->adr; - n->elem = p->elem; - n->elem->ca_node = n; - n = p; - } - - /* N has only one child. Select it. */ - child = n->left ? n->left : n->right; - if (node_color (n) == BLACK) - { - n->color = node_color (child); - rbt_delete_fixup (tree, n); - } - replace_node (tree, n, child); - if (n->parent == NULL && child != NULL) /* root should be black */ - child->color = BLACK; - - /* Return N to the avail pool */ - rbt_node_dealloc (tree, n); -} - -/* Insertion */ -static inline void -rbt_insert_fixup (cache_tree *tree, cache_node *n) -{ - while (1) - { - if (n->parent == NULL) - { - /* Node was inserted at the root of the tree. - The root node must be black (property 2). Changing its color - to black would add one black node to every path, which means - the property 5 would remain satisfied. So we simply paint the - node black. - */ - n->color = BLACK; - } - else if (node_color (n->parent) == BLACK) - { - /* The node has black parent. - All properties are satisfied. There's no need to change anything. - */ - return; - } - else if (node_color (uncle (n)) == RED) - { - /* The uncle node is red. - Repaint the parent and uncle black and the grandparent red. This - would satisfy 4. However, if the grandparent is root, this would - violate the property 2. So we repaint the grandparent by - re-entering the fixup loop with grandparent as the node. - This is the only branch that loops. - */ - n->parent->color = BLACK; - uncle (n)->color = BLACK; - n = grandparent (n); - n->color = RED; - continue; - } - else - { - /* The new node is the right child of its parent and the parent is - the left child of the grandparent. Rotate left about the parent. - Mirror case: The new node is the left child of its parent and the - parent is the right child of the grandparent. Rotate right about - the parent. This fixes the properties for the rbt_insert_5. - */ - if (n == n->parent->right && n->parent == grandparent (n)->left) - { - rotate_left (tree, n->parent); - n = n->left; - } - else if (n == n->parent->left && n->parent == grandparent (n)->right) - { - rotate_right (tree, n->parent); - n = n->right; - } - - /* The new node is the left child of its parent and the parent is the - left child of the grandparent. Rotate right about the grandparent. - Mirror case: The new node is the right child of its parent and the - parent - is the right child of the grandparent. Rotate left. - */ - n->parent->color = BLACK; - grandparent (n)->color = RED; - if (n == n->parent->left && n->parent == grandparent (n)->left) - { - rotate_right (tree, grandparent (n)); - } - else - { - rotate_left (tree, grandparent (n)); - } - } - break; - } -} - -/* Look up the node with the given ADR. - If found, put it in *RETVAL and return node_found. - - Otherwise, create a new node and insert it at the appropriate place in - the tree. Store the address of the newly created node in *RETVAL and - return node_new. - - If a new node cannot be created (memory exhausted), return node_failure. -*/ -int -_gdbm_cache_tree_lookup (cache_tree *tree, off_t adr, cache_node **retval) -{ - int res; - cache_node *node, *parent = NULL; - - node = tree->root; - while (node) - { - if (adr == node->adr) - break; - parent = node; - if (adr < node->adr) - node = node->left; - else - node = node->right; - } - - if (node) - { - res = node_found; - } - else - { - node = rbt_node_alloc (tree); - if (!node) - return node_failure; - if (!parent) - tree->root = node; - else if (adr < parent->adr) - parent->left = node; - else - parent->right = node; - node->adr = adr; - node->parent = parent; - rbt_insert_fixup (tree, node); - res = node_new; - } - *retval = node; - return res; -} - -/* Interface functions */ - -/* Create a cache tree structure for the database file DBF. */ -cache_tree * -_gdbm_cache_tree_alloc (void) -{ - cache_tree *t = malloc (sizeof (*t)); - if (t) - { - t->root = NULL; - t->avail = NULL; - } - return t; -} - -/* Free the memory used by the TREE. */ -void -_gdbm_cache_tree_destroy (cache_tree *tree) -{ - cache_node *n; - - /* Free the allocated tree nodes. Traverse the tree as if it were - a simple binary tree: there's no use preserving RBT properties now. - */ - while ((n = tree->root) != NULL) - { - if (!n->left) - tree->root = n->right; - else if (!n->right) - tree->root = n->left; - else - { - cache_node *p; - for (p = n->left; p->right; p = p->right) - ; - p->right = n->right; - tree->root = n->left; - } - free (n); - } - - /* Free the avail list */ - while ((n = tree->avail) != NULL) - { - tree->avail = n->parent; - free (n); - } - free (tree); -} diff --git a/src/gdbm.h.in b/src/gdbm.h.in index 041837d..e371e9b 100644 --- a/src/gdbm.h.in +++ b/src/gdbm.h.in @@ -88,9 +88,16 @@ extern "C" { # define GDBM_GETMAXMAPSIZE 14 /* Get maximum mapped memory size */ # define GDBM_GETDBNAME 15 /* Return database file name */ # define GDBM_GETBLOCKSIZE 16 /* Return block size */ - +# define GDBM_GETDBFORMAT 17 /* Return the database format */ +# define GDBM_GETDIRDEPTH 18 /* Directory depth: number of initial (most + significant) bits in hash interpreted as + index to the directory. */ +# define GDBM_GETBUCKETSIZE 19 /* Get number of elements per bucket */ +# define GDBM_GETCACHEAUTO 20 /* Get the value of cache auto-adjustment */ +# define GDBM_SETCACHEAUTO 21 /* Set the value of cache auto-adjustment */ + # define GDBM_CACHE_AUTO 0 - + typedef @GDBM_COUNT_T@ gdbm_count_t; /* The data and key structure. */ diff --git a/src/gdbmdefs.h b/src/gdbmdefs.h index 98f55c3..fba9d2d 100644 --- a/src/gdbmdefs.h +++ b/src/gdbmdefs.h @@ -179,15 +179,6 @@ typedef struct } data_cache_elem; typedef struct cache_elem cache_elem; -typedef struct cache_node cache_node; - -struct cache_node -{ - cache_node *left, *right, *parent; - int color; - off_t adr; - cache_elem *elem; -}; struct cache_elem { @@ -195,19 +186,16 @@ struct cache_elem char ca_changed; /* Data in the bucket changed. */ data_cache_elem ca_data; /* Cached datum */ cache_elem *ca_prev, /* Previous element in LRU list. */ - *ca_next; /* Next elements in LRU list. + *ca_next, /* Next elements in LRU list. If the item is in cache_avail list, only ca_next is used. It points to the next available element. */ + *ca_coll; /* Next element in a collision sequence */ size_t ca_hits; /* Number of times this element was requested */ - cache_node *ca_node; /* Points back to the RBT node for this - element. */ hash_bucket ca_bucket[1];/* Associated bucket (dbf->header->bucket_size bytes). */ }; -typedef struct cache_tree cache_tree; - /* This final structure contains all main memory based information for a gdbm file. This allows multiple gdbm files to be opened at the same time by one program. */ @@ -243,6 +231,9 @@ struct gdbm_file_info /* Last error was fatal, the database needs recovery */ unsigned need_recovery :1; + /* Automatic bucket cache size */ + unsigned cache_auto :1; + /* Last GDBM error number */ gdbm_error last_error; /* Last system error number */ @@ -275,10 +266,12 @@ struct gdbm_file_info off_t *dir; /* The bucket cache. */ - size_t cache_size; /* Cache capacity */ + int cache_bits; /* Address bits used for compting bucket hash */ + size_t cache_size; /* Cache capacity: 2^cache_bits */ size_t cache_num; /* Actual number of elements in cache */ - /* Cache elements form a binary search tree. */ - cache_tree *cache_tree; + /* Cache hash table. */ + cache_elem **cache; + /* Cache elements are linked in a list sorted by relative access time */ cache_elem *cache_mru; /* Most recently used element - head of the list */ cache_elem *cache_lru; /* Last recently used element - tail of the list */ |