aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2019-11-12 07:29:45 +0200
committerSergey Poznyakoff <gray@gnu.org>2019-11-12 08:11:07 +0200
commitdc176a5cdc841e05876d5e7b52cfb1b7bac2d333 (patch)
tree8a12ecb5057e25a8fdd0ebdc436718b19b6d78c6
parent4fb2326a4ac0e6f45c21f7651b1c87317567fd82 (diff)
downloadgdbm-dc176a5cdc841e05876d5e7b52cfb1b7bac2d333.tar.gz
gdbm-dc176a5cdc841e05876d5e7b52cfb1b7bac2d333.tar.bz2
Rewrite bucket cache
The new bucket cache uses the least recently used replacement policy (instead of the least recently read, implemented previously). It also allows for quick bucket lookups by the corresponding disk address. To this effect the cache entries form a red-black tree sorted by bucket address. Additionally, data buckets are also cached. * README: Describe the new branch. * src/bucket.c: Rewrite cache support. * src/cachetree.c: New file. * src/Makefile.am: Add new file. * src/findkey.c (_gdbm_read_entry): Use _gdbm_fetch_data. This ensures data pages are cached as well as buckets. * src/gdbm.h.in (GDBM_BUCKET_CACHE_CORRUPTED): New error code. (gdbm_cache_stat): New struct. (gdbm_get_cache_stats): New proto. * src/gdbmclose.c (gdbm_close): Call _gdbm_cache_free to dispose of the cache. * src/gdbmdefs.h (cache_elem_color): New data type. (cache_elem): New members: ca_left, ca_right, ca_node, and ca_hits. (cache_tree): New typedef. (gdbm_file_info): Remove bucket_cache and last_read. New fields: cache_num, cache_tree, cache_mru, cache_lru, cache_avail, cache_access_count. * src/gdbmerrno.c: Handle GDBM_BUCKET_CACHE_CORRUPTED. * src/gdbmopen.c (gdbm_fd_open): Change cache initialization. (_gdbm_init_cache, _gdbm_cache_entry_invalidate: Remove. * src/gdbmsetopt.c (setopt_gdbm_setcachesize): Cache can be re-initialized on the fly. * src/gdbmtool.c: Change bucket printing routines. * src/proto.h (_gdbm_read_bucket_at): Remove. (_gdbm_fetch_data,_gdbm_cache_init,_gdbm_cache_free) (_gdbm_cache_flush,_gdbm_cache_elem_new) (_gdbm_cache_tree_alloc,_gdbm_cache_tree_destroy) (_gdbm_cache_tree_delete,_gdbm_rbt_remove_node) (_gdbm_cache_tree_lookup): New protos. (_gdbm_init_cache,_gdbm_cache_entry_invalidate): Remove. * src/recover.c (_gdbm_finish_transfer): Adapt to the new cache structure. * src/update.c: Likewise. * tests/setopt00.at: Fix second GDBM_SETCACHESIZE test.
-rw-r--r--README6
-rw-r--r--src/Makefile.am1
-rw-r--r--src/bucket.c609
-rw-r--r--src/cachetree.c528
-rw-r--r--src/findkey.c25
-rw-r--r--src/gdbm.h.in19
-rw-r--r--src/gdbmclose.c12
-rw-r--r--src/gdbmdefs.h40
-rw-r--r--src/gdbmerrno.c3
-rw-r--r--src/gdbmopen.c51
-rw-r--r--src/gdbmsetopt.c8
-rw-r--r--src/gdbmtool.c27
-rw-r--r--src/proto.h27
-rw-r--r--src/recover.c24
-rw-r--r--src/update.c14
-rw-r--r--tests/setopt00.at2
16 files changed, 1057 insertions, 339 deletions
diff --git a/README b/README
index 747d1a2..c938120 100644
--- a/README
+++ b/README
@@ -1,6 +1,12 @@
GNU dbm README
See the end of file for copying conditions.
+* Note
+
+This is an experimental branch of GNU DBM. Its goal is to evaluate
+possibilities of improving the caching scheme. Please don't use it
+in production.
+
* Introduction
This file contains brief information about configuring, testing
diff --git a/src/Makefile.am b/src/Makefile.am
index e34bb7e..18c80d9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -41,6 +41,7 @@ lib_LTLIBRARIES = libgdbm.la
libgdbm_la_LIBADD = @LTLIBINTL@
libgdbm_la_SOURCES = \
+ cachetree.c\
gdbmclose.c\
gdbmcount.c\
gdbmdelete.c\
diff --git a/src/bucket.c b/src/bucket.c
index b88972e..4bce594 100644
--- a/src/bucket.c
+++ b/src/bucket.c
@@ -56,7 +56,155 @@ gdbm_dir_entry_valid_p (GDBM_FILE dbf, int dir_index)
&& dir_index < GDBM_DIR_COUNT (dbf)
&& dbf->dir[dir_index] >= dbf->header->block_size;
}
-
+
+static void
+set_cache_entry (GDBM_FILE dbf, cache_elem *elem)
+{
+ dbf->cache_entry = elem;
+ dbf->bucket = dbf->cache_entry->ca_bucket;
+}
+
+
+/* LRU list management */
+
+/* Link ELEM after REF in DBF cache. If REF is NULL, link at head */
+static void
+lru_link_elem (GDBM_FILE dbf, cache_elem *elem, cache_elem *ref)
+{
+ if (!ref)
+ {
+ elem->ca_prev = NULL;
+ elem->ca_next = dbf->cache_mru;
+ if (dbf->cache_mru)
+ dbf->cache_mru->ca_prev = elem;
+ else
+ dbf->cache_lru = elem;
+ dbf->cache_mru = elem;
+ }
+ else
+ {
+ cache_elem *x;
+
+ elem->ca_prev = ref;
+ elem->ca_next = ref->ca_next;
+ if ((x = ref->ca_next))
+ x->ca_prev = elem;
+ else
+ dbf->cache_lru = elem;
+ ref->ca_next = elem;
+ }
+}
+
+/* Unlink ELEM from the list of cache elements in DBF. */
+static void
+lru_unlink_elem (GDBM_FILE dbf, cache_elem *elem)
+{
+ cache_elem *x;
+
+ if ((x = elem->ca_prev))
+ x->ca_next = elem->ca_next;
+ else
+ dbf->cache_mru = elem->ca_next;
+ if ((x = elem->ca_next))
+ x->ca_prev = elem->ca_prev;
+ else
+ dbf->cache_lru = elem->ca_prev;
+ elem->ca_prev = elem->ca_next = NULL;
+}
+
+/* Creates and returns new cache element for DBF. The element is initialized,
+ but not linked to the LRU list.
+ Return NULL on error.
+*/
+cache_elem *
+_gdbm_cache_elem_new (GDBM_FILE dbf, off_t adr)
+{
+ cache_elem *elem;
+
+ elem = dbf->cache_avail;
+ if (elem)
+ {
+ dbf->cache_avail = elem->ca_next;
+ }
+ else
+ {
+ elem = calloc (1, sizeof (*elem));
+
+ if (!elem)
+ return NULL;
+
+ elem->ca_bucket = malloc (dbf->header->bucket_size);
+ if (!elem->ca_bucket)
+ {
+ free (elem);
+ return NULL;
+ }
+ }
+
+ elem->ca_adr = adr;
+ elem->ca_changed = FALSE;
+ elem->ca_data.hash_val = -1;
+ elem->ca_data.elem_loc = -1;
+
+ elem->ca_prev = elem->ca_next = NULL;
+ elem->ca_hits = 0;
+ elem->ca_node = NULL;
+
+ dbf->cache_num++;
+
+ return elem;
+}
+
+/* Frees element ELEM. Unlinks it from the cache tree and LRU list. */
+static void
+cache_elem_free (GDBM_FILE dbf, cache_elem *elem)
+{
+ _gdbm_rbt_remove_node (dbf->cache_tree, elem->ca_node);
+ lru_unlink_elem (dbf, elem);
+ elem->ca_next = dbf->cache_avail;
+ dbf->cache_avail = elem;
+ dbf->cache_num--;
+}
+
+static int
+cache_lookup (GDBM_FILE dbf, off_t adr, cache_elem *ref, cache_elem **ret_elem)
+{
+ int rc;
+ cache_elem *elem;
+
+ dbf->cache_access_count++;
+ rc = _gdbm_cache_tree_lookup (dbf->cache_tree, adr, &elem);
+ switch (rc)
+ {
+ case node_found:
+ elem->ca_hits++;
+ lru_unlink_elem (dbf, elem);
+ break;
+
+ case node_new:
+ if (dbf->cache_num > dbf->cache_size)
+ {
+ cache_elem *last = dbf->cache_lru;
+ if (last->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, last))
+ {
+ cache_elem_free (dbf, elem);
+ return node_failure;
+ }
+ }
+ cache_elem_free (dbf, last);
+ }
+ break;
+
+ default:
+ return rc;
+ }
+ lru_link_elem (dbf, elem, ref);
+ *ret_elem = elem;
+ return rc;
+}
+
/* Find a bucket for DBF that is pointed to by the bucket directory from
location DIR_INDEX. The bucket cache is first checked to see if it
is already in memory. If not, a bucket may be tossed to read the new
@@ -70,8 +218,9 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
int rc;
off_t bucket_adr; /* The address of the correct hash bucket. */
off_t file_pos; /* The return address for lseek. */
- int index; /* Loop index. */
-
+ hash_bucket *bucket;
+ cache_elem *elem;
+
if (!gdbm_dir_entry_valid_p (dbf, dir_index))
{
/* FIXME: negative caching? */
@@ -82,127 +231,66 @@ _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
/* Initial set up. */
dbf->bucket_dir = dir_index;
bucket_adr = dbf->dir[dir_index];
-
- if (dbf->bucket_cache == NULL)
- {
- if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1)
- {
- _gdbm_fatal (dbf, _("couldn't init cache"));
- return -1;
- }
- }
- /* If that one is not already current, we must find it. */
- if (dbf->cache_entry->ca_adr != bucket_adr)
+ switch (cache_lookup (dbf, bucket_adr, NULL, &elem))
{
- size_t lru;
- hash_bucket *bucket;
+ case node_found:
+ break;
- /* Look in the cache. */
- for (index = 0; index < dbf->cache_size; index++)
- {
- if (dbf->bucket_cache[index].ca_adr == bucket_adr)
- {
- dbf->bucket = dbf->bucket_cache[index].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[index];
- return 0;
- }
- }
-
- /* It is not in the cache, read it from the disk. */
-
+ case node_new:
/* Position the file pointer */
file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
if (file_pos != bucket_adr)
{
GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
+ cache_elem_free (dbf, elem);
_gdbm_fatal (dbf, _("lseek error"));
return -1;
}
-
- /* Flush and drop the last recently used cache entry */
- lru = (dbf->last_read + 1) % dbf->cache_size;
- if (dbf->bucket_cache[lru].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[lru]))
- return -1;
- }
- _gdbm_cache_entry_invalidate (dbf, lru);
-
+
/* Read the bucket. */
- rc = _gdbm_full_read (dbf, dbf->bucket_cache[lru].ca_bucket,
- dbf->header->bucket_size);
+ rc = _gdbm_full_read (dbf, elem->ca_bucket, dbf->header->bucket_size);
if (rc)
{
GDBM_DEBUG (GDBM_DEBUG_ERR,
"%s: error reading bucket: %s",
dbf->name, gdbm_db_strerror (dbf));
dbf->need_recovery = TRUE;
+ cache_elem_free (dbf, elem);
_gdbm_fatal (dbf, gdbm_db_strerror (dbf));
return -1;
}
+
/* Validate the bucket */
- bucket = dbf->bucket_cache[lru].ca_bucket;
+ bucket = elem->ca_bucket;
if (!(bucket->count >= 0
&& bucket->count <= dbf->header->bucket_elems
&& bucket->bucket_bits >= 0
&& bucket->bucket_bits <= dbf->header->dir_bits))
{
GDBM_SET_ERRNO (dbf, GDBM_BAD_BUCKET, TRUE);
+ cache_elem_free (dbf, elem);
return -1;
}
/* Validate bucket_avail table */
if (gdbm_bucket_avail_table_validate (dbf, bucket))
- return -1;
-
- /* Finally, store it in cache */
- dbf->last_read = lru;
- dbf->bucket_cache[lru].ca_adr = bucket_adr;
- dbf->bucket = dbf->bucket_cache[lru].ca_bucket;
- dbf->cache_entry = &dbf->bucket_cache[lru];
- dbf->cache_entry->ca_data.elem_loc = -1;
- dbf->cache_entry->ca_changed = FALSE;
- }
- return 0;
-}
-
-int
-_gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
- size_t size)
-{
- off_t file_pos;
- int i;
-
- if (dbf->cache_entry && dbf->cache_entry->ca_adr == off)
- {
- memcpy (bucket, dbf->bucket, size);
- return 0;
- }
-
- /* Look in the cache. */
- for (i = 0; i < dbf->cache_size; i++)
- {
- if (dbf->bucket_cache[i].ca_adr == off)
{
- memcpy (bucket, dbf->bucket_cache[i].ca_bucket, size);
- return 0;
+ cache_elem_free (dbf, elem);
+ return -1;
}
- }
-
- /* Read the bucket. */
- file_pos = gdbm_file_seek (dbf, off, SEEK_SET);
- if (file_pos != off)
- {
- GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
- return -1;
- }
- if (_gdbm_full_read (dbf, bucket, size))
- {
- GDBM_DEBUG (GDBM_DEBUG_ERR,
- "%s: error reading bucket: %s",
- dbf->name, gdbm_db_strerror (dbf));
+
+ /* Update the cache */
+ elem->ca_adr = bucket_adr;
+ elem->ca_data.elem_loc = -1;
+ elem->ca_changed = FALSE;
+
+ break;
+
+ case node_failure:
return -1;
}
+ set_cache_entry (dbf, elem);
+
return 0;
}
@@ -213,86 +301,74 @@ _gdbm_read_bucket_at (GDBM_FILE dbf, off_t off, hash_bucket *bucket,
int
_gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
{
- hash_bucket *bucket[2]; /* Pointers to the new buckets. */
-
- int new_bits; /* The number of bits for the new buckets. */
- int cache_0; /* Location in the cache for the buckets. */
- int cache_1;
- off_t adr_0; /* File address of the new bucket 0. */
- off_t adr_1; /* File address of the new bucket 1. */
- avail_elem old_bucket; /* Avail Struct for the old bucket. */
-
- off_t dir_start0; /* Used in updating the directory. */
- off_t dir_start1;
- off_t dir_end;
-
- off_t *new_dir; /* Pointer to the new directory. */
- off_t dir_adr; /* Address of the new directory. */
- int dir_size; /* Size of the new directory. */
off_t old_adr[GDBM_HASH_BITS]; /* Address of the old directories. */
int old_size[GDBM_HASH_BITS]; /* Size of the old directories. */
int old_count; /* Number of old directories. */
int index; /* Used in array indexing. */
int index1; /* Used in array indexing. */
- int elem_loc; /* Location in new bucket to put element. */
- bucket_element *old_el; /* Pointer into the old bucket. */
- int select; /* Used to index bucket during movement. */
/* No directories are yet old. */
old_count = 0;
-
- if (dbf->bucket_cache == NULL)
+ while (dbf->bucket->count == dbf->header->bucket_elems)
{
- if (_gdbm_init_cache (dbf, DEFAULT_CACHESIZE) == -1)
+ int new_bits; /* The number of bits for the new buckets. */
+ cache_elem *newcache[2]; /* Location in the cache for the buckets. */
+ off_t adr_0; /* File address of the new bucket 0. */
+ off_t adr_1; /* File address of the new bucket 1. */
+ avail_elem old_bucket; /* Avail Struct for the old bucket. */
+
+ off_t dir_start0; /* Used in updating the directory. */
+ off_t dir_start1;
+ off_t dir_end;
+
+ new_bits = dbf->bucket->bucket_bits + 1;
+ /* Allocate two new buckets */
+ adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
+ switch (cache_lookup (dbf, adr_0, dbf->cache_mru, &newcache[0]))
{
- _gdbm_fatal (dbf, _("couldn't init cache"));
+ case node_new:
+ break;
+
+ case node_found:
+ /* should not happen */
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: bucket found where it should not",
+ dbf->name);
+ GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
return -1;
- }
- }
- while (dbf->bucket->count == dbf->header->bucket_elems)
- {
- /* Initialize the "new" buckets in the cache. */
- do
- {
- dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
- cache_0 = dbf->last_read;
- }
- while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
- bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
- if (dbf->bucket_cache[cache_0].ca_changed)
- {
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]))
- return -1;
+ case node_failure:
+ return -1;
}
- do
- {
- dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
- cache_1 = dbf->last_read;
- }
- while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
- bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
- if (dbf->bucket_cache[cache_1].ca_changed)
+ _gdbm_new_bucket (dbf, newcache[0]->ca_bucket, new_bits);
+
+ adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
+ switch (cache_lookup (dbf, adr_1, newcache[0], &newcache[1]))
{
- if (_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]))
- return -1;
+ case node_new:
+ break;
+
+ case node_found:
+ /* should not happen */
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: bucket found where it should not",
+ dbf->name);
+ GDBM_SET_ERRNO (dbf, GDBM_BUCKET_CACHE_CORRUPTED, TRUE);
+ return -1;
+
+ case node_failure:
+ return -1;
}
- new_bits = dbf->bucket->bucket_bits + 1;
- _gdbm_new_bucket (dbf, bucket[0], new_bits);
- _gdbm_new_bucket (dbf, bucket[1], new_bits);
- adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
- if (adr_0 == 0)
- return -1;
- dbf->bucket_cache[cache_0].ca_adr = adr_0;
- adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
- if (adr_1 == 0)
- return -1;
- dbf->bucket_cache[cache_1].ca_adr = adr_1;
+ _gdbm_new_bucket (dbf, newcache[1]->ca_bucket, new_bits);
/* Double the directory size if necessary. */
if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
{
+ off_t *new_dir; /* Pointer to the new directory. */
+ int dir_size; /* Size of the new directory. */
+ off_t dir_adr; /* Address of the new directory. */
+
if (dbf->header->dir_size >= GDBM_MAX_DIR_HALF)
{
GDBM_SET_ERRNO (dbf, GDBM_DIR_OVERFLOW, TRUE);
@@ -335,40 +411,44 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
/* Copy all elements in dbf->bucket into the new buckets. */
for (index = 0; index < dbf->header->bucket_elems; index++)
{
- old_el = &dbf->bucket->h_table[index];
- select = (old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1;
- elem_loc = old_el->hash_value % dbf->header->bucket_elems;
- while (bucket[select]->h_table[elem_loc].hash_value != -1)
+ bucket_element *old_el = &dbf->bucket->h_table[index];
+ hash_bucket *bucket =
+ newcache[(old_el->hash_value >> (GDBM_HASH_BITS - new_bits)) & 1]->ca_bucket;
+ int elem_loc = old_el->hash_value % dbf->header->bucket_elems;
+ while (bucket->h_table[elem_loc].hash_value != -1)
elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
- bucket[select]->h_table[elem_loc] = *old_el;
- bucket[select]->count++;
+ bucket->h_table[elem_loc] = *old_el;
+ bucket->count++;
}
- /* Allocate avail space for the bucket[1]. */
- bucket[1]->bucket_avail[0].av_adr
+ /* Allocate avail space for the newcache[1]->ca_bucket. */
+ newcache[1]->ca_bucket->bucket_avail[0].av_adr
= _gdbm_alloc (dbf, dbf->header->block_size);
- if (bucket[1]->bucket_avail[0].av_adr == 0)
+ if (newcache[1]->ca_bucket->bucket_avail[0].av_adr == 0)
return -1;
- bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
- bucket[1]->av_count = 1;
+ newcache[1]->ca_bucket->bucket_avail[0].av_size
+ = dbf->header->block_size;
+ newcache[1]->ca_bucket->av_count = 1;
- /* Copy the avail elements in dbf->bucket to bucket[0]. */
- bucket[0]->av_count = dbf->bucket->av_count;
+ /* Copy the avail elements in dbf->bucket to newcache[0]->ca_bucket. */
+ newcache[0]->ca_bucket->av_count = dbf->bucket->av_count;
index = 0;
- index1 = 0;
- if (bucket[0]->av_count == BUCKET_AVAIL)
+ if (newcache[0]->ca_bucket->av_count == BUCKET_AVAIL)
{
- /* The avail is full, move the first one to bucket[1]. */
+ /* The avail is full, move the first one to newcache[1]->ca_bucket.*/
_gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
- bucket[1]->bucket_avail,
- &bucket[1]->av_count,
+ newcache[1]->ca_bucket->bucket_avail,
+ &newcache[1]->ca_bucket->av_count,
dbf->coalesce_blocks);
index = 1;
- bucket[0]->av_count--;
+ newcache[0]->ca_bucket->av_count--;
}
+
+ index1 = 0;
for (; index < dbf->bucket->av_count; index++)
{
- bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
+ newcache[0]->ca_bucket->bucket_avail[index1++]
+ = dbf->bucket->bucket_avail[index];
}
/* Update the directory. We have new file addresses for both buckets. */
@@ -381,10 +461,9 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
for (index = dir_start1; index < dir_end; index++)
dbf->dir[index] = adr_1;
-
/* Set changed flags. */
- dbf->bucket_cache[cache_0].ca_changed = TRUE;
- dbf->bucket_cache[cache_1].ca_changed = TRUE;
+ newcache[0]->ca_changed = TRUE;
+ newcache[1]->ca_changed = TRUE;
dbf->bucket_changed = TRUE;
dbf->directory_changed = TRUE;
dbf->second_changed = TRUE;
@@ -395,29 +474,23 @@ _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
/* Invalidate old cache entry. */
old_bucket.av_adr = dbf->cache_entry->ca_adr;
old_bucket.av_size = dbf->header->bucket_size;
- dbf->cache_entry->ca_adr = 0;
- dbf->cache_entry->ca_changed = FALSE;
+ cache_elem_free (dbf, dbf->cache_entry);
/* Set dbf->bucket to the proper bucket. */
- if (dbf->dir[dbf->bucket_dir] == adr_0)
+ if (dbf->dir[dbf->bucket_dir] != adr_0)
{
- dbf->bucket = bucket[0];
- dbf->cache_entry = &dbf->bucket_cache[cache_0];
- _gdbm_put_av_elem (old_bucket,
- bucket[1]->bucket_avail,
- &bucket[1]->av_count,
- dbf->coalesce_blocks);
- }
- else
- {
- dbf->bucket = bucket[1];
- dbf->cache_entry = &dbf->bucket_cache[cache_1];
- _gdbm_put_av_elem (old_bucket,
- bucket[0]->bucket_avail,
- &bucket[0]->av_count,
- dbf->coalesce_blocks);
+ cache_elem *t = newcache[0];
+ newcache[0] = newcache[1];
+ newcache[1] = t;
}
-
+
+ _gdbm_put_av_elem (old_bucket,
+ newcache[1]->ca_bucket->bucket_avail,
+ &newcache[1]->ca_bucket->av_count,
+ dbf->coalesce_blocks);
+ lru_unlink_elem (dbf, newcache[0]);
+ lru_link_elem (dbf, newcache[0], NULL);
+ set_cache_entry (dbf, newcache[0]);
}
/* Get rid of old directories. */
@@ -460,3 +533,159 @@ _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry)
ca_entry->ca_data.elem_loc = -1;
return 0;
}
+
+/* Cache manipulation functions. */
+
+/* Initialize the bucket cache. */
+int
+_gdbm_cache_init (GDBM_FILE dbf, size_t size)
+{
+ if (size < 3)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (size == dbf->cache_size)
+ return 0;
+
+ while (size < dbf->cache_num)
+ {
+ /* Flush the least recently used element */
+ cache_elem *elem = dbf->cache_lru;
+ if (elem->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, elem))
+ return -1;
+ }
+ cache_elem_free (dbf, elem);
+ }
+
+ dbf->cache_size = size;
+
+ return 0;
+}
+
+/* Free the bucket cache */
+void
+_gdbm_cache_free (GDBM_FILE dbf)
+{
+ cache_elem *elem;
+
+ while (dbf->cache_lru)
+ cache_elem_free (dbf, dbf->cache_lru);
+ _gdbm_cache_tree_destroy (dbf->cache_tree);
+ while ((elem = dbf->cache_avail) != NULL)
+ {
+ dbf->cache_avail = elem->ca_next;
+ free (elem->ca_data.dptr);
+ free (elem->ca_bucket);
+ free (elem);
+ }
+}
+
+/* Flush cache content to disk. */
+int
+_gdbm_cache_flush (GDBM_FILE dbf)
+{
+ cache_elem *elem;
+ for (elem = dbf->cache_lru; elem; elem = elem->ca_prev)
+ {
+ if (elem->ca_changed)
+ {
+ if (_gdbm_write_bucket (dbf, elem))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int
+_gdbm_fetch_data (GDBM_FILE dbf, off_t off, size_t size, void *buf)
+{
+ off_t bucket_adr;
+ off_t file_pos;
+ int rc;
+ cache_elem *elem;
+ char *dst = buf;
+
+ bucket_adr = (off / dbf->header->bucket_size)
+ * dbf->header->bucket_size;
+ off -= bucket_adr;
+ while (size)
+ {
+ size_t nbytes;
+
+ switch (cache_lookup (dbf, bucket_adr, dbf->cache_mru, &elem))
+ {
+ case node_found:
+ break;
+
+ case node_new:
+ /* Position the file pointer */
+ file_pos = gdbm_file_seek (dbf, bucket_adr, SEEK_SET);
+ if (file_pos != bucket_adr)
+ {
+ GDBM_SET_ERRNO (dbf, GDBM_FILE_SEEK_ERROR, TRUE);
+ cache_elem_free (dbf, elem);
+ _gdbm_fatal (dbf, _("lseek error"));
+ return -1;
+ }
+
+ /* Read the bucket. */
+ rc = _gdbm_full_read (dbf, elem->ca_bucket,
+ dbf->header->bucket_size);
+ if (rc)
+ {
+ GDBM_DEBUG (GDBM_DEBUG_ERR,
+ "%s: error reading data bucket: %s",
+ dbf->name, gdbm_db_strerror (dbf));
+ dbf->need_recovery = TRUE;
+ cache_elem_free (dbf, elem);
+ _gdbm_fatal (dbf, gdbm_db_strerror (dbf));
+ return -1;
+ }
+ break;
+
+ case node_failure:
+ return -1;
+ }
+
+ nbytes = dbf->header->bucket_size - off;
+ if (nbytes > size)
+ nbytes = size;
+ memcpy (dst, (char*) elem->ca_bucket + off, nbytes);
+ dst += nbytes;
+ size -= nbytes;
+ bucket_adr++;
+ off = 0;
+ }
+ return 0;
+}
+
+void
+gdbm_get_cache_stats (GDBM_FILE dbf,
+ size_t *access_count,
+ size_t *cache_count,
+ struct gdbm_cache_stat *bstat,
+ size_t nstat)
+{
+ if (access_count)
+ *access_count = dbf->cache_access_count;
+ if (cache_count)
+ *cache_count = dbf->cache_num;
+ if (bstat)
+ {
+ size_t i;
+ cache_elem *elem;
+
+ if (nstat > dbf->cache_num)
+ nstat = dbf->cache_num;
+
+ for (i = 0, elem = dbf->cache_mru; i < nstat; i++, elem = elem->ca_next)
+ {
+ bstat[i].adr = elem->ca_adr;
+ bstat[i].hits = elem->ca_hits;
+ }
+ }
+}
diff --git a/src/cachetree.c b/src/cachetree.c
new file mode 100644
index 0000000..b27d8a7
--- /dev/null
+++ b/src/cachetree.c
@@ -0,0 +1,528 @@
+/* cachetree.c - Implementation of the red-black tree for cache lookups. */
+
+/* This file is part of GDBM, the GNU data base manager.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+
+ GDBM is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GDBM is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "autoconf.h"
+#include "gdbmdefs.h"
+#define NDEBUG
+#include "assert.h"
+
+enum cache_node_color { RED, BLACK };
+
+typedef struct cache_node cache_node;
+struct cache_node
+{
+ cache_node *left, *right, *parent;
+ enum cache_node_color color;
+ cache_elem *elem;
+};
+
+struct cache_tree
+{
+ cache_node *root; /* Root of the tree */
+ cache_node *avail; /* List of available nodes, linked by parent field */
+ GDBM_FILE dbf; /* Database this tree belongs to */
+};
+
+/* Allocate and return a new node. Pick the head item from the avail
+ list and update the avail pointer. If the list is empty, malloc
+ a new node.
+ All members in the returned node are filled with 0.
+*/
+static cache_node *
+rbt_node_alloc (cache_tree *tree)
+{
+ cache_node *n;
+
+ n = tree->avail;
+ if (n)
+ tree->avail = n->parent;
+ else
+ {
+ n = malloc (sizeof (*n));
+ if (!n)
+ return NULL;
+ }
+ memset (n, 0, sizeof (*n));
+ return n;
+}
+
+/* Return the node N to the avail list in TREE. */
+static void
+rbt_node_dealloc (cache_tree *tree, cache_node *n)
+{
+ n->parent = tree->avail;
+ tree->avail = n;
+}
+
+/* Red-black tree properties:
+ 1. Each node is either red or black.
+ 2. The root node is black.
+ 3. All leaves are black and contain no data.
+ 4. Every red node has two children, and both are black.
+ IOW, the parent of every red node is black.
+ 5. All paths from any given node to its leaf nodes contain the same
+ number of black nodes.
+ */
+
+/* Auxiliary functions for accessing nodes. */
+
+/* Return the grandparent node of N.
+ Prerequisite: N may not be root.
+*/
+static inline cache_node *
+grandparent (cache_node *n)
+{
+ return n->parent->parent;
+}
+
+/* Return the sibling node of N.
+ Prerequisite: N may not be root.
+*/
+static inline cache_node *
+sibling (cache_node *n)
+{
+ return (n == n->parent->left) ? n->parent->right : n->parent->left;
+}
+
+/* Return the uncle node of N.
+ Prerequisite: N must be at least 2 nodes away from root.
+*/
+static inline cache_node *
+uncle (cache_node *n)
+{
+ return sibling (n->parent);
+}
+
+/* Returns the color of the node N.
+ Empty leaves are represented by NULL, therefore NULL is assumed to
+ be black (see property 3).
+*/
+static inline enum cache_node_color
+node_color (cache_node *n)
+{
+ return n == NULL ? BLACK : n->color;
+}
+
+/* Replace the OLDN with NEWN.
+ Does not modify OLDN. */
+static void
+replace_node (cache_tree *tree, cache_node *oldn, cache_node *newn)
+{
+ if (oldn->parent == NULL)
+ tree->root = newn;
+ else if (oldn == oldn->parent->left)
+ oldn->parent->left = newn;
+ else
+ oldn->parent->right = newn;
+
+ if (newn != NULL)
+ newn->parent = oldn->parent;
+}
+
+/* Rotate the TREE left over the node N. */
+static void
+rotate_left (cache_tree *tree, cache_node *n)
+{
+ cache_node *right = n->right;
+ replace_node (tree, n, right);
+ n->right = right->left;
+ if (right->left != NULL)
+ right->left->parent = n;
+ right->left = n;
+ n->parent = right;
+}
+
+/* Rotate the TREE right over the node N. */
+static void
+rotate_right (cache_tree *tree, cache_node *n)
+{
+ cache_node *left = n->left;
+ replace_node (tree, n, left);
+ n->left = left->right;
+ if (left->right != NULL)
+ left->right->parent = n;
+ left->right = n;
+ n->parent = left;
+}
+
+/* Node deletion */
+static void rbt_delete_fixup (cache_tree *tree, cache_node *n);
+
+/* Remove N from the TREE. */
+void
+_gdbm_rbt_remove_node (cache_tree *tree, cache_node *n)
+{
+ cache_node *child;
+
+ /* If N has both left and right children, reduce the problem to
+ the node with only one child. To do so, find the in-order
+ predecessor of N, copy its value (elem) to N and then delete
+ the predecessor. */
+ if (n->left != NULL && n->right != NULL)
+ {
+ cache_node *p;
+ for (p = n->left; p->right; p = p->right)
+ ;
+ n->elem = p->elem;
+ n->elem->ca_node = n;
+ n = p;
+ }
+
+ /* N has only one child. Select it. */
+ child = n->left ? n->left : n->right;
+ if (node_color (n) == BLACK)
+ {
+ n->color = node_color (child);
+ rbt_delete_fixup (tree, n);
+ }
+ replace_node (tree, n, child);
+ if (n->parent == NULL && child != NULL) /* root should be black */
+ child->color = BLACK;
+
+ /* Return N to the avail pool */
+ rbt_node_dealloc (tree, n);
+}
+
+static void
+rbt_delete_fixup (cache_tree *tree, cache_node *n)
+{
+ while (1)
+ {
+ if (n->parent == NULL)
+ {
+ /* If N has become the root node, deletion resulted in removing
+ one black node (prior root) from every path, so all properties
+ still hold.
+ */
+ return;
+ }
+ else
+ {
+ /* If N has a red sibling, change the colors of the parent and
+ sibling and rotate about the parent. Thus, the sibling becomes
+ grandparent and we can proceed to the next case.
+ */
+ if (node_color (sibling (n)) == RED)
+ {