From 87279b57516dfd891ef03858dac1d644df758b4c Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Sat, 14 Mar 2020 12:02:33 +0200 Subject: Initial commit --- .gitignore | 39 ++++++ .gitmodules | 7 ++ Makefile.am | 22 ++++ bootstrap | 8 ++ configure.ac | 18 +++ gdbm/master | 1 + gdbm/newcache | 1 + src/.gitignore | 3 + src/Makefile.am | 2 + src/benchmark.mk.in | 50 ++++++++ src/dropcache.c | 24 ++++ src/fetchkeys.c | 320 +++++++++++++++++++++++++++++++++++++++++++++++ src/gnuplot.m4 | 13 ++ src/master/Makefile.am | 5 + src/newcache/Makefile.am | 5 + src/runtest | 97 ++++++++++++++ 16 files changed, 615 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 Makefile.am create mode 100644 bootstrap create mode 100644 configure.ac create mode 160000 gdbm/master create mode 160000 gdbm/newcache create mode 100644 src/.gitignore create mode 100644 src/Makefile.am create mode 100644 src/benchmark.mk.in create mode 100644 src/dropcache.c create mode 100644 src/fetchkeys.c create mode 100644 src/gnuplot.m4 create mode 100644 src/master/Makefile.am create mode 100644 src/newcache/Makefile.am create mode 100755 src/runtest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b2a687e --- /dev/null +++ b/.gitignore @@ -0,0 +1,39 @@ +*.a +*.la +*.lo +*.o +*.orig +*.pyc +*.rej +*.tar.bz2 +*.tar.gz +*.tar.xz +*~ +*.gdbm +*.cflow +.deps +.emacs* +.libs +ABOUT-NLS +ChangeLog +INSTALL +Makefile +Makefile.in +TAGS +aclocal.m4 +autoconf.h +autoconf.h.in +autom4te.cache +build-aux +config.log +config.status +configure +configure.ac +core +gmon.out +libltdl +libtool +m4 +stamp-h1 +tmp/ +benchmarks diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a4fd957 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,7 @@ +[submodule "gdbm/newcache"] + path = gdbm/newcache + url = git://git.gnu.org.ua/gdbm.git + branch = newcache +[submodule "gdbm/master"] + path = gdbm/master + url = git://git.gnu.org.ua/gdbm.git diff 
--git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..a7c97e1
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,29 @@
+ACLOCAL_AMFLAGS = -I m4
+SUBDIRS = gdbm/master gdbm/newcache src
+BENCHMARKDIR=benchmarks
+
+# Run the benchmark for a database of NRECS records from which NKEYS keys
+# are fetched.  NRECS and NKEYS must be set on the make command line;
+# MAXCACHE is optional.  The settings are written to a makefile in
+# $(BENCHMARKDIR)/NRECS-NKEYS, created on first use, and make is then
+# run in that directory.
+.PHONY: benchmark
+benchmark:
+	@if test -z "$(NRECS)"; then \
+	    echo "Please set NRECS" >&2; \
+	    exit 1; \
+	fi; \
+	if test -z "$(NKEYS)"; then \
+	    echo "Please set NKEYS" >&2; \
+	    exit 1; \
+	fi; \
+	DIRNAME="$(BENCHMARKDIR)/$(NRECS)-$(NKEYS)"; \
+	if ! test -f "$$DIRNAME/Makefile"; then \
+	    mkdir -p "$$DIRNAME"; \
+	    (echo "NRECS=$(NRECS)"; \
+	     echo "NKEYS=$(NKEYS)"; \
+	     echo "MAXCACHE=$(MAXCACHE)"; \
+	     echo "include @abs_top_builddir@/src/benchmark.mk") > "$$DIRNAME/Makefile"; \
+	fi; \
+	$(MAKE) -C "$$DIRNAME"
+
diff --git a/bootstrap b/bootstrap
new file mode 100644
index 0000000..1726aea
--- /dev/null
+++ b/bootstrap
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Set up a fresh checkout: fetch and bootstrap the gdbm submodules, then
+# generate the build system.  Stop at the first command that fails.
+set -e
+test -d m4 || mkdir m4
+test -d gdbm || mkdir gdbm
+git submodule init
+git submodule update
+git submodule foreach ./bootstrap
+autoreconf -f -i -s
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..5abd4ff
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,18 @@
+AC_PREREQ([2.69])
+AC_INIT([gdbm-benchmarks],[0.1],[gray@gnu.org],
+        [gdbm-benchmarks],
+        [https://puszcza.gnu.org.ua/projects/gdbm])
+AC_CONFIG_SRCDIR([src/fetchkeys.c])
+AC_CONFIG_AUX_DIR([build-aux])
+AM_INIT_AUTOMAKE([-Wall -Werror 1.11.5 foreign tar-ustar silent-rules])
+
+# Checks for programs.
+AC_PROG_CC
+
+AC_CONFIG_SUBDIRS([gdbm/master gdbm/newcache])
+AC_CONFIG_FILES([Makefile
+                 src/Makefile
+                 src/master/Makefile
+                 src/newcache/Makefile
+                 src/benchmark.mk])
+AC_OUTPUT
diff --git a/gdbm/master b/gdbm/master
new file mode 160000
index 0000000..4fb2326
--- /dev/null
+++ b/gdbm/master
@@ -0,0 +1 @@
+Subproject commit 4fb2326a4ac0e6f45c21f7651b1c87317567fd82
diff --git a/gdbm/newcache b/gdbm/newcache
new file mode 160000
index 0000000..1f94a81
--- /dev/null
+++ b/gdbm/newcache
@@ -0,0 +1 @@
+Subproject commit 1f94a81a9f3a9955ca74dff07b7909595a850485
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..ef23521
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,3 @@
+dropcache
+fetchkeys
+benchmark.mk
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..c019df3
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,4 @@
+SUBDIRS=newcache master
+# fetchkeys.c is compiled in both subdirectories (found through VPATH),
+# so the sources kept here only need to be distributed.
+EXTRA_DIST=fetchkeys.c dropcache.c runtest gnuplot.m4 benchmark.mk.in
diff --git a/src/benchmark.mk.in b/src/benchmark.mk.in
new file mode 100644
index 0000000..c576cd6
--- /dev/null
+++ b/src/benchmark.mk.in
@@ -0,0 +1,57 @@
+# Makefile fragment for one benchmark run.  It is included by the makefile
+# that the top-level "benchmark" target generates, which sets NRECS, NKEYS
+# and MAXCACHE.
+
+AM_DEFAULT_VERBOSITY=0
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+
+abs_top_srcdir = @abs_top_srcdir@
+abs_top_builddir = @abs_top_builddir@
+
+MASTERDIR=$(abs_top_builddir)/gdbm/master
+GDBMTOOL=$(MASTERDIR)/src/gdbmtool
+TESTDIR=$(MASTERDIR)/tests
+
+all: test
+test: master.log newcache.log
+# Run the fetchkeys binary built in src/master or src/newcache under
+# runtest.  Unless MAXCACHE is set, the final cache size is twice the
+# "Buckets" figure found on the second line of the gdbmtool "dir" output.
+%.log: keys.txt a.gdbm
+	$(AM_V_GEN)MAXCACHE=$(MAXCACHE); \
+	$(abs_top_srcdir)/src/runtest -d \
+	    --final $${MAXCACHE:-$$(($$($(GDBMTOOL) a.gdbm dir |\
+	        sed -n -e '2{' \
+	               -e 's/.*Buckets = //' \
+	               -e 's/\.$$//' \
+	               -e 'p}') * 2))} \
+	    --log-file=$*.log \
+	    $(abs_top_builddir)/src/$*/fetchkeys
+clean:
+	rm -f master.log newcache.log
+allclean: clean
+	rm -f keys.txt a.gdbm benchmark.gnuplot
+keys.txt:
+	@$(MAKE) -C $(TESTDIR) num2word
+	$(AM_V_GEN)$(TESTDIR)/num2word 1:$(NRECS) | \
+	    cut -f1 | shuf | head -n $(NKEYS) > keys.txt
+a.gdbm:
+	@$(MAKE) -C $(TESTDIR) num2word gtload
+	$(AM_V_GEN)$(TESTDIR)/num2word 1:$(NRECS) | $(TESTDIR)/gtload -clear a.gdbm
+benchmark.gnuplot: $(abs_top_srcdir)/src/gnuplot.m4
+	m4 -DNRECS=$(NRECS) -DNKEYS=$(NKEYS) \
+	    $(abs_top_srcdir)/src/gnuplot.m4 > benchmark.gnuplot
+plot: test benchmark.gnuplot
+	gnuplot -p benchmark.gnuplot
+.PHONY: all test clean allclean plot
diff --git a/src/dropcache.c b/src/dropcache.c
new file mode 100644
index 0000000..659f00a
--- /dev/null
+++ b/src/dropcache.c
@@ -0,0 +1,33 @@
+/* dropcache - make the Linux kernel drop its page cache, dentries and
+   inodes, so that the next benchmark run starts with cold caches.
+   Exits with status 0 on success and 1 on failure.  */
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static char const drop_caches[] = "/proc/sys/vm/drop_caches";
+
+int
+main (int argc, char **argv)
+{
+  int fd;
+
+  /* Only clean pages can be dropped, so write out dirty ones first.  */
+  sync ();
+
+  fd = open (drop_caches, O_WRONLY);
+  if (fd == -1)
+    {
+      perror (drop_caches);
+      return 1;
+    }
+  /* Writing 3 requests both the page cache and dentries/inodes.  */
+  if (write (fd, "3\n", 2) != 2)
+    {
+      perror (drop_caches);
+      close (fd);
+      return 1;
+    }
+  close (fd);
+  return 0;
+}
diff --git a/src/fetchkeys.c b/src/fetchkeys.c
new file mode 100644
index 0000000..32f0199
--- /dev/null
+++ b/src/fetchkeys.c
@@ -0,0 +1,387 @@
+/* fetchkeys - time GDBM key lookups.
+
+   Usage: fetchkeys [-nTpv] [-c SIZE] [DBFILE [KEYFILE]]
+
+   Opens DBFILE (default "a.gdbm") for reading and fetches each key listed,
+   one per line, in KEYFILE (default "keys.txt").
+
+     -c SIZE  set the GDBM cache size; SIZE may end in K, M or G
+     -n       open the database with GDBM_NOMMAP
+     -p       print every record fetched
+     -T       do not print the timing summary
+     -v       print the PID and a progress indicator
+
+   The timing summary is three lines of seconds: the total run time, the
+   time taken by gdbm_open, and the time elapsed after gdbm_open returned.
+
+   Exit status: 0 on success, 1 on usage or fatal errors, 2 if at least
+   one key could not be fetched.  */
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <gdbm.h>
+
+/* Convert STR, a decimal number optionally followed by a binary multiplier
+   suffix (K, M or G, in either case), to unsigned long.  Print a diagnostic
+   and exit with status 1 if STR is malformed or the value overflows.  */
+static unsigned long
+get_size (char const *str)
+{
+  char *p;
+  unsigned long n, m;
+
+  errno = 0;
+  n = strtoul (str, &p, 10);
+  if (errno)
+    {
+      perror (str);
+      exit (1);
+    }
+  if (p == str)
+    {
+      /* strtoul consumed nothing, i.e. STR has no digits at all.  */
+      fprintf (stderr, "%s: bad number (near %s)\n", str, p);
+      exit (1);
+    }
+
+  switch (*p)
+    {
+    case 'k':
+    case 'K':
+      p++;
+      m = 1024;
+      break;
+
+    case 'm':
+    case 'M':
+      p++;
+      m = 1024UL * 1024;
+      break;
+
+    case 'g':
+    case 'G':
+      p++;
+      m = 1024UL * 1024 * 1024;
+      break;
+
+    default:
+      /* Either no suffix, or garbage that is diagnosed below.  */
+      m = 1;
+      break;
+    }
+
+  if (*p)
+    {
+      fprintf (stderr, "%s: bad number (near %s)\n", str, p);
+      exit (1);
+    }
+
+  if (ULONG_MAX / m < n)
+    {
+      fprintf (stderr, "%s: number too big\n", str);
+      exit (1);
+    }
+  return m * n;
+}
+
+#ifdef DUMPSTATS
+/* Dump the cache statistics of DBF to the file "N.stat": the access count,
+   the cache count, then one "ADR HITS" line per reported cache entry.  */
+static void
+dump_stats (GDBM_FILE dbf, size_t n)
+{
+  static size_t cache_size;
+  static struct gdbm_cache_stat *stat;
+  size_t access_count, cache_count;
+  FILE *fp;
+  char fname[80];
+  size_t i;
+
+  if (cache_size == 0)
+    {
+      /* First call: allocate one statistics slot per cache entry.  */
+      if (gdbm_setopt (dbf, GDBM_GETCACHESIZE, &cache_size,
+                       sizeof (cache_size)))
+        {
+          fprintf (stderr, "GDBM_GETCACHESIZE: %s\n",
+                   gdbm_strerror (gdbm_errno));
+          exit (1);
+        }
+      stat = calloc (cache_size, sizeof (stat[0]));
+      assert (stat != NULL);
+    }
+
+  gdbm_get_cache_stats (dbf, &access_count, &cache_count, stat, cache_size);
+
+  snprintf (fname, sizeof (fname), "%zu.stat", n);
+
+  fp = fopen (fname, "w");
+  if (!fp)
+    {
+      fprintf (stderr, "%s: can't write: %s\n", fname, strerror (errno));
+      exit (1);
+    }
+
+  fprintf (fp, "%zu\n", access_count);
+  fprintf (fp, "%zu\n", cache_count);
+  for (i = 0; i < cache_count; i++)
+    {
+      /* The cast makes the format valid whatever integer type adr has.  */
+      fprintf (fp, "%llu %zu\n",
+               (unsigned long long) stat[i].adr, stat[i].hits);
+    }
+  fclose (fp);
+
+  /* Blank the progress line before reporting.  */
+  fprintf (stderr, "%80.80s\rDump file %s\n", "", fname);
+}
+
+/* Set by the SIGUSR1 handler; polled and cleared by the main loop.  */
+static volatile sig_atomic_t statsig;
+
+/* SIGUSR1 handler: request a statistics dump and re-arm itself.  */
+void
+sighan (int sig)
+{
+  statsig = 1;
+  signal (sig, sighan);
+}
+#endif
+
+char *dbname = "a.gdbm";        /* Database file name.  */
+char *kfname = "keys.txt";      /* Key list file name.  */
+int verbose = 0;                /* Show progress (-v).  */
+int time_verbose = 1;           /* Print the timing summary (unless -T).  */
+
+char *keybuf;                   /* Contents of the key file.  */
+char *keyptr;                   /* Next key to return from nextkey.  */
+char *keyend;                   /* End of keybuf.  */
+size_t nkeys;                   /* Number of keys in keybuf.  */
+
+/* Return the next key from the buffer filled by readkeys, or NULL when
+   all keys have been returned.  */
+char *
+nextkey (void)
+{
+  char *ret = keyptr;
+
+  if (keyptr >= keyend)
+    return NULL;
+  keyptr += strlen (keyptr) + 1;
+  return ret;
+}
+
+/* Read the file NAME into keybuf, split it into NUL-terminated keys at
+   newlines and count them in nkeys.  Exits with status 1 on error.  */
+void
+readkeys (char const *name)
+{
+  FILE *fp;
+  struct stat st;
+  char *p;
+
+  fp = fopen (name, "r");
+  if (!fp)
+    {
+      fprintf (stderr, "%s: can't read: %s\n", name, strerror (errno));
+      exit (1);
+    }
+
+  if (fstat (fileno (fp), &st))
+    {
+      fprintf (stderr, "%s: can't stat: %s\n", name, strerror (errno));
+      exit (1);
+    }
+
+  keybuf = malloc (st.st_size + 1);
+  assert (keybuf != NULL);
+  keyend = keybuf + st.st_size;
+  *keyend = 0;
+
+  /* fread returns 0 for a zero-sized item, so an empty file must not be
+     mistaken for a read error.  */
+  if (st.st_size > 0 && fread (keybuf, st.st_size, 1, fp) != 1)
+    {
+      fprintf (stderr, "%s: read error: %s\n", name, strerror (errno));
+      exit (1);
+    }
+  fclose (fp);
+  keyptr = keybuf;
+
+  nkeys = 0;
+  for (p = keybuf; p < keyend; p++)
+    if (*p == '\n')
+      {
+        *p = 0;
+        nkeys++;
+      }
+  /* nextkey also returns a last key that lacks the final newline, so it
+     has to be counted as well.  */
+  if (keyend > keybuf && keyend[-1] != 0)
+    nkeys++;
+}
+
+/* Print TV as seconds with a six-digit fraction.  */
+static void
+print_time (struct timeval const *tv)
+{
+  printf ("%lu.%06lu\n",
+          (unsigned long) tv->tv_sec, (unsigned long) tv->tv_usec);
+}
+
+int
+main (int argc, char **argv)
+{
+  GDBM_FILE dbf;
+  datum key;
+  datum data;
+  int status = 0;
+  int i;
+  int print = 0;
+  unsigned long maxsize = 0;
+  int flags = 0;
+  struct timeval t_start, t_open, t_now, td;
+  char *keystr;
+
+  while ((i = getopt (argc, argv, "c:nTpv")) != -1)
+    {
+      switch (i)
+        {
+        case 'n':
+          flags |= GDBM_NOMMAP;
+          break;
+
+        case 'p':
+          print = 1;
+          break;
+
+        case 'c':
+          maxsize = get_size (optarg);
+          break;
+
+        case 'T':
+          time_verbose = 0;
+          break;
+
+        case 'v':
+          verbose = 1;
+          break;
+
+        default:
+          exit (1);
+        }
+    }
+
+  argc -= optind;
+  argv += optind;
+
+  switch (argc)
+    {
+    case 2:
+      kfname = argv[1];
+      /* fall through */
+    case 1:
+      dbname = argv[0];
+      break;
+    case 0:
+      break;
+    default:
+      fprintf (stderr, "too many arguments\n");
+      exit (1);
+    }
+
+  if (verbose)
+    printf ("PID %lu\n", (unsigned long) getpid ());
+
+#ifdef DUMPSTATS
+  signal (SIGUSR1, sighan);
+#endif
+
+  gettimeofday (&t_start, NULL);
+  dbf = gdbm_open (dbname, 0, GDBM_READER | flags, 00664, NULL);
+  if (dbf == NULL)
+    {
+      fprintf (stderr, "%s: can't open database: %s\n", dbname,
+               gdbm_strerror (gdbm_errno));
+      exit (1);
+    }
+  gettimeofday (&t_open, NULL);
+
+  if (maxsize)
+    {
+      int rc = gdbm_setopt (dbf, GDBM_SETCACHESIZE, &maxsize, sizeof (maxsize));
+      if (rc)
+        {
+          fprintf (stderr, "GDBM_SETCACHESIZE: %s\n", gdbm_strerror (gdbm_errno));
+          return 1;
+        }
+    }
+
+  readkeys (kfname);
+  i = 0;
+  while ((keystr = nextkey ()) != NULL)
+    {
+      i++;
+
+      if (verbose)
+        {
+          unsigned long long k = (unsigned long long) i * 100 / nkeys;
+          gettimeofday (&t_now, NULL);
+          timersub (&t_now, &t_start, &td);
+          /* Elapsed seconds, keys done / total keys, percentage.  */
+          printf ("%8lu %6d / %6zu %3llu%%\r",
+                  (unsigned long) td.tv_sec, i, nkeys, k);
+          fflush (stdout);
+        }
+
+#ifdef DUMPSTATS
+      if (statsig)
+        {
+          dump_stats (dbf, i);
+          statsig = 0;
+        }
+#endif
+
+      key.dptr = keystr;
+      key.dsize = strlen (key.dptr);
+      data = gdbm_fetch (dbf, key);
+      if (data.dptr == NULL)
+        {
+          status = 2;
+          if (gdbm_errno == GDBM_ITEM_NOT_FOUND)
+            {
+              fprintf (stderr, "%s: not found\n", keystr);
+            }
+          else
+            {
+              fprintf (stderr, "%s: error: %s\n", keystr,
+                       gdbm_strerror (gdbm_errno));
+            }
+          continue;
+        }
+      if (print)
+        printf ("%s: %*.*s\n", keystr, data.dsize, data.dsize, data.dptr);
+      free (data.dptr);
+    }
+  gdbm_close (dbf);
+  if (verbose)
+    putchar ('\n');
+  if (time_verbose)
+    {
+      gettimeofday (&t_now, NULL);
+      timersub (&t_now, &t_start, &td);
+      print_time (&td);         /* Total.  */
+      timersub (&t_open, &t_start, &td);
+      print_time (&td);         /* gdbm_open.  */
+      timersub (&t_now, &t_open, &td);
+      print_time (&td);         /* Everything after gdbm_open.  */
+    }
+  return status;
+}
diff --git a/src/gnuplot.m4 b/src/gnuplot.m4
new file mode 100644
index 0000000..7ea1a79
--- /dev/null
+++ b/src/gnuplot.m4
@@ -0,0 +1,15 @@
+divert(-1)
+# Gnuplot script template.  It is processed with m4 -DNRECS=... -DNKEYS=...,
+# which substitutes both names in the plot title below.
+changequote([,])
+divert(0)dnl
+set title 'Execution time as function of cache size. Database size NRECS entries. Key set size NKEYS'
+show title
+set xlabel 'Cache entries'
+set ylabel 'Runtime (seconds)'
+plot 'master.log' \
+     using 1:4 smooth bezier \
+     title 'GDBM 1.18.1', \
+     'newcache.log' \
+     using 1:4 smooth bezier \
+     title 'GDBM newcache'
diff --git a/src/master/Makefile.am b/src/master/Makefile.am
new file mode 100644
index 0000000..c3adae5
--- /dev/null
+++ b/src/master/Makefile.am
@@ -0,0 +1,5 @@
+noinst_PROGRAMS=fetchkeys
+fetchkeys_SOURCES=fetchkeys.c
+VPATH=$(top_srcdir)/src
+LDADD=../../gdbm/master/src/.libs/libgdbm.a
+AM_CPPFLAGS=-I$(top_srcdir)/gdbm/master/src/
diff --git a/src/newcache/Makefile.am b/src/newcache/Makefile.am
new file mode 100644
index 0000000..a916d36
--- /dev/null
+++ b/src/newcache/Makefile.am
@@ -0,0 +1,5 @@
+noinst_PROGRAMS=fetchkeys
+fetchkeys_SOURCES=fetchkeys.c
+VPATH=$(top_srcdir)/src
+LDADD=../../gdbm/newcache/src/.libs/libgdbm.a
+AM_CPPFLAGS=-I$(top_srcdir)/gdbm/newcache/src/
diff --git a/src/runtest b/src/runtest
new file mode 100755
index 0000000..33322b1
--- /dev/null
+++ b/src/runtest
@@ -0,0 +1,97 @@
+#!/bin/sh
+#! 
-*-perl-*-
+eval 'exec perl -x -S $0 ${1+"$@"}'
+    if 0;
+
+# Usage: runtest [OPTIONS] COMMAND [ARGS...]
+# Run COMMAND with "-c SIZE" appended for each cache SIZE from --init to
+# --final in --step increments, -n times per size.  The last three lines
+# of its output must be the total, open and loop times; their averages are
+# logged as "SIZE TOTAL OPEN LOOP".  Earlier output lines are echoed.
+
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case require_order);
+use File::Basename;
+use File::Spec;
+
+my $nsamples = 1;      # Number of times to run each test.
+my $c_init = 100;      # Initial cache capacity.
+my $c_step = 100;      # Cache capacity increment.
+my $c_final = 100;     # Final cache capacity.
+my $drop_caches;       # Drop system disk caches before each run.
+my $log_file;          # Name of the log file.
+
+# Run the command given as arguments $nsamples times and return the
+# averaged (total, open, loop) times.  Dies if a run prints no timings.
+sub runtest {
+    my @command = @_;
+    my ($t_total, $t_open, $t_loop) = (0, 0, 0);
+
+    for (1 .. $nsamples) {
+        if ($drop_caches && system($drop_caches) != 0) {
+            warn "$drop_caches failed; system caches not dropped\n";
+        }
+        open(my $ph, '-|', @command)
+            or die "can't run ".join(' ', @command).": $!\n";
+        # Hold back the last three complete lines; echo everything else.
+        my @inbuf;
+        while (my $line = <$ph>) {
+            if (chomp $line) {
+                print shift(@inbuf), "\n" if @inbuf == 3;
+                push @inbuf, $line;
+            } else {
+                print $line;
+            }
+        }
+        close $ph;
+        unless (@inbuf == 3) {
+            print "$_\n" for @inbuf;
+            die "no timing info\n";
+        }
+        $t_total += $inbuf[0];
+        $t_open  += $inbuf[1];
+        $t_loop  += $inbuf[2];
+    }
+    return ($t_total/$nsamples, $t_open/$nsamples, $t_loop/$nsamples);
+}
+
+GetOptions(
+    'n=n' => \$nsamples,
+    'init|i=n' => \$c_init,
+    'step|s=n' => \$c_step,
+    'final|end|e=n' => \$c_final,
+    'drop-caches|d' => sub {
+        $drop_caches = File::Spec->catfile(dirname($0), 'dropcache');
+        unless (-x $drop_caches) {
+            # NOTE(review): message text reconstructed; the original was lost.
+            print STDERR <<EOT;
+$0: $drop_caches is missing or not executable.
+Build it from dropcache.c and make sure it is permitted to write to
+/proc/sys/vm/drop_caches.
+EOT
+            exit(1);
+        }
+    },
+    'log-file|l=s' => \$log_file
+) or exit(1);
+
+die "command line missing\n" unless @ARGV;
+die "number of samples must be positive\n" unless $nsamples > 0;
+
+$c_init = 10 if $c_init > $c_final;
+# A non-positive step would make the loop below run forever.
+$c_step = 1 if $c_step < 1 || $c_step > $c_final - $c_init;
+
+my $log;
+if ($log_file) {
+    open($log, '>', $log_file) or die "can't open log file $log_file: $!";
+} else {
+    open($log, '>&', \*STDOUT) or die "can't dup STDOUT: $!";
+}
+
+for (my $c = $c_init; $c <= $c_final; $c += $c_step) {
+    my ($t_total, $t_open, $t_loop) = runtest(@ARGV, '-c', $c);
+    printf $log "%d %.6f %.6f %.6f\n", $c, $t_total, $t_open, $t_loop;
+}
+close($log) or die "can't close log: $!";
-- 
cgit v1.2.1