From ef3f928cc712f6774e19f8cb8f880cbeac15e8ff Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Tue, 15 Oct 2013 18:38:49 +0300 Subject: Change default log naming. * configure.ac: Call AM_PROG_CC_C_O * src/binlog.c (BLF_TRUNCATE): New flag. (vmod_init): Change pattern initialization. New parameter "reuselog". (createfile): Remove O_TRUNC. (checkheader): New function. (newfile): Reuse existing file, if it is the first file to be opened after varnish startup and its header matches exactly our data. * src/binlogsel.c: Use indexed directory structure to speed up searches. * src/vmod-binlog.h (BINLOG_PATTERN): Change pattern. (BINLOG_GLOB_PATTERN,BINLOG_INDEX): New defines. --- configure.ac | 1 + src/binlog.c | 181 ++++++++++++++++++++++++++++++++++++---- src/binlogsel.c | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++---- src/vmod-binlog.h | 18 +++- 4 files changed, 410 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index 2bb8f38..56e0411 100644 --- a/configure.ac +++ b/configure.ac @@ -28,6 +28,7 @@ AM_INIT_AUTOMAKE([gnu tar-ustar]) AC_GNU_SOURCE AC_PROG_CC AC_PROG_CC_STDC +AM_PROG_CC_C_O if test "x$ac_cv_prog_cc_c99" = xno; then AC_MSG_ERROR([could not find a C99 compatible compiler]) fi diff --git a/src/binlog.c b/src/binlog.c index 765b945..e7ac7e5 100644 --- a/src/binlog.c +++ b/src/binlog.c @@ -37,7 +37,8 @@ # define O_SEARCH 0 #endif -#define BLF_ROUNDTS 0x01 +#define BLF_ROUNDTS 0x01 +#define BLF_TRUNCATE 0x02 enum binlog_state { state_init, @@ -183,6 +184,29 @@ getinterval(char *p, char **endp) } } +static struct indexdef { /* Maps a value of the "index" parameter (symbolic name or digit) to the strftime-style directory prefix that vmod_init prepends to the log file pattern. */ + char *name; + char *pat; +} indextab[] = { + { "year", "%Y" }, + { "0", "%Y" }, + { "month", "%Y/%m" }, + { "1", "%Y/%m" }, + { "day", "%Y/%m/%d" }, + { "2", "%Y/%m/%d" }, + { NULL } +}; + +static char * +getindexpat(const char *name) +{ /* Return the pat member of the indextab entry whose name equals NAME, or NULL if there is none. The result points into the static table. */ + struct indexdef *p; + for (p = indextab; p->name; p++) + if (strcmp(p->name, name) == 0) + return p->pat; + return NULL; +} + void vmod_init(struct sess *sp, struct 
vmod_priv *priv, const char *dir, const char *dataspec, const char *param) @@ -191,7 +215,8 @@ vmod_init(struct sess *sp, struct vmod_priv *priv, struct stat st; char *p, *q; unsigned long n; - + int user_pattern = 0; + p = findparam(param, "debug"); if (p) { conf->debug = atoi(p); @@ -238,8 +263,32 @@ vmod_init(struct sess *sp, struct vmod_priv *priv, if (!p) { p = strdup(BINLOG_PATTERN); AN(p); - } + } else + user_pattern = 1; conf->pattern = p; + + p = findparam(param, "index"); + if (p) { + q = getindexpat(p); /* NOTE(review): p is never freed on this path, unlike the "reuselog" value further down - confirm findparam returns heap memory */ + if (!q) { + binlog_error("invalid index type"); + abort(); + } + } else if (!user_pattern) { /* the default index prefix is applied only when the pattern itself was left at its default */ + q = getindexpat(BINLOG_INDEX); + AN(q); + } else + q = NULL; + + if (q) { /* new pattern is "<index prefix>/<pattern>"; +2 covers the '/' and the terminating NUL */ + p = malloc(strlen(q) + strlen(conf->pattern) + 2); + AN(p); + strcpy(p, q); + strcat(p, "/"); + strcat(p, conf->pattern); + free(conf->pattern); + conf->pattern = p; + } p = findparam(param, "size"); if (p) { @@ -305,6 +354,15 @@ vmod_init(struct sess *sp, struct vmod_priv *priv, conf->flags &= ~BLF_ROUNDTS; free(p); } + + p = findparam(param, "reuselog"); + if (p) { /* a non-zero value clears BLF_TRUNCATE (reuse allowed), zero sets it */ + if (atoi(p)) + conf->flags &= ~BLF_TRUNCATE; + else + conf->flags |= BLF_TRUNCATE; + free(p); + } conf->fd = -1; conf->base = NULL; @@ -384,7 +442,7 @@ createfile(struct sess *sp, struct binlog_config *conf) return -1; } - fd = openat(conf->dd, fname, O_CREAT|O_RDWR|O_TRUNC, + fd = openat(conf->dd, fname, O_CREAT|O_RDWR, 0666 & ~conf->umask); if (fd == -1) { binlog_error("cannot create log file %s/%s: %s", @@ -419,21 +477,105 @@ setstoptime(struct binlog_config *conf) #define binlog_recnum(conf) \ (((conf)->size - (conf)->base->hdrsize) / (conf)->base->recsize) +static int +checkheader(struct binlog_config *conf, size_t hdrsize) +{ /* Compare the header stored at the current offset of conf->fd with the configured layout. Returns 0 on an exact match, 1 when the file is a binlog with a different layout, -1 on read errors or a foreign file. */ + struct binlog_file_header header; + char c = 0; /* filled one byte at a time by read(); must be a char so that the whole object is defined when it is compared with the dataspec bytes */ + ssize_t rc; + char *p; + + rc = read(conf->fd, &header, sizeof(header)); + if (rc == -1) { + binlog_error("error reading header of %s/%s: %s", + conf->dir, conf->fname, strerror(errno)); + return -1; + } else if (rc != sizeof(header)) { + 
binlog_error("error reading header of %s/%s: %s", + conf->dir, conf->fname, "hit eof"); + return -1; + } + + if (memcmp(header.magic, BINLOG_MAGIC_STR, BINLOG_MAGIC_LEN)) { + binlog_error("%s/%s is not a binlog file", + conf->dir, conf->fname); + return -1; + } + + if (header.version != BINLOG_VERSION) { + binlog_error("%s/%s: unknown version", conf->dir, conf->fname); + return -1; + } + + if (header.hdrsize != hdrsize) { /* layout differences are reported through debug() and return 1, unlike the -1 returned above for read errors and foreign files */ + debug(conf,1,("%s/%s: header size mismatch", + conf->dir, conf->fname)); + return 1; + } + if (header.recsize != conf->recsize) { + debug(conf,1,("%s/%s: record size mismatch", + conf->dir, conf->fname)); + return 1; + } + + p = conf->dataspec; /* the stored dataspec string follows the fixed header and is compared byte by byte, including its terminating NUL */ + while (*p) { + if (read(conf->fd, &c, 1) != 1 || c != *p) { + debug(conf,1,("%s/%s: dataspec mismatch near %s: %c", + conf->dir, conf->fname, p, c)); + return 1; + } + ++p; + } + if (read(conf->fd, &c, 1) != 1 || c != 0) { + debug(conf,1,("%s/%s: dataspec mismatch at the end: %c", + conf->dir, conf->fname, c)); + return 1; + } + return 0; +} + static int newfile(struct sess *sp, struct binlog_config *conf) { int c; void *base; - size_t n; + size_t hdrsize; + struct stat st; + int reuse = 0; setstoptime(conf); if (createfile(sp, conf)) return -1; + + hdrsize = ((sizeof(struct binlog_file_header) + + strlen(conf->dataspec) + + conf->recsize - 1) / conf->recsize) * conf->recsize; /* header plus dataspec, rounded up to a whole number of records */ + + if (fstat(conf->fd, &st) == 0) { + /* fstat succeeded: reuse the file only if it has contents, truncation is not forced and its header matches */ + if (st.st_size > 0 && + !(conf->flags & BLF_TRUNCATE) && + checkheader(conf, hdrsize) == 0) { + reuse = 1; + } else { /* NOTE(review): a newly created, empty file (st_size == 0) also takes this branch, so the message below is logged for it too; the ftruncate result is not checked */ + binlog_error("truncating existing file %s/%s", + conf->dir, conf->fname); + ftruncate(conf->fd, 0); + } + } else { + binlog_error("can't stat %s/%s: %s", + conf->dir, conf->fname, strerror(errno)); + /* try to continue anyway */ + } + conf->flags |= BLF_TRUNCATE; /* once set, later calls skip the reuse branch above unless the flag is cleared again */ + if (lseek(conf->fd, conf->size, SEEK_SET) == -1) { binlog_error("seek in log file %s/%s failed: %s", conf->dir, conf->fname, strerror(errno)); - unlinkat(conf->dd, conf->fname, 0); + if 
(!reuse) + unlinkat(conf->dd, conf->fname, 0); /* a reused, pre-existing file is left in place when setup fails */ close(conf->fd); free(conf->fname); reset(conf); @@ -446,7 +588,8 @@ newfile(struct sess *sp, struct binlog_config *conf) conf->fd, 0); if (base == MAP_FAILED) { binlog_error("mmap: %s", strerror(errno)); - unlinkat(conf->dd, conf->fname, 0); + if (!reuse) + unlinkat(conf->dd, conf->fname, 0); close(conf->fd); free(conf->fname); reset(conf); @@ -454,20 +597,24 @@ newfile(struct sess *sp, struct binlog_config *conf) } conf->base = base; - memcpy(conf->base->magic, BINLOG_MAGIC_STR, BINLOG_MAGIC_LEN); - conf->base->version = BINLOG_VERSION; - conf->base->recsize = conf->recsize; - conf->base->recnum = 0; - strcpy((char*)(conf->base + 1), conf->dataspec); - - n = (sizeof(struct binlog_file_header) + strlen(conf->dataspec) + - conf->recsize - 1) / conf->recsize; - conf->base->hdrsize = n * conf->recsize; + if (reuse) { /* keep the header (and its recnum) already stored in the mapped file */ + debug(conf,1,("reusing log file %s, recnum=%lu", + conf->fname, (unsigned long)conf->base->recnum)); + } else { /* fresh or truncated file: write a new header with recnum 0 */ + debug(conf,1,("created new log file %s",conf->fname)); + memcpy(conf->base->magic, BINLOG_MAGIC_STR, BINLOG_MAGIC_LEN); + conf->base->version = BINLOG_VERSION; + conf->base->recsize = conf->recsize; + conf->base->recnum = 0; + strcpy((char*)(conf->base + 1), conf->dataspec); + + conf->base->hdrsize = hdrsize; + } + conf->recbase = (char *) conf->base + conf->base->hdrsize; conf->recnum = binlog_recnum(conf); - debug(conf,1,("created new log file %s",conf->fname)); return 0; } diff --git a/src/binlogsel.c b/src/binlogsel.c index 62299e5..8f1cfce 100644 --- a/src/binlogsel.c +++ b/src/binlogsel.c @@ -38,21 +38,26 @@ char *timefmt = "%c"; int number_option; int verbose_option; int timediff_option; +char *directory; /* assigned from the -D option in main */ char *pattern; +enum binlog_index_type index_type = index_year; /* overridden by the -i option in main */ #define FROM_TIME 0x01 #define TO_TIME 0x02 int timemask; time_t from_time, to_time; +static int matchnames(const char *dir, const char *pat, glob_t *gl); +void selglob(const char *dir, const char *pattern); + void help() { 
printf("usage: %s [-dhnv] [-t FORMAT] [-F FROMTIME] [-T TOTIME] [-p PATTERN] [-D DIR] [FILE...]\n", progname); } - + /* Convert strftime-like pattern into globbing pattern */ -void +char * convpattern(const char *dir) { char *p, *q; @@ -73,13 +78,14 @@ convpattern(const char *dir) for (q = pattern; *q; ) { if (*q == '%') { - *p++ = '*'; + if (p == newpat || p[-1] != '*') /* collapse runs of conversions into one '*'; a conversion at the very start of the buffer must still produce a '*', and p[-1] is only read when it exists */ + *p++ = '*'; q += 2; } else *p++ = *q++; } *p = 0; - pattern = newpat; + return newpat; } #define getrec(base, recsize, n) \ @@ -332,14 +338,212 @@ selfilelist(char **argv) for (;*argv;++argv) selfile(*argv); } + +static char * +mkfilename(const char *dir, const char *file) +{ /* Join DIR and FILE with a single '/', ignoring trailing slashes of DIR. The result comes from xmalloc and is freed by the callers. DIR must not be NULL (strlen is applied to it). */ + size_t dirlen, size; + char *ret; + + dirlen = strlen(dir); + while (dirlen > 0 && dir[dirlen-1] == '/') + --dirlen; + size = dirlen + 1 + strlen(file) + 1; + ret = xmalloc(size); + memcpy(ret, dir, dirlen); + ret[dirlen++] = '/'; + strcpy(ret + dirlen, file); + return ret; +} + +int +filename_to_int(char *name) +{ /* Numeric value (atoi) of the last path component of NAME; aborts if NAME contains no '/'. */ + char *p = strrchr(name, '/'); + if (!p) + abort(); + return atoi(p + 1); +} + +void +selidx_day(const char *dir) +{ /* DIR is a month directory. With index_month the log files are globbed directly in DIR; otherwise every two-digit day subdirectory in the selected range is scanned. */ + int from_day, to_day; + struct tm *tm; /* NOTE(review): not used in this function */ + glob_t gl; + int glinit = 0; + char *dirbuf; + size_t dirlen; + + if (index_type == index_month) { + selglob(dir, BINLOG_GLOB_PATTERN); + return; + } + + if (timemask & FROM_TIME) /* NOTE(review): the day limits come from the global from_time/to_time whichever month DIR is - check ranges that span several months */ + from_day = gmtime(&from_time)->tm_mday; + else { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (glinit) + from_day = filename_to_int(gl.gl_pathv[0]); + else { + error("no matching files"); + exit(1); + } + } + + if (timemask & TO_TIME) + to_day = gmtime(&to_time)->tm_mday; + else { + if (!glinit) { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (!glinit) { + error("no matching files"); + exit(1); + } + } + to_day = filename_to_int(gl.gl_pathv[gl.gl_pathc - 1]); + } + + dirlen = strlen(dir) + 4; /* room for '/', two digits and the terminating NUL */ + dirbuf = xmalloc(dirlen); + for (;from_day <= to_day; from_day++) { + snprintf(dirbuf, dirlen, "%s/%02d", dir, from_day); + selglob(dirbuf, BINLOG_GLOB_PATTERN); + } + 
free(dirbuf); + if (glinit) + globfree(&gl); +} + +void +selidx_month(const char *dir) +{ /* DIR is a year directory. With index_year the log files are globbed directly in DIR; otherwise each two-digit month subdirectory in the selected range is handed to selidx_day. */ + int from_month, to_month; + struct tm *tm; /* NOTE(review): not used in this function */ + glob_t gl; + int glinit = 0; + char *dirbuf; + size_t dirlen; + + if (index_type == index_year) { + selglob(dir, BINLOG_GLOB_PATTERN); + return; + } + + if (timemask & FROM_TIME) + from_month = 1 + gmtime(&from_time)->tm_mon; /* tm_mon counts from 0, directory names from 01 */ + else { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (glinit) + from_month = filename_to_int(gl.gl_pathv[0]); /* first entry of the glob result is taken as the lowest month; matchnames does not pass GLOB_NOSORT */ + else { + error("no matching files"); + exit(1); + } + } + + if (timemask & TO_TIME) + to_month = 1 + gmtime(&to_time)->tm_mon; + else { + if (!glinit) { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (!glinit) { + error("no matching files"); + exit(1); + } + } + to_month = filename_to_int(gl.gl_pathv[gl.gl_pathc - 1]); + } + + dirlen = strlen(dir) + 4; /* room for '/', two digits and the terminating NUL */ + dirbuf = xmalloc(dirlen); + for (;from_month <= to_month; from_month++) { + snprintf(dirbuf, dirlen, "%s/%02d", dir, from_month); + selidx_day(dirbuf); + } + free(dirbuf); + if (glinit) + globfree(&gl); +} + +void +selidx_year(const char *dir) +{ /* Top of the indexed lookup: walks the four-digit year subdirectories of DIR, bounded by from_time/to_time when given and by the directories found otherwise, and hands each to selidx_month. */ + int from_year, to_year; + struct tm *tm; /* NOTE(review): not used in this function */ + glob_t gl; + int glinit = 0; + char *dirbuf; + size_t dirlen; + + if (timemask & FROM_TIME) + from_year = 1900 + gmtime(&from_time)->tm_year; + else { + glinit = matchnames(dir, "[0-9][0-9][0-9][0-9]", &gl); + if (glinit) + from_year = filename_to_int(gl.gl_pathv[0]); + else { + error("no matching files"); + exit(1); + } + } + if (timemask & TO_TIME) + to_year = 1900 + gmtime(&to_time)->tm_year; + else { + if (!glinit) { /* glinit records whether gl currently holds a result that must be released with globfree */ + glinit = matchnames(dir, "[0-9][0-9][0-9][0-9]", &gl); + if (!glinit) { + error("no matching files"); + exit(1); + } + } + to_year = filename_to_int(gl.gl_pathv[gl.gl_pathc - 1]); + } + + dirlen = strlen(dir) + 6; /* room for '/', four digits and the terminating NUL */ + dirbuf = xmalloc(dirlen); + for (;from_year <= to_year; from_year++) { + snprintf(dirbuf, dirlen, "%s/%04d", dir, from_year); + selidx_month(dirbuf); + } + free(dirbuf); + if (glinit) + globfree(&gl); +} + int 
globerrfunc (const char *epath, int eerrno) { - error("%s: %s", strerror(eerrno)); + error("%s: %s", epath, strerror(eerrno)); /* the removed line supplied one argument for two %s conversions */ return 0; } +static int +matchnames(const char *dir, const char *pat, glob_t *gl) +{ /* Glob for DIR/PAT into GL. Returns 1 when GL holds matches (the caller must globfree it), 0 when nothing matched; exits on GLOB_NOSPACE and GLOB_ABORTED. */ + char *p = mkfilename(dir, pat); + int rc = glob(p, GLOB_ERR, globerrfunc, gl); + free(p); + switch (rc) { + case 0: + break; + case GLOB_NOSPACE: + error("out of memory"); + exit(1); + + case GLOB_ABORTED: + error("read error"); + exit(1); + + case GLOB_NOMATCH: + return 0; + } + return 1; +} + + struct logfile { char *name; time_t start; @@ -358,13 +562,14 @@ tsort(const void *a, const void *b) } void -selpattern(void) +selglob(const char *dir, const char *pattern) { size_t i, j; glob_t gl; struct logfile *logfiles; - - switch (glob(pattern, GLOB_ERR|GLOB_NOSORT, globerrfunc, &gl)) { + char *p = mkfilename(dir, pattern); /* the pattern parameter hides the global of the same name inside this function */ + + switch (glob(p, GLOB_ERR|GLOB_NOSORT, globerrfunc, &gl)) { case 0: break; case GLOB_NOSPACE: @@ -379,7 +584,8 @@ selpattern(void) error("no files matched pattern"); exit(1); } - + free(p); /* NOTE(review): the exit(1) just above now ends the whole run when one indexed subdirectory has no matching files - confirm this is intended for the selidx_* callers */ + logfiles = xcalloc(gl.gl_pathc, sizeof(*logfiles)); for (i = j = 0; i < gl.gl_pathc; i++) { @@ -400,16 +606,15 @@ selpattern(void) free(logfiles); globfree(&gl); } - + int main(int argc, char **argv) { int c; struct timespec ts; - char *directory; setprogname(argv[0]); - while ((c = getopt(argc, argv, "D:dF:hp:T:t:nv")) != EOF) + while ((c = getopt(argc, argv, "D:dF:hi:p:T:t:nv")) != EOF) switch (c) { case 'D': directory = optarg; @@ -429,6 +634,13 @@ main(int argc, char **argv) case 'h': help(); return 0; + case 'i': + index_type = atoi(optarg); /* NOTE(review): numeric values only; the names "year", "month" and "day" accepted by the vmod's index parameter all convert to 0 here */ + if (index_type < 0 || index_type > index_last) { + error("invalid index type: %s", optarg); + exit(1); + } + break; case 'p': pattern = optarg; break; @@ -457,17 +669,16 @@ main(int argc, char **argv) argv += optind; if (argc) { - if (pattern) { + if (pattern || directory) { error("either files or pattern (-p) must be given, " "but not both"); exit(1); } selfilelist(argv); + } else if (pattern) { + 
selglob(directory, convpattern(pattern)); /* NOTE(review): convpattern's parameter is named dir and the removed call passed directory - confirm that passing pattern here is intended */ } else { - if (!pattern) - pattern = BINLOG_PATTERN; - convpattern(directory); - selpattern(); + selidx_year(directory); /* NOTE(review): directory stays NULL unless -D was given (no default is visible in this patch), and mkfilename applies strlen to it */ } exit(0); } diff --git a/src/vmod-binlog.h b/src/vmod-binlog.h index 06d517d..e39ccea 100644 --- a/src/vmod-binlog.h +++ b/src/vmod-binlog.h @@ -24,7 +24,15 @@ #endif #ifndef BINLOG_PATTERN -# define BINLOG_PATTERN "%Y/%m/%d.log" +# define BINLOG_PATTERN "%Y%m%dT%H%M%S.log" +#endif + +#ifndef BINLOG_GLOB_PATTERN +# define BINLOG_GLOB_PATTERN "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9].log" +#endif + +#ifndef BINLOG_INDEX +# define BINLOG_INDEX "year" #endif #ifndef BINLOG_INTERVAL @@ -35,6 +43,14 @@ # define BINLOG_UMASK 0077 #endif +enum binlog_index_type { /* depth of the date-based directory index: year, year/month or year/month/day; binlogsel -i takes the numeric value */ + index_year, + index_month, + index_day, + +}; +#define index_last index_day + struct binlog_record { time_t ts; /* timestamp */ char data[1]; /* payload */ -- cgit v1.2.1