diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2013-10-15 18:38:49 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2013-10-15 18:56:22 +0300 |
commit | ef3f928cc712f6774e19f8cb8f880cbeac15e8ff (patch) | |
tree | 88f59a4c8d1867bfadb924ee547b4b74a97c5ce0 /src | |
parent | f035194d7d1b6cc0846ad7a5d86e0d6fa9463c67 (diff) | |
download | vmod-binlog-ef3f928cc712f6774e19f8cb8f880cbeac15e8ff.tar.gz vmod-binlog-ef3f928cc712f6774e19f8cb8f880cbeac15e8ff.tar.bz2 |
Change default log naming.
* configure.ac: Call AM_PROG_CC_C_O
* src/binlog.c (BLF_TRUNCATE): New flag.
(vmod_init): Change pattern initialization.
New parameter "reuselog".
(createfile): Remove O_TRUNC.
(checkheader): New function.
(newfile): Reuse the existing file if it is the first file
to be opened after Varnish startup and its header exactly
matches our data.
* src/binlogsel.c: Use indexed directory structure to speed up
searches.
* src/vmod-binlog.h (BINLOG_PATTERN): Change pattern.
(BINLOG_GLOB_PATTERN,BINLOG_INDEX): New defines.
Diffstat (limited to 'src')
-rw-r--r-- | src/binlog.c | 159 | ||||
-rw-r--r-- | src/binlogsel.c | 235 | ||||
-rw-r--r-- | src/vmod-binlog.h | 18 |
3 files changed, 393 insertions, 19 deletions
diff --git a/src/binlog.c b/src/binlog.c index 765b945..e7ac7e5 100644 --- a/src/binlog.c +++ b/src/binlog.c @@ -35,12 +35,13 @@ #ifndef O_SEARCH # define O_SEARCH 0 #endif #define BLF_ROUNDTS 0x01 +#define BLF_TRUNCATE 0x02 enum binlog_state { state_init, state_start, state_pack }; @@ -180,20 +181,44 @@ getinterval(char *p, char **endp) return (hours*60 + minutes)*60 + seconds; } p++; } } +static struct indexdef { + char *name; + char *pat; +} indextab[] = { + { "year", "%Y" }, + { "0", "%Y" }, + { "month", "%Y/%m" }, + { "1", "%Y/%m" }, + { "day", "%Y/%m/%d" }, + { "2", "%Y/%m/%d" }, + { NULL } +}; + +static char * +getindexpat(const char *name) +{ + struct indexdef *p; + for (p = indextab; p->name; p++) + if (strcmp(p->name, name) == 0) + return p->pat; + return NULL; +} + void vmod_init(struct sess *sp, struct vmod_priv *priv, const char *dir, const char *dataspec, const char *param) { struct binlog_config *conf = priv->priv; struct stat st; char *p, *q; unsigned long n; + int user_pattern = 0; p = findparam(param, "debug"); if (p) { conf->debug = atoi(p); free(p); } @@ -235,14 +260,38 @@ vmod_init(struct sess *sp, struct vmod_priv *priv, AN(conf->dataspec); p = findparam(param, "pattern"); if (!p) { p = strdup(BINLOG_PATTERN); AN(p); + } else + user_pattern = 1; + conf->pattern = p; + + p = findparam(param, "index"); + if (p) { + q = getindexpat(p); + if (!q) { + binlog_error("invalid index type"); + abort(); } + } else if (!user_pattern) { + q = getindexpat(BINLOG_INDEX); + AN(q); + } else + q = NULL; + + if (q) { + p = malloc(strlen(q) + strlen(conf->pattern) + 2); + AN(p); + strcpy(p, q); + strcat(p, "/"); + strcat(p, conf->pattern); + free(conf->pattern); conf->pattern = p; + } p = findparam(param, "size"); if (p) { uintmax_t u; errno = 0; @@ -303,12 +352,21 @@ vmod_init(struct sess *sp, struct vmod_priv *priv, conf->flags |= BLF_ROUNDTS; else conf->flags &= ~BLF_ROUNDTS; free(p); } + p = findparam(param, "reuselog"); + if (p) { + if (atoi(p)) + 
conf->flags &= ~BLF_TRUNCATE; + else + conf->flags |= BLF_TRUNCATE; + free(p); + } + conf->fd = -1; conf->base = NULL; conf->stoptime = time(NULL); pthread_mutex_init(&conf->mutex, NULL); } @@ -381,13 +439,13 @@ createfile(struct sess *sp, struct binlog_config *conf) return -1; if (mkdir_p(conf, fname)) { free(fname); return -1; } - fd = openat(conf->dd, fname, O_CREAT|O_RDWR|O_TRUNC, + fd = openat(conf->dd, fname, O_CREAT|O_RDWR, 0666 & ~conf->umask); if (fd == -1) { binlog_error("cannot create log file %s/%s: %s", conf->dir, fname, strerror(errno)); free(fname); } @@ -417,25 +475,109 @@ setstoptime(struct binlog_config *conf) } #define binlog_recnum(conf) \ (((conf)->size - (conf)->base->hdrsize) / (conf)->base->recsize) static int +checkheader(struct binlog_config *conf, size_t hdrsize) +{ + struct binlog_file_header header; + int c; + ssize_t rc; + char *p; + + rc = read(conf->fd, &header, sizeof(header)); + if (rc == -1) { + binlog_error("error reading header of %s/%s: %s", + conf->dir, conf->fname, strerror(errno)); + return -1; + } else if (rc != sizeof(header)) { + binlog_error("error reading header of %s/%s: %s", + conf->dir, conf->fname, "hit eof"); + return -1; + } + + if (memcmp(header.magic, BINLOG_MAGIC_STR, BINLOG_MAGIC_LEN)) { + binlog_error("%s/%s is not a binlog file", + conf->dir, conf->fname); + return -1; + } + + if (header.version != BINLOG_VERSION) { + binlog_error("%s/%s: unknown version", conf->dir, conf->fname); + return -1; + } + + if (header.hdrsize != hdrsize) { + debug(conf,1,("%s/%s: header size mismatch", + conf->dir, conf->fname)); + return 1; + } + if (header.recsize != conf->recsize) { + debug(conf,1,("%s/%s: record size mismatch", + conf->dir, conf->fname)); + return 1; + } + + p = conf->dataspec; + while (*p) { + if (read(conf->fd, &c, 1) != 1 || c != *p) { + debug(conf,1,("%s/%s: dataspec mismatch near %s: %c", + conf->dir, conf->fname, p, c)); + return 1; + } + ++p; + } + if (read(conf->fd, &c, 1) != 1 || c != 0) { + 
debug(conf,1,("%s/%s: dataspec mismatch at the end: %c", + conf->dir, conf->fname, c)); + return 1; + } + return 0; +} + +static int newfile(struct sess *sp, struct binlog_config *conf) { int c; void *base; - size_t n; + size_t hdrsize; + struct stat st; + int reuse = 0; setstoptime(conf); if (createfile(sp, conf)) return -1; + + hdrsize = ((sizeof(struct binlog_file_header) + + strlen(conf->dataspec) + + conf->recsize - 1) / conf->recsize) * conf->recsize; + + if (fstat(conf->fd, &st) == 0) { + /* File already exists */ + if (st.st_size > 0 && + !(conf->flags & BLF_TRUNCATE) && + checkheader(conf, hdrsize) == 0) { + reuse = 1; + } else { + binlog_error("truncating existing file %s/%s", + conf->dir, conf->fname); + ftruncate(conf->fd, 0); + } + } else { + binlog_error("can't stat %s/%s: %s", + conf->dir, conf->fname, strerror(errno)); + /* try to continue anyway */ + } + conf->flags |= BLF_TRUNCATE; + if (lseek(conf->fd, conf->size, SEEK_SET) == -1) { binlog_error("seek in log file %s/%s failed: %s", conf->dir, conf->fname, strerror(errno)); + if (!reuse) unlinkat(conf->dd, conf->fname, 0); close(conf->fd); free(conf->fname); reset(conf); return -1; } @@ -443,34 +585,39 @@ newfile(struct sess *sp, struct binlog_config *conf) write(conf->fd, &c, 1); base = mmap((caddr_t)0, conf->size, PROT_READ|PROT_WRITE, MAP_SHARED, conf->fd, 0); if (base == MAP_FAILED) { binlog_error("mmap: %s", strerror(errno)); + if (!reuse) unlinkat(conf->dd, conf->fname, 0); close(conf->fd); free(conf->fname); reset(conf); return -1; } conf->base = base; + + if (reuse) { + debug(conf,1,("reusing log file %s, recnum=%lu", + conf->fname, (unsigned long)conf->base->recnum)); + } else { + debug(conf,1,("created new log file %s",conf->fname)); memcpy(conf->base->magic, BINLOG_MAGIC_STR, BINLOG_MAGIC_LEN); conf->base->version = BINLOG_VERSION; conf->base->recsize = conf->recsize; conf->base->recnum = 0; strcpy((char*)(conf->base + 1), conf->dataspec); - n = (sizeof(struct binlog_file_header) + 
strlen(conf->dataspec) + - conf->recsize - 1) / conf->recsize; - conf->base->hdrsize = n * conf->recsize; + conf->base->hdrsize = hdrsize; + } conf->recbase = (char *) conf->base + conf->base->hdrsize; conf->recnum = binlog_recnum(conf); - debug(conf,1,("created new log file %s",conf->fname)); return 0; } static void closefile(struct sess *sp, struct binlog_config *conf) { diff --git a/src/binlogsel.c b/src/binlogsel.c index 62299e5..8f1cfce 100644 --- a/src/binlogsel.c +++ b/src/binlogsel.c @@ -35,27 +35,32 @@ #include "parse-datetime.h" char *timefmt = "%c"; int number_option; int verbose_option; int timediff_option; +char *directory; char *pattern; +enum binlog_index_type index_type = index_year; #define FROM_TIME 0x01 #define TO_TIME 0x02 int timemask; time_t from_time, to_time; +static int matchnames(const char *dir, const char *pat, glob_t *gl); +void selglob(const char *dir, const char *pattern); + void help() { printf("usage: %s [-dhnv] [-t FORMAT] [-F FROMTIME] [-T TOTIME] [-p PATTERN] [-D DIR] [FILE...]\n", progname); } /* Convert strftime-like pattern into globbing pattern */ -void +char * convpattern(const char *dir) { char *p, *q; char *newpat; size_t size = strlen(pattern) + 1; @@ -70,19 +75,20 @@ convpattern(const char *dir) p += strlen(dir); *p++ = '/'; } for (q = pattern; *q; ) { if (*q == '%') { + if (p > newpat && p[-1] != '*') *p++ = '*'; q += 2; } else *p++ = *q++; } *p = 0; - pattern = newpat; + return newpat; } #define getrec(base, recsize, n) \ ((struct binlog_record*)((char *)(base) + (n) * (recsize))) int @@ -330,19 +336,217 @@ void selfilelist(char **argv) { for (;*argv;++argv) selfile(*argv); } +static char * +mkfilename(const char *dir, const char *file) +{ + size_t dirlen, size; + char *ret; + + dirlen = strlen(dir); + while (dirlen > 0 && dir[dirlen-1] == '/') + --dirlen; + size = dirlen + 1 + strlen(file) + 1; + ret = xmalloc(size); + memcpy(ret, dir, dirlen); + ret[dirlen++] = '/'; + strcpy(ret + dirlen, file); + return ret; +} + 
+int +filename_to_int(char *name) +{ + char *p = strrchr(name, '/'); + if (!p) + abort(); + return atoi(p + 1); +} + +void +selidx_day(const char *dir) +{ + int from_day, to_day; + struct tm *tm; + glob_t gl; + int glinit = 0; + char *dirbuf; + size_t dirlen; + + if (index_type == index_month) { + selglob(dir, BINLOG_GLOB_PATTERN); + return; + } + + if (timemask & FROM_TIME) + from_day = gmtime(&from_time)->tm_mday; + else { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (glinit) + from_day = filename_to_int(gl.gl_pathv[0]); + else { + error("no matching files"); + exit(1); + } + } + + if (timemask & TO_TIME) + to_day = gmtime(&to_time)->tm_mday; + else { + if (!glinit) { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (!glinit) { + error("no matching files"); + exit(1); + } + } + to_day = filename_to_int(gl.gl_pathv[gl.gl_pathc - 1]); + } + + dirlen = strlen(dir) + 4; + dirbuf = xmalloc(dirlen); + for (;from_day <= to_day; from_day++) { + snprintf(dirbuf, dirlen, "%s/%02d", dir, from_day); + selglob(dirbuf, BINLOG_GLOB_PATTERN); + } + free(dirbuf); + if (glinit) + globfree(&gl); +} + +void +selidx_month(const char *dir) +{ + int from_month, to_month; + struct tm *tm; + glob_t gl; + int glinit = 0; + char *dirbuf; + size_t dirlen; + + if (index_type == index_year) { + selglob(dir, BINLOG_GLOB_PATTERN); + return; + } + + if (timemask & FROM_TIME) + from_month = 1 + gmtime(&from_time)->tm_mon; + else { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (glinit) + from_month = filename_to_int(gl.gl_pathv[0]); + else { + error("no matching files"); + exit(1); + } + } + + if (timemask & TO_TIME) + to_month = 1 + gmtime(&to_time)->tm_mon; + else { + if (!glinit) { + glinit = matchnames(dir, "[0-9][0-9]", &gl); + if (!glinit) { + error("no matching files"); + exit(1); + } + } + to_month = filename_to_int(gl.gl_pathv[gl.gl_pathc - 1]); + } + + dirlen = strlen(dir) + 4; + dirbuf = xmalloc(dirlen); + for (;from_month <= to_month; from_month++) { + snprintf(dirbuf, 
dirlen, "%s/%02d", dir, from_month); + selidx_day(dirbuf); + } + free(dirbuf); + if (glinit) + globfree(&gl); +} + +void +selidx_year(const char *dir) +{ + int from_year, to_year; + struct tm *tm; + glob_t gl; + int glinit = 0; + char *dirbuf; + size_t dirlen; + + if (timemask & FROM_TIME) + from_year = 1900 + gmtime(&from_time)->tm_year; + else { + glinit = matchnames(dir, "[0-9][0-9][0-9][0-9]", &gl); + if (glinit) + from_year = filename_to_int(gl.gl_pathv[0]); + else { + error("no matching files"); + exit(1); + } + } + + if (timemask & TO_TIME) + to_year = 1900 + gmtime(&to_time)->tm_year; + else { + if (!glinit) { + glinit = matchnames(dir, "[0-9][0-9][0-9][0-9]", &gl); + if (!glinit) { + error("no matching files"); + exit(1); + } + } + to_year = filename_to_int(gl.gl_pathv[gl.gl_pathc - 1]); + } + + dirlen = strlen(dir) + 6; + dirbuf = xmalloc(dirlen); + for (;from_year <= to_year; from_year++) { + snprintf(dirbuf, dirlen, "%s/%04d", dir, from_year); + selidx_month(dirbuf); + } + free(dirbuf); + if (glinit) + globfree(&gl); +} + int globerrfunc (const char *epath, int eerrno) { - error("%s: %s", strerror(eerrno)); + error("%s: %s", epath, strerror(eerrno)); return 0; } +static int +matchnames(const char *dir, const char *pat, glob_t *gl) +{ + char *p = mkfilename(dir, pat); + int rc = glob(p, GLOB_ERR, globerrfunc, gl); + free(p); + switch (rc) { + case 0: + break; + case GLOB_NOSPACE: + error("out of memory"); + exit(1); + + case GLOB_ABORTED: + error("read error"); + exit(1); + + case GLOB_NOMATCH: + return 0; + } + return 1; +} + + struct logfile { char *name; time_t start; }; static int @@ -355,19 +559,20 @@ tsort(const void *a, const void *b) if (la->start < lb->start) return -1; return 0; } void -selpattern(void) +selglob(const char *dir, const char *pattern) { size_t i, j; glob_t gl; struct logfile *logfiles; + char *p = mkfilename(dir, pattern); - switch (glob(pattern, GLOB_ERR|GLOB_NOSORT, globerrfunc, &gl)) { + switch (glob(p, GLOB_ERR|GLOB_NOSORT, 
globerrfunc, &gl)) { case 0: break; case GLOB_NOSPACE: error("out of memory"); exit(1); @@ -376,12 +581,13 @@ selpattern(void) exit(1); case GLOB_NOMATCH: error("no files matched pattern"); exit(1); } + free(p); logfiles = xcalloc(gl.gl_pathc, sizeof(*logfiles)); for (i = j = 0; i < gl.gl_pathc; i++) { time_t t; if (checktime(gl.gl_pathv[i], &t) == 0) { @@ -403,16 +609,15 @@ selpattern(void) int main(int argc, char **argv) { int c; struct timespec ts; - char *directory; setprogname(argv[0]); - while ((c = getopt(argc, argv, "D:dF:hp:T:t:nv")) != EOF) + while ((c = getopt(argc, argv, "D:dF:hi:p:T:t:nv")) != EOF) switch (c) { case 'D': directory = optarg; break; case 'd': timediff_option = 1; @@ -426,12 +631,19 @@ main(int argc, char **argv) from_time = ts.tv_sec; timemask |= FROM_TIME; break; case 'h': help(); return 0; + case 'i': + index_type = atoi(optarg); + if (index_type < 0 || index_type > index_last) { + error("invalid index type: %s", optarg); + exit(1); + } + break; case 'p': pattern = optarg; break; case 'T': if (!parse_datetime(&ts, optarg, NULL)) { error("invalid timespec: %s", optarg); @@ -454,21 +666,20 @@ main(int argc, char **argv) } argc -= optind; argv += optind; if (argc) { - if (pattern) { + if (pattern || directory) { error("either files or pattern (-p) must be given, " "but not both"); exit(1); } selfilelist(argv); + } else if (pattern) { + selglob(directory, convpattern(pattern)); } else { - if (!pattern) - pattern = BINLOG_PATTERN; - convpattern(directory); - selpattern(); + selidx_year(directory); } exit(0); } diff --git a/src/vmod-binlog.h b/src/vmod-binlog.h index 06d517d..e39ccea 100644 --- a/src/vmod-binlog.h +++ b/src/vmod-binlog.h @@ -21,23 +21,39 @@ #ifndef BINLOG_SIZE # define BINLOG_SIZE (1024*1024*1024) #endif #ifndef BINLOG_PATTERN -# define BINLOG_PATTERN "%Y/%m/%d.log" +# define BINLOG_PATTERN "%Y%m%dT%H%M%S.log" +#endif + +#ifndef BINLOG_GLOB_PATTERN +# define BINLOG_GLOB_PATTERN 
"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T[0-9][0-9][0-9][0-9][0-9][0-9].log" +#endif + +#ifndef BINLOG_INDEX +# define BINLOG_INDEX "year" #endif #ifndef BINLOG_INTERVAL # define BINLOG_INTERVAL 86400 #endif #ifndef BINLOG_UMASK # define BINLOG_UMASK 0077 #endif +enum binlog_index_type { + index_year, + index_month, + index_day, + +}; +#define index_last index_day + struct binlog_record { time_t ts; /* timestamp */ char data[1]; /* payload */ }; #define BINLOG_MAGIC_STR "NXCBINLOG" |