/*************************************************************************************************
 * Command line utility to manage an index
 *                                                      Copyright (C) 2003-2006 Mikio Hirabayashi
 * This file is part of Estraier, a personal full-text search system.
 * Estraier is free software; you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 * Estraier is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with Estraier;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA.
 *************************************************************************************************/


#include "estcommon.h"

/* default comma-separated suffix lists; runregister falls back on them when -tsuf, -hsuf or
   -msuf is not given */
#define TEXTSUFS       ".txt,.asc"       /* suffix list of plain text */
#define HTMLSUFS       ".html,.htm"      /* suffix list of HTML */
#define MIMESUFS       ".eml,.mht"       /* suffix list of MIME */
#define SUFWILDCARD    "*"               /* wild card matching any file */
#define EXTINFILE      "extin.tmp"       /* name of a temporary file for input */
#define EXTOUTFILE     "extout.tmp"      /* name of a temporary file for output */
#define DLFILTERPRE    '@'               /* prefix of a dynamic linking function */
/* arguments handed to odsettuning() by the -rich and -plute options */
#define RCINDEXBNUM    163819            /* ibnum tuning of rich mode */
#define RCINDEXDNUM    11                /* idnum tuning of rich mode */
#define RCCACHEBNUM    262139            /* cbnum tuning of rich mode */
#define RCCACHESIZ     10485760          /* csiz tuning of rich mode */
#define PLINDEXBNUM    163819            /* ibnum tuning of plute mode */
#define PLINDEXDNUM    17                /* idnum tuning of plute mode */
#define PLCACHEBNUM    524288            /* cbnum tuning of plute mode */
#define PLCACHESIZ     20971520          /* csiz tuning of plute mode */
/* parameters of the -relax option: nice() increment, sleep length, and the point accounting
   done by relaxsystem() */
#define NICEINC        10                /* increment value for nice */
#define RLXSLEEPSEC    1                 /* number of sleeping seconds for relax */
#define RLXPTMAX       1024              /* max point for relax */
#define RLXPASSINC     1                 /* increment of relax for passing */
#define RLXREGINC      64                /* increment of relax for registering */
#define RLXRELINC      16                /* increment of relax for relating */
#define RLXPRGINC      8                 /* increment of relax for purging */


/* global variables (set while parsing the command line, read by the worker routines) */
const char *progname = NULL;             /* program name */
int sigterm = FALSE;                     /* flag for termination signal (set by sigtermhandler) */
int force = FALSE;                       /* whether to force doing with targets */
int relax = FALSE;                       /* whether to relax the stress of system */
int wmax = -1;                           /* max number of words to be stored for summary */
CBLIST *tsuflist = NULL;                 /* list of suffixes of plain text */
CBLIST *hsuflist = NULL;                 /* list of suffixes of HTML */
CBLIST *msuflist = NULL;                 /* list of suffixes of MIME */
int mnude = FALSE;                       /* whether attributes of the inner to be prior */
CBLIST *xsuflist = NULL;                 /* list of suffixes for the external filter */
const char *xtype = NULL;                /* media type for outer filter */
const char *xcmd = NULL;                 /* command of outer filter */
CBMAP *typemap = NULL;                   /* map of media types and outer filter */
int xtext = FALSE;                       /* whether to use the outer filter of plain text */
int xmime = FALSE;                       /* whether to use the outer filter of MIME */
int izero = FALSE;                       /* whether to ignore empty documents */
CBLIST *iprelist = NULL;                 /* list of prefixes of files to be ignored */
int isiz = -1;                           /* size limit: larger files are ignored if positive */
const char *enc = NULL;                  /* encoding of text of each document */
const char *ptcode = NULL;               /* encoding of file path as title */
const char *ftcode = NULL;               /* encoding of file name as title */
CBLIST *tattrlist = NULL;                /* list of attributes to be merged to text */
const char *prefix = NULL;               /* prefix of the URI of target documents */
int tfidf = TRUE;                        /* whether to enable TF-IDF method */
int doopt = TRUE;                        /* whether to optimize the fat index */


/* function prototypes */
/* entry point, signal handling and usage message */
int main(int argc, char **argv);
void setsignals(void);
void sigtermhandler(int num);
void usage(void);
/* argument parsers, one per subcommand; each returns the exit status of the command */
int runregister(int argc, char **argv);
int runrelate(int argc, char **argv);
int runpurge(int argc, char **argv);
int runoptimize(int argc, char **argv);
int runinform(int argc, char **argv);
int runmerge(int argc, char **argv);
int runpree(int argc, char **argv);
/* callback and small utilities for input, matching, reporting and path conversion */
void otcb(const char *fname, ODEUM *odeum, const char *msg);
char *readstdin(int *sp);
int fwmatchlist(const char *str, const CBLIST *keys);
int bwimatchlist(const char *str, const CBLIST *keys);
char *fgetl(FILE *ifp);
void pdperror(const char *name);
void printferror(const char *format, ...);
void printfinfo(const char *format, ...);
void relaxsystem(int inc);
const char *datestr(time_t t);
char *filetouri(const char *file);
char *uritofile(const char *uri);
/* workers called by the argument parsers, and the routines they rely on */
int proclist(const char *name, const char *lfile);
int procdir(const char *name, const char *dir);
int indexdir(ODEUM *odeum, VILLA *mtdb, const char *name, const char *dir);
int indexfile(ODEUM *odeum, VILLA *mtdb, const char *name, const char *file, const char *ruri);
int ishot(VILLA *mtdb, const char *path, time_t mtime);
int procrelate(const char *name, const char *lfile);
int procpurge(const char *name, const char *lfile);
int procoptimize(const char *name, int small);
int procinform(const char *name);
int procmerge(const char *name, const CBLIST *elems);
int procpree(const char *path, int hb, int mb, int wl);
/* document construction and display helpers */
ODDOC *docext(const char *uri, const char *path, const char *ptr, int size);
void docltitle(ODDOC *doc, const char *path);
void docsettattrs(ODDOC *doc);
void showwlist(ODDOC *doc);
void showdoc(ODDOC *doc);


/* main routine */
int main(int argc, char **argv){
  const char *tmp;
  int rv;
  estputenv("LANG", ESTLOCALE);
  estputenv("LC_ALL", ESTLOCALE);
  if((tmp = getenv(ESTDBGFDENV)) != NULL) dpdbgfd = atoi(tmp);
  cbstdiobin();
  progname = argv[0];
  sigterm = FALSE;
  setsignals();
  if(argc < 2) usage();
  odsetotcb(otcb);
  rv = 0;
  if(!strcmp(argv[1], "register")){
    rv = runregister(argc, argv);
  } else if(!strcmp(argv[1], "relate")){
    rv = runrelate(argc, argv);
  } else if(!strcmp(argv[1], "purge")){
    rv = runpurge(argc, argv);
  } else if(!strcmp(argv[1], "optimize")){
    rv = runoptimize(argc, argv);
  } else if(!strcmp(argv[1], "inform")){
    rv = runinform(argc, argv);
  } else if(!strcmp(argv[1], "merge")){
    rv = runmerge(argc, argv);
  } else if(!strcmp(argv[1], "pree")){
    rv = runpree(argc, argv);
  } else if(!strcmp(argv[1], "version") || !strcmp(argv[1], "--version")){
    printf("Estraier: a personal full-text search system version %s\n", _EST_VERSION);
    printf("Powered by QDBM version %s\n", dpversion);
    printf("Copyright (C) 2003-2006 Mikio Hirabayashi\n");
    rv = 0;
  } else {
    usage();
  }
  return rv;
}


/* set signal handlers
   Installs sigtermhandler for every signal that is treated as a termination request. */
void setsignals(void){
  static const int signums[] = { SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM };
  int i;
  for(i = 0; i < (int)(sizeof(signums) / sizeof(signums[0])); i++){
    signal(signums[i], sigtermhandler);
  }
}


/* handler of termination signal
   `num' is the number of the delivered signal.
   Restores the default disposition of that signal, raises the global `sigterm' flag that the
   worker loops poll, and logs the event. */
void sigtermhandler(int num){
  signal(num, SIG_DFL);
  sigterm = TRUE;
  /* NOTE(review): printfinfo goes through stdio, which POSIX does not list as
     async-signal-safe -- confirm this is acceptable here */
  printfinfo("the termination signal %d catched", num);
}


/* print the usage and exit
   Writes the synopsis of every subcommand and some typical examples to the standard error
   output, then terminates the process with status 1; this function never returns. */
void usage(void){
  fprintf(stderr, "%s: indexer of document files\n", progname);
  fprintf(stderr, "\n");
  fprintf(stderr, "usage:\n");
  fprintf(stderr, "  %s register [-list file] [-force] [-relax] [-wmax num]"
          " [-tsuf sufs] [-hsuf sufs] [-msuf sufs] [-mn] [-xsuf sufs type cmd] [-xtype type cmd]"
          " [-xt] [-xm] [-iz] [-ipre pres] [-isiz size] [-enc code] [-pt code] [-ft code]"
          " [-tattr attrs] [-rich] [-plute] name [dir]\n", progname);
  fprintf(stderr, "  %s relate [-list file] [-force] [-relax] [-ni] name [prefix]\n", progname);
  fprintf(stderr, "  %s purge [-list file] [-force] [-relax] name [prefix]\n", progname);
  fprintf(stderr, "  %s optimize [-relax] [-small] name\n", progname);
  fprintf(stderr, "  %s inform name\n", progname);
  fprintf(stderr, "  %s merge [-relax] [-rich] [-plute] name elems...\n", progname);
  fprintf(stderr, "  %s pree [-h] [-m] [-x type cmd] [-xt] [-xm] [-enc code]"
          " [-pt code] [-ft code] [-tattr attrs] [-wl] [file]\n", progname);
  fprintf(stderr, "  %s version\n", progname);
  fprintf(stderr, "\n");
  fprintf(stderr, "typical examples:\n");
  fprintf(stderr, "  %s register casket      # register documents into the index named as"
          " `casket'\n", progname);
  fprintf(stderr, "  %s relate casket        # prepare scores for relational document search\n",
          progname);
  fprintf(stderr, "  %s purge casket         # purge data of deleted documents\n", progname);
  fprintf(stderr, "  %s optimize casket      # optimize the index\n", progname);
  fprintf(stderr, "\n");
  exit(1);
}


/* parse arguments of register command */
int runregister(int argc, char **argv){
  char *tmp, *name, *dir, *lfile, *tsuf, *hsuf, *msuf, *xsuf, *ipre, *tattr, path[ESTPATHBUFSIZ];
  int i, plen, rv;
  if(!(tmp = cbiconv("Dummy", -1, "ISO-8859-1", "UTF-8", NULL, NULL))){
    fprintf(stderr, "%s: QDBM does not feature iconv\n", progname);
    return 1;
  }
  free(tmp);
  typemap = cbmapopenex(31);
  cbglobalgc(typemap, (void (*)(void *))cbmapclose);
  name = NULL;
  dir = NULL;
  lfile = NULL;
  tsuf = NULL;
  hsuf = NULL;
  msuf = NULL;
  xsuf = NULL;
  ipre = NULL;
  tattr = NULL;
  for(i = 2; i < argc; i++){
    if(!name && argv[i][0] == '-'){
      if(!strcmp(argv[i], "-list")){
        if(++i >= argc) usage();
        lfile = argv[i];
      } else if(!strcmp(argv[i], "-force")){
        force = TRUE;
      } else if(!strcmp(argv[i], "-relax")){
        nice(NICEINC);
        relax = TRUE;
      } else if(!strcmp(argv[i], "-wmax")){
        if(++i >= argc) usage();
        wmax = atoi(argv[i]);
      } else if(!strcmp(argv[i], "-tsuf")){
        if(++i >= argc) usage();
        tsuf = argv[i];
      } else if(!strcmp(argv[i], "-hsuf")){
        if(++i >= argc) usage();
        hsuf = argv[i];
      } else if(!strcmp(argv[i], "-msuf")){
        if(++i >= argc) usage();
        msuf = argv[i];
      } else if(!strcmp(argv[i], "-mn")){
        mnude = TRUE;
      } else if(!strcmp(argv[i], "-xsuf")){
        if(++i >= argc) usage();
        xsuf = argv[i];
        if(++i >= argc) usage();
        xtype = argv[i];
        if(++i >= argc) usage();
        xcmd = argv[i];
      } else if(!strcmp(argv[i], "-xtype")){
        if(++i >= argc || ++i >= argc) usage();
        cbmapput(typemap, argv[i-1], -1, argv[i], -1, FALSE);
      } else if(!strcmp(argv[i], "-xt")){
        xtext = TRUE;
      } else if(!strcmp(argv[i], "-xm")){
        xmime = TRUE;
      } else if(!strcmp(argv[i], "-iz")){
        izero = TRUE;
      } else if(!strcmp(argv[i], "-ipre")){
        if(++i >= argc) usage();
        ipre = argv[i];
      } else if(!strcmp(argv[i], "-isiz")){
        if(++i >= argc) usage();
        isiz = atoi(argv[i]);
      } else if(!strcmp(argv[i], "-enc")){
        if(++i >= argc) usage();
        enc = argv[i];
      } else if(!strcmp(argv[i], "-pt")){
        if(++i >= argc) usage();
        ptcode = argv[i];
      } else if(!strcmp(argv[i], "-ft")){
        if(++i >= argc) usage();
        ftcode = argv[i];
      } else if(!strcmp(argv[i], "-tattr")){
        if(++i >= argc) usage();
        tattr = argv[i];
      } else if(!strcmp(argv[i], "-rich")){
        odsettuning(RCINDEXBNUM, RCINDEXDNUM, RCCACHEBNUM, RCCACHESIZ);
        doopt = FALSE;
      } else if(!strcmp(argv[i], "-plute")){
        odsettuning(PLINDEXBNUM, PLINDEXDNUM, PLCACHEBNUM, PLCACHESIZ);
        doopt = FALSE;
      } else {
        usage();
      }
    } else if(!name){
      name = argv[i];
    } else if(!dir){
      dir = argv[i];
    } else {
      usage();
    }
  }
  if(!name) usage();
  if(!dir) dir = ESTCDIRSTR;
  plen = sprintf(path, "%s", dir);
  if(plen > 1 && path[plen-1] == ESTPATHCHR) path[plen-1] = '\0';
  tsuflist = cbsplit(tsuf ? tsuf : TEXTSUFS, -1, ",");
  hsuflist = cbsplit(hsuf ? hsuf : HTMLSUFS, -1, ",");
  msuflist = cbsplit(msuf ? msuf : MIMESUFS, -1, ",");
  xsuflist = xsuf ? cbsplit(xsuf, -1, ",") : cblistopen();
  iprelist = ipre ? cbsplit(ipre, -1, ",") : cblistopen();
  tattrlist = tattr ? cbsplit(tattr, -1, ",") : cblistopen();
  if(lfile){
    rv = proclist(name, lfile);
  } else {
    rv = procdir(name, path);
  }
  cblistclose(tattrlist);
  cblistclose(iprelist);
  cblistclose(xsuflist);
  cblistclose(msuflist);
  cblistclose(hsuflist);
  cblistclose(tsuflist);
  return rv;
}


/* parse arguments of relate command
   `argc' and `argv' are the arguments of main; parsing starts at argv[2].
   Positional arguments are the index name and then the optional URI prefix, which is stored in
   the global `prefix' (empty string when absent).
   The return value is the exit status of procrelate. */
int runrelate(int argc, char **argv){
  char *dbname = NULL, *listfile = NULL, *arg;
  int pos;
  for(pos = 2; pos < argc; pos++){
    arg = argv[pos];
    if(dbname || arg[0] != '-'){
      /* positional argument: options are no longer accepted once the name is known */
      if(!dbname){
        dbname = arg;
      } else if(!prefix){
        prefix = arg;
      } else {
        usage();
      }
      continue;
    }
    if(!strcmp(arg, "-list")){
      if(++pos >= argc) usage();
      listfile = argv[pos];
    } else if(!strcmp(arg, "-force")){
      force = TRUE;
    } else if(!strcmp(arg, "-relax")){
      nice(NICEINC);
      relax = TRUE;
    } else if(!strcmp(arg, "-ni")){
      tfidf = FALSE;
    } else {
      usage();
    }
  }
  if(!dbname) usage();
  if(!prefix) prefix = "";
  return procrelate(dbname, listfile);
}


/* parse arguments of purge command
   `argc' and `argv' are the arguments of main; parsing starts at argv[2].
   Positional arguments are the index name and then the optional URI prefix, which is stored in
   the global `prefix' (reset first, empty string when absent).
   The return value is the exit status of procpurge. */
int runpurge(int argc, char **argv){
  char *dbname = NULL, *listfile = NULL, *arg;
  int pos;
  prefix = NULL;
  for(pos = 2; pos < argc; pos++){
    arg = argv[pos];
    if(dbname || arg[0] != '-'){
      /* positional argument: options are no longer accepted once the name is known */
      if(!dbname){
        dbname = arg;
      } else if(!prefix){
        prefix = arg;
      } else {
        usage();
      }
      continue;
    }
    if(!strcmp(arg, "-list")){
      if(++pos >= argc) usage();
      listfile = argv[pos];
    } else if(!strcmp(arg, "-force")){
      force = TRUE;
    } else if(!strcmp(arg, "-relax")){
      nice(NICEINC);
      relax = TRUE;
    } else {
      usage();
    }
  }
  if(!dbname) usage();
  if(!prefix) prefix = "";
  return procpurge(dbname, listfile);
}


/* parse arguments of optimize command
   `argc' and `argv' are the arguments of main; parsing starts at argv[2].
   Exactly one positional argument, the index name, is accepted.
   The return value is the exit status of procoptimize. */
int runoptimize(int argc, char **argv){
  char *dbname = NULL, *arg;
  int pos, shrink = FALSE;
  for(pos = 2; pos < argc; pos++){
    arg = argv[pos];
    if(dbname){
      /* nothing may follow the index name */
      usage();
    } else if(arg[0] != '-'){
      dbname = arg;
    } else if(!strcmp(arg, "-relax")){
      nice(NICEINC);
      relax = TRUE;
    } else if(!strcmp(arg, "-small")){
      shrink = TRUE;
    } else {
      usage();
    }
  }
  if(!dbname) usage();
  return procoptimize(dbname, shrink);
}


/* parse arguments of inform command
   `argc' and `argv' are the arguments of main; parsing starts at argv[2].
   The command takes no options and exactly one positional argument, the index name.
   The return value is the exit status of procinform. */
int runinform(int argc, char **argv){
  char *dbname = NULL;
  int pos;
  for(pos = 2; pos < argc; pos++){
    /* reject any option before the name and anything after it */
    if(dbname || argv[pos][0] == '-'){
      usage();
    } else {
      dbname = argv[pos];
    }
  }
  if(!dbname) usage();
  return procinform(dbname);
}


/* parse arguments of merge command
   `argc' and `argv' are the arguments of main; parsing starts at argv[2].
   The first positional argument is the index name; every argument after it is collected as an
   element to merge, and at least one element is required.
   The return value is the exit status of procmerge. */
int runmerge(int argc, char **argv){
  CBLIST *parts = cblistopen();
  char *dbname = NULL, *arg;
  int pos, status;
  for(pos = 2; pos < argc; pos++){
    arg = argv[pos];
    if(dbname){
      /* everything after the name is an element, even if it starts with a dash */
      cblistpush(parts, arg, -1);
    } else if(arg[0] != '-'){
      dbname = arg;
    } else if(!strcmp(arg, "-relax")){
      nice(NICEINC);
      relax = TRUE;
    } else if(!strcmp(arg, "-rich")){
      odsettuning(RCINDEXBNUM, RCINDEXDNUM, RCCACHEBNUM, RCCACHESIZ);
    } else if(!strcmp(arg, "-plute")){
      odsettuning(PLINDEXBNUM, PLINDEXDNUM, PLCACHEBNUM, PLCACHESIZ);
    } else {
      usage();
    }
  }
  if(!dbname) usage();
  if(cblistnum(parts) < 1){
    cblistclose(parts);
    usage();
  }
  status = procmerge(dbname, parts);
  cblistclose(parts);
  return status;
}


/* parse arguments of pree command */
int runpree(int argc, char **argv){
  char *file, *tattr;
  int i, hb, mb, wl, rv;
  file = NULL;
  tattr = NULL;
  hb = FALSE;
  mb = FALSE;
  wl = FALSE;
  for(i = 2; i < argc; i++){
    if(!file && argv[i][0] == '-'){
      if(!strcmp(argv[i], "-h")){
        hb = TRUE;
      } else if(!strcmp(argv[i], "-m")){
        mb = TRUE;
      } else if(!strcmp(argv[i], "-x")){
        if(++i >= argc) usage();
        xtype = argv[i];
        if(++i >= argc) usage();
        xcmd = argv[i];
      } else if(!strcmp(argv[i], "-xt")){
        xtext = TRUE;
      } else if(!strcmp(argv[i], "-xm")){
        xmime = TRUE;
      } else if(!strcmp(argv[i], "-enc")){
        if(++i >= argc) usage();
        enc = argv[i];
      } else if(!strcmp(argv[i], "-pt")){
        if(++i >= argc) usage();
        ptcode = argv[i];
      } else if(!strcmp(argv[i], "-ft")){
        if(++i >= argc) usage();
        ftcode = argv[i];
      } else if(!strcmp(argv[i], "-tattr")){
        if(++i >= argc) usage();
        tattr = argv[i];
      } else if(!strcmp(argv[i], "-wl")){
        wl = TRUE;
      } else {
        usage();
      }
    } else if(!file){
      file = argv[i];
    } else {
      usage();
    }
  }
  tattrlist = tattr ? cbsplit(tattr, -1, ",") : cblistopen();
  rv = procpree(file, hb, mb, wl);
  cblistclose(tattrlist);
  return rv;
}


/* report the outturn
   Callback registered with odsetotcb in main.
   `fname' is not used.  `odeum' is the database handle whose figures are printed.  `msg' is the
   message to print.  In relax mode the call also sleeps for RLXSLEEPSEC seconds. */
void otcb(const char *fname, ODEUM *odeum, const char *msg){
  char *label = odname(odeum);
  /* fall back on a placeholder so that the same free() works for both cases */
  if(!label) label = cbmemdup("(ERROR)", -1);
  printf("%s: INFO: %s: %s: fsiz=%.0f dnum=%d wnum=%d bnum=%d\n",
         progname, label, msg, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  fflush(stdout);
  if(relax) sleep(RLXSLEEPSEC);
  free(label);
}


/* read the standard input
   `sp' receives the number of bytes read.
   The return value is an allocated buffer holding everything read up to EOF, followed by a
   terminating NUL; the caller releases it with free(). */
char *readstdin(int *sp){
  int cap = 256, len = 0, ch;
  char *text = cbmalloc(cap);
  while((ch = getchar()) != EOF){
    /* keep one byte in reserve for the terminator */
    if(len >= cap - 1){
      cap *= 2;
      text = cbrealloc(text, cap);
    }
    text[len++] = ch;
  }
  text[len] = '\0';
  *sp = len;
  return text;
}


/* case sensitive forward matching with a list
   `str' is the string to test.  `keys' is the list of candidate prefixes.
   The return value is TRUE if `str' begins with any non-empty element of `keys', else FALSE.
   The comparison is case sensitive, in contrast to bwimatchlist. */
int fwmatchlist(const char *str, const CBLIST *keys){
  const char *kbuf;
  int i, ksiz;
  for(i = 0; i < cblistnum(keys); i++){
    kbuf = cblistval(keys, i, &ksiz);
    /* empty keys are skipped; otherwise they would match every string */
    if(ksiz > 0 && cbstrfwmatch(str, kbuf)) return TRUE;
  }
  return FALSE;
}


/* case insensitive backward matching with a list
   `str' is the string to test.  `keys' is the list of candidate suffixes.
   The return value is TRUE if any element of `keys' is the wild card SUFWILDCARD or is a
   non-empty suffix of `str' regardless of case, else FALSE. */
int bwimatchlist(const char *str, const CBLIST *keys){
  const char *key;
  int idx, klen;
  for(idx = 0; idx < cblistnum(keys); idx++){
    key = cblistval(keys, idx, &klen);
    if(!strcmp(key, SUFWILDCARD) || (klen > 0 && cbstrbwimatch(str, key))) return TRUE;
  }
  return FALSE;
}


/* read a line
   `ifp' is the input stream.
   The return value is an allocated string holding the next line without its line feed, or NULL
   if the stream ended before any character was stored.  Carriage returns are dropped, and a
   NUL byte in the input ends the line like a line feed does.  The caller releases the string
   with free(). */
char *fgetl(FILE *ifp){
  char *buf;
  int c, len, blen;
  buf = NULL;
  len = 0;
  blen = 256;
  while((c = fgetc(ifp)) != EOF){
    if(c == '\r') continue;
    /* allocate on first use and grow only when full, instead of on every character */
    if(!buf || blen <= len){
      if(blen <= len) blen *= 2;
      buf = cbrealloc(buf, blen + 1);
    }
    if(c == '\n') c = '\0';
    buf[len++] = c;
    if(c == '\0') break;
  }
  if(!buf) return NULL;
  /* the extra byte of the allocation always leaves room for this terminator */
  buf[len] = '\0';
  return buf;
}


/* print an error message */
void pdperror(const char *name){
  printf("%s: ERROR: %s: %s\n", progname, name, dperrmsg(dpecode));
  fflush(stdout);
}


/* print formatted error string and flush the buffer
   `format' is a printf-style format, followed by its arguments.
   The line goes to the standard output as "<progname>: ERROR: <message>" plus a line feed. */
void printferror(const char *format, ...){
  va_list args;
  printf("%s: ERROR: ", progname);
  va_start(args, format);
  vprintf(format, args);
  va_end(args);
  fputc('\n', stdout);
  fflush(stdout);
}


/* print formatted information string and flush the buffer
   `format' is a printf-style format, followed by its arguments.
   The line goes to the standard output as "<progname>: INFO: <message>" plus a line feed. */
void printfinfo(const char *format, ...){
  va_list args;
  printf("%s: INFO: ", progname);
  va_start(args, format);
  vprintf(format, args);
  va_end(args);
  fputc('\n', stdout);
  fflush(stdout);
}


/* relax the stress of the system
   `inc' is the number of points to add for the work just done.
   Does nothing unless relax mode is on.  Points accumulate across calls; once they reach
   RLXPTMAX the process sleeps for RLXSLEEPSEC seconds and the count restarts from zero. */
void relaxsystem(int inc){
  static int points = 0;
  if(relax){
    points += inc;
    if(points < RLXPTMAX) return;
    sleep(RLXSLEEPSEC);
    points = 0;
  }
}


/* get static string of the date in RFC822 format
   `t' is the time to format.
   The return value is a string such as "Thu, 01 Jan 1970 09:00:00 +0900", expressed in local
   time and followed by the offset of local time from UTC.  It lives in a static buffer that
   the next call overwrites.  If the time cannot be broken down, a fixed epoch string is
   returned instead. */
const char *datestr(time_t t){
  static char buf[64];
  struct tm *tp, lt, gt;
  char *wp;
  int lag, sign;
  /* copy each result at once, because gmtime and localtime may share one static object */
  if(!(tp = gmtime(&t))) return "Thu, 01 Jan 1970 00:00:00 GMT";
  gt = *tp;
  if(!(tp = localtime(&t))) return "Thu, 01 Jan 1970 00:00:00 GMT";
  lt = *tp;
  /* offset in minutes from the clock fields, corrected by one day when the dates differ */
  lag = (lt.tm_hour * 60 + lt.tm_min) - (gt.tm_hour * 60 + gt.tm_min);
  if(lt.tm_year > gt.tm_year){
    lag += 24 * 60;
  } else if(lt.tm_year < gt.tm_year){
    lag -= 24 * 60;
  } else if(lt.tm_mon > gt.tm_mon){
    lag += 24 * 60;
  } else if(lt.tm_mon < gt.tm_mon){
    lag -= 24 * 60;
  } else if(lt.tm_mday > gt.tm_mday){
    lag += 24 * 60;
  } else if(lt.tm_mday < gt.tm_mday){
    lag -= 24 * 60;
  }
  /* format sign and magnitude separately: dividing a negative offset would give a negative
     minute field, and would lose the sign of offsets smaller than one hour */
  sign = '+';
  if(lag < 0){
    sign = '-';
    lag = -lag;
  }
  wp = buf + strftime(buf, sizeof(buf) - 1, "%a, %d %b %Y %H:%M:%S", &lt);
  sprintf(wp, " %c%02d%02d", sign, lag / 60, lag % 60);
  return buf;
}


/* make the URI from file path
   `file' is the path of a file.
   The return value is an allocated string in which every path component is URL-encoded and
   the components are joined with '/'.  The caller releases it with free(). */
char *filetouri(const char *file){
  CBLIST *list;
  char delim[2], *buf, *wp, *ebuf;
  const char *name;
  int i, nsiz;
  delim[0] = ESTPATHCHR;
  delim[1] = '\0';
  list = cbsplit(file, -1, delim);
  /* size the result for the worst case instead of using a fixed-size buffer: URL encoding is
     assumed to expand a byte to at most three ("%XX"), and each separator becomes one '/' */
  buf = cbmalloc(strlen(file) * 3 + 1);
  wp = buf;
  for(i = 0; i < cblistnum(list); i++){
    if(i > 0) *(wp++) = '/';
    name = cblistval(list, i, &nsiz);
    ebuf = cburlencode(name, nsiz);
    wp += sprintf(wp, "%s", ebuf);
    free(ebuf);
  }
  cblistclose(list);
  *wp = '\0';
  return buf;
}


/* make the file path from a URI
   `uri' is the URI to convert.
   The return value is the URL-decoded string as returned by cburldecode, with every '/'
   replaced by ESTPATHCHR in place. */
char *uritofile(const char *uri){
  char *path, *cp;
  path = cburldecode(uri, NULL);
  for(cp = path; *cp != '\0'; cp++){
    if(*cp == '/') *cp = ESTPATHCHR;
  }
  return path;
}


/* processing with finding files in a list file
   `name' is the name of the index.  `lfile' is the path of the list file, or "-" to read the
   list from the standard input.
   Each line is a file path, optionally followed by a tab and a second field which is passed
   to indexfile as its `ruri' argument.
   The return value is 0 if every step succeeded, else 1. */
int proclist(const char *name, const char *lfile){
  ODEUM *odeum;
  VILLA *mtdb;
  FILE *ifp;
  char *line, *pv, path[ESTPATHBUFSIZ];
  int err, fatal;
  if(!strcmp(lfile, "-")){
    ifp = stdin;
  } else {
    if(!(ifp = fopen(lfile, "rb"))){
      printferror("%s: file cannot be opened", lfile);
      return 1;
    }
  }
  printfinfo("%s: registration started", name);
  if(!(odeum = odopen(name, OD_OWRITER | OD_OCREAT))){
    pdperror(name);
    if(ifp != stdin) fclose(ifp);
    return 1;
  }
  /* NOTE(review): this sprintf and the one below are unbounded; a very long `name' would
     overflow `path' */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTMTDBNAME);
  /* database mapping each file path to the modification time recorded by indexfile */
  if(!(mtdb = vlopen(path, VL_OWRITER | VL_OCREAT | VL_OZCOMP, VL_CMPLEX))){
    pdperror(name);
    odclose(odeum);
    if(ifp != stdin) fclose(ifp);
    return 1;
  }
  vlsettuning(mtdb, ESTMTDBLRM, ESTMTDBNIM, ESTMTDBLCN, ESTMTDBNCN);
  /* remove the file named ESTWDLSNAME inside the index directory before registering */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTWDLSNAME);
  remove(path);
  printfinfo("%s: database opened: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  err = FALSE;
  while((line = fgetl(ifp)) != NULL){
    /* stop between files once a termination signal has been caught */
    if(sigterm){
      printferror("aborting due to a termination signal");
      free(line);
      err = TRUE;
      break;
    }
    /* split the line at the first tab; `pv' stays NULL when there is no second field */
    if((pv = strchr(line, '\t')) != NULL){
      *pv = '\0';
      pv++;
    }
    if(!indexfile(odeum, mtdb, name, line, pv)) err = TRUE;
    free(line);
  }
  /* query the fatal-error state while the handle is still open */
  fatal = odfatalerror(odeum);
  printfinfo("%s: database closing: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  if(!vlclose(mtdb)){
    pdperror(name);
    err = TRUE;
  }
  if(!odclose(odeum)){
    pdperror(name);
    err = TRUE;
  }
  if(err){
    printfinfo("%s: registration was over%s", name, fatal ? " with fatal error" : "");
  } else {
    printfinfo("%s: registration completed successfully", name);
  }
  if(ifp != stdin) fclose(ifp);
  return err ? 1 : 0;
}


/* processing with finding files in a directory
   `name' is the name of the index.  `dir' is the directory to scan recursively with indexdir.
   The return value is 0 if every step succeeded, else 1. */
int procdir(const char *name, const char *dir){
  ODEUM *odeum;
  VILLA *mtdb;
  char path[ESTPATHBUFSIZ];
  int err, fatal;
  printfinfo("%s: registration started", name);
  if(!(odeum = odopen(name, OD_OWRITER | OD_OCREAT))){
    pdperror(name);
    return 1;
  }
  /* NOTE(review): this sprintf and the one below are unbounded; a very long `name' would
     overflow `path' */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTMTDBNAME);
  /* database mapping each file path to the modification time recorded by indexfile */
  if(!(mtdb = vlopen(path, VL_OWRITER | VL_OCREAT | VL_OZCOMP, VL_CMPLEX))){
    pdperror(name);
    odclose(odeum);
    return 1;
  }
  vlsettuning(mtdb, ESTMTDBLRM, ESTMTDBNIM, ESTMTDBLCN, ESTMTDBNCN);
  /* remove the file named ESTWDLSNAME inside the index directory before registering */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTWDLSNAME);
  remove(path);
  printfinfo("%s: database opened: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  err = FALSE;
  if(!indexdir(odeum, mtdb, name, dir)) err = TRUE;
  /* query the fatal-error state while the handle is still open */
  fatal = odfatalerror(odeum);
  printfinfo("%s: database closing: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  if(!vlclose(mtdb)){
    pdperror(name);
    err = TRUE;
  }
  if(!odclose(odeum)){
    pdperror(name);
    err = TRUE;
  }
  if(err){
    printfinfo("%s: registration was over%s", name, fatal ? " with fatal error" : "");
  } else {
    printfinfo("%s: registration completed successfully", name);
  }
  return err ? 1 : 0;
}


/* find and index files in a directory
   `odeum' is the index handle.  `mtdb' is the database of modification times.  `name' is the
   name of the index.  `dir' is the directory to scan; sub directories are scanned recursively.
   The return value is FALSE if indexing any entry failed or a termination signal was caught,
   else TRUE.  A directory that cannot be opened and an entry that cannot be examined are only
   reported and do not count as failures. */
int indexdir(ODEUM *odeum, VILLA *mtdb, const char *name, const char *dir){
  CBLIST *files;
  const char *file;
  char path[ESTPATHBUFSIZ];
  int i, isroot, isdir, err;
  if(!(files = cbdirlist(dir))){
    printfinfo("%s: directory cannot be opened", dir);
    return TRUE;
  }
  /* the root directory already ends with the separator, so none is inserted after it */
  isroot = dir[0] == ESTPATHCHR && dir[1] == '\0';
  err = FALSE;
  for(i = 0; i < cblistnum(files); i++){
    if(sigterm){
      printferror("aborting due to a termination signal");
      cblistclose(files);
      return FALSE;
    }
    file = cblistval(files, i, NULL);
    if(!strcmp(file, ESTCDIRSTR) || !strcmp(file, ESTPDIRSTR)) continue;
    /* `path' has a fixed size and grows with every level of recursion: skip an entry whose
       joined path (directory, separator, name and terminator) would not fit */
    if(strlen(dir) + strlen(file) + 2 > sizeof(path)){
      printfinfo("%s: path is too long", file);
      continue;
    }
    if(isroot){
      sprintf(path, "%s%s", dir, file);
    } else {
      sprintf(path, "%s%c%s", dir, ESTPATHCHR, file);
    }
    if(!cbfilestat(path, &isdir, NULL, NULL)){
      printfinfo("%s: file does not exist", file);
      continue;
    }
    if(isdir){
      if(!indexdir(odeum, mtdb, name, path)) err = TRUE;
    } else {
      if(!indexfile(odeum, mtdb, name, path, NULL)) err = TRUE;
    }
  }
  cblistclose(files);
  return err ? FALSE : TRUE;
}


/* index a file into the database
   `odeum' is the index handle.  `mtdb' is the database of modification times.  `name' is the
   name of the index, used in messages.  `file' is the path of the file.  `ruri' is the second
   field of a list file line or NULL; it becomes the "realuri" attribute, unless -xtype
   mappings exist, in which case it is taken as the media type of the file.
   The return value is FALSE if storing the document or optimizing the index failed, else TRUE.
   Files that are missing, ignored, unchanged, unreadable or of no known kind are skipped and
   still yield TRUE. */
int indexfile(ODEUM *odeum, VILLA *mtdb, const char *name, const char *file, const char *ruri){
  static int cnt = 0;
  char *buf, *uri;
  const char *cmd, *date, *title;
  int size, wnum, bnum, err;
  time_t mtime;
  ODDOC *doc;
  if(!cbfilestat(file, NULL, &size, &mtime)){
    printfinfo("%s: file does not exist", file);
    return TRUE;
  }
  date = datestr(mtime);
  doc = NULL;
  /* with -xtype mappings present, `ruri' carries the media type of this file; note that the
     globals `xtype' and `xcmd' are overwritten on every call in that mode */
  if(cbmaprnum(typemap) > 0){
    xtype = ruri;
    xcmd = "";
    ruri = NULL;
  }
  /* pick how to parse the file: ignore rules first, then the type map, then the suffix lists */
  if(fwmatchlist(file, iprelist) || (isiz > 0 && size > isiz)){
    return TRUE;
  } else if(xtype && xtype[0] != '\0' && (cmd = cbmapget(typemap, xtype, -1, NULL))){
    xcmd = cmd;
    uri = filetouri(file);
    /* unless forced, skip files that ishot does not report as new or modified */
    if(!force && !ishot(mtdb, file, mtime)){
      printfinfo("%s: passed", uri);
      free(uri);
      relaxsystem(RLXPASSINC);
      return TRUE;
    }
    if(!(buf = cbreadfile(file, NULL))){
      printfinfo("%s: file cannot be opened", uri);
      free(uri);
      return TRUE;
    }
    doc = docext(uri, file, buf, size);
    if(!oddocgetattr(doc, "date")) oddocaddattr(doc, "date", date);
    free(uri);
    free(buf);
    ruri = NULL;
  } else if(bwimatchlist(file, tsuflist)){
    uri = filetouri(file);
    if(!force && !ishot(mtdb, file, mtime)){
      printfinfo("%s: passed", uri);
      free(uri);
      relaxsystem(RLXPASSINC);
      return TRUE;
    }
    /* only this branch takes the size from the read; the others use the size from cbfilestat */
    if(!(buf = cbreadfile(file, &size))){
      printfinfo("%s: file cannot be opened", uri);
      free(uri);
      return TRUE;
    }
    doc = estdocplain(uri, buf, size, enc);
    if(!oddocgetattr(doc, "date")) oddocaddattr(doc, "date", date);
    free(uri);
    free(buf);
  } else if(bwimatchlist(file, hsuflist)){
    uri = filetouri(file);
    if(!force && !ishot(mtdb, file, mtime)){
      printfinfo("%s: passed", uri);
      free(uri);
      relaxsystem(RLXPASSINC);
      return TRUE;
    }
    if(!(buf = cbreadfile(file, NULL))){
      printfinfo("%s: file cannot be opened", uri);
      free(uri);
      return TRUE;
    }
    doc = estdochtml(uri, buf, size, enc);
    if(!oddocgetattr(doc, "date")) oddocaddattr(doc, "date", date);
    free(uri);
    free(buf);
  } else if(bwimatchlist(file, msuflist)){
    uri = filetouri(file);
    if(!force && !ishot(mtdb, file, mtime)){
      printfinfo("%s: passed", uri);
      free(uri);
      relaxsystem(RLXPASSINC);
      return TRUE;
    }
    if(!(buf = cbreadfile(file, NULL))){
      /* NOTE(review): this branch reports `file' where the sibling branches report `uri' */
      printfinfo("%s: file cannot be opened", file);
      free(uri);
      return TRUE;
    }
    doc = estdocmime(uri, buf, enc, mnude);
    if(!oddocgetattr(doc, "date")) oddocaddattr(doc, "date", date);
    free(uri);
    free(buf);
  } else if(xcmd && xtype && bwimatchlist(file, xsuflist)){
    uri = filetouri(file);
    if(!force && !ishot(mtdb, file, mtime)){
      printfinfo("%s: passed", uri);
      free(uri);
      relaxsystem(RLXPASSINC);
      return TRUE;
    }
    if(!(buf = cbreadfile(file, NULL))){
      printfinfo("%s: file cannot be opened", uri);
      free(uri);
      return TRUE;
    }
    doc = docext(uri, file, buf, size);
    if(!oddocgetattr(doc, "date")) oddocaddattr(doc, "date", date);
    free(uri);
    free(buf);
  }
  err = FALSE;
  if(doc){
    /* with -iz, documents without any word are not registered */
    if(izero && cblistnum(oddocnwords(doc)) < 1){
      printfinfo("%s: passed", oddocuri(doc));
      oddocclose(doc);
      return TRUE;
    }
    if(ruri) oddocaddattr(doc, "realuri", ruri);
    docltitle(doc, file);
    /* fall back on the base name of the file as the title, but only if cbencname reports
       it as US-ASCII */
    if(!mnude && (!(title = oddocgetattr(doc, "title")) || strlen(title) < 1)){
      if((title = strrchr(file, ESTPATHCHR)) != NULL){
        title++;
      } else {
        title = file;
      }
      if(!strcmp(cbencname(title, -1), "US-ASCII")) oddocaddattr(doc, "title", title);
    }
    docsettattrs(doc);
    /* store the document, then record its modification time under the file path so that
       ishot can recognize the file as unchanged later */
    if(odput(odeum, doc, wmax, TRUE) &&
       vlput(mtdb, file, -1, (char *)&mtime, sizeof(time_t), VL_DOVER)){
      printfinfo("%s: registered: id=%d wnum=%d",
                 oddocuri(doc), oddocid(doc), cblistnum(oddocnwords(doc)));
      cnt++;
      relaxsystem(RLXREGINC);
    } else {
      pdperror(file);
      err = TRUE;
    }
    oddocclose(doc);
  }
  /* optimize the index when the ratio of words to buckets exceeds ESTMAXLOAD, unless
     `doopt' has been switched off */
  wnum = odwnum(odeum);
  bnum = odbnum(odeum);
  if(doopt && wnum != -1 && bnum != -1 && (double)wnum / (double)bnum > ESTMAXLOAD){
    printfinfo("%s: optimizing started: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
               name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
    if(odoptimize(odeum)){
      printfinfo("%s: optimizing completed: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
                 name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
    } else {
      pdperror(file);
      err = TRUE;
    }
  }
  /* report the database status once per 256 registered documents */
  if(cnt >= 256){
    printfinfo("%s: database status: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
               name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
    cnt = 0;
  }
  return err ? FALSE : TRUE;
}


/* check whether a document of the path is new or modified.
   `mtdb' is the database of modification times.  `path' is the path of the file.  `mtime' is
   the current modification time of the file.
   The return value is FALSE only if a record of the expected size exists for the path and
   `mtime' is not later than the recorded time; otherwise it is TRUE. */
int ishot(VILLA *mtdb, const char *path, time_t mtime){
  char *vbuf;
  int hot, vsiz;
  hot = TRUE;
  if((vbuf = vlget(mtdb, path, -1, &vsiz)) != NULL){
    /* indexfile stores sizeof(time_t) bytes, so the record must have exactly that size;
       comparing against sizeof(int) never matches where the two sizes differ */
    if(vsiz == (int)sizeof(time_t) && mtime <= *(time_t *)vbuf) hot = FALSE;
    free(vbuf);
  }
  return hot;
}


/* Register scores of documents.
   `name' specifies the name of the index.  The score database and the date database are
   opened (and created if missing) under it.
   `lfile' specifies a list file whose entries are read with `fgetl' and converted with
   `filetouri'.  "-" means the standard input.  If it is `NULL', the target URIs come from
   `estiternext' using the global `prefix'.
   For each target document, the dumped score map is stored in the score database and the
   value of the "date" attribute is stored in the date database, both keyed by the document ID.
   If the global `estisregex' is set, the word list file is rewritten from the inverted index.
   The return value is 0 if successful, else 1. */
int procrelate(const char *name, const char *lfile){
  ODEUM *odeum;
  CURIA *scdb, *indexdb;
  DEPOT *dtdb;
  ODDOC *doc;
  VILLA *rdocs;
  CBMAP *scores;
  FILE *ifp, *ofp;
  const char *dstr;
  char path[ESTPATHBUFSIZ], *line, *uri, *vbuf, *mbuf, *iword;
  int i, bnum, omode, err, cnt, fatal, vsiz, id, msiz, date;
  ifp = NULL;
  if(lfile){
    if(!strcmp(lfile, "-")){
      ifp = stdin;
    } else if(!(ifp = fopen(lfile, "rb"))){
      printferror("%s: file cannot be opened", lfile);
      return 1;
    }
  }
  printfinfo("%s: relating started", name);
  if(!(odeum = odopen(name, OD_OWRITER))){
    pdperror(name);
    if(ifp && ifp != stdin) fclose(ifp);
    return 1;
  }
  rdocs = odidbrdocs(odeum);
  /* open the score database; it is truncated only for a forced run over the whole index */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTSCDBNAME);
  bnum = (oddnum(odeum) * ESTSCDBBRAT) / ESTSCDBDIVNUM;
  omode = CR_OWRITER | CR_OCREAT;
  /* NOTE(review): a Depot flag is OR-ed into a Curia open mode here; confirm against the
     QDBM headers that it has the intended meaning for `cropen' */
  if(prefix[0] == '\0' && !lfile && force) omode |= DP_OTRUNC;
  if(!(scdb = cropen(path, omode, bnum, ESTSCDBDIVNUM))){
    pdperror(name);
    odclose(odeum);
    if(ifp && ifp != stdin) fclose(ifp);
    return 1;
  }
  /* open the date database under the same truncation rule */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTDTDBNAME);
  bnum = oddnum(odeum) * ESTDTDBBRAT;
  omode = DP_OWRITER | DP_OCREAT;
  if(prefix[0] == '\0' && !lfile && force) omode |= DP_OTRUNC;
  if(!(dtdb = dpopen(path, omode, bnum))){
    pdperror(name);
    crclose(scdb);
    odclose(odeum);
    if(ifp && ifp != stdin) fclose(ifp);
    return 1;
  }
  printfinfo("%s: database opened: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  err = FALSE;
  if(!estiterinit(odeum, prefix)){
    pdperror(name);
    err = TRUE;
  } else {
    cnt = 0;
    for(i = 0; TRUE; i++){
      if(sigterm){
        printferror("aborting due to a termination signal");
        err = TRUE;
        break;
      }
      /* pick the next target URI from the list file or from the iterator */
      if(ifp){
        if(!(line = fgetl(ifp))) break;
        uri = filetouri(line);
        free(line);
      } else {
        if(!(uri = estiternext(odeum, prefix))){
          if(dpecode != DP_ENOITEM){
            pdperror(name);
            err = TRUE;
          }
          break;
        }
      }
      /* unless forced, skip a document whose ID already has a record in the score database */
      if(!force && (vbuf = vlget(rdocs, uri, -1, &vsiz)) != NULL){
        if(vsiz == sizeof(int) && crvsiz(scdb, vbuf, vsiz) != -1){
          printfinfo("%s: passed", uri);
          free(vbuf);
          free(uri);
          relaxsystem(RLXPASSINC);
          continue;
        }
        free(vbuf);
      }
      if(!(doc = odget(odeum, uri))){
        if(dpecode != DP_ENOITEM){
          pdperror(name);
          err = TRUE;
        } else {
          printfinfo("%s: no such document", uri);
        }
        free(uri);
        continue;
      }
      id = oddocid(doc);
      scores = oddocscores(doc, ESTKEYNUM, tfidf ? odeum : NULL);
      mbuf = cbmapdump(scores, &msiz);
      dstr = oddocgetattr(doc, "date");
      /* -1 is recorded when the document has no "date" attribute */
      date = dstr ? eststrmktime(dstr) : -1;
      if(!crput(scdb, (char *)&id, sizeof(int), mbuf, msiz, CR_DOVER)){
        pdperror(name);
        err = TRUE;
      } else if(!dpput(dtdb, (char *)&id, sizeof(int), (char *)&date, sizeof(int), DP_DOVER)){
        pdperror(name);
        err = TRUE;
      } else {
        printfinfo("%s: related: id=%d", uri, id);
        relaxsystem(RLXRELINC);
      }
      free(mbuf);
      cbmapclose(scores);
      oddocclose(doc);
      free(uri);
      cnt++;
      /* report the progress once per 256 processed documents */
      if(cnt >= 256){
        printfinfo("%s: progress: %d/%d", name, i + 1, oddnum(odeum));
        cnt = 0;
      }
      if(err) break;
    }
  }
  if(estisregex){
    /* dump every key of the inverted index into the word list file, one per line */
    printfinfo("%s: updating the word list", name);
    indexdb = odidbindex(odeum);
    sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTWDLSNAME);
    if((ofp = fopen(path, "wb")) != NULL){
      criterinit(indexdb);
      while((iword = criternext(indexdb, NULL)) != NULL){
        fprintf(ofp, "%s\n", iword);
        free(iword);
      }
      if(fclose(ofp) == EOF){
        /* the format has a `%s' conversion, so the path must be supplied */
        printferror("%s: updating failed", path);
        err = TRUE;
      }
    } else {
      printferror("%s: cannot open", path);
      err = TRUE;
    }
  }
  /* sample the fatal flag before the handle is closed */
  fatal = odfatalerror(odeum);
  printfinfo("%s: database closing: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  if(!dpclose(dtdb)){
    pdperror(name);
    err = TRUE;
  }
  if(!crclose(scdb)){
    pdperror(name);
    err = TRUE;
  }
  if(!odclose(odeum)){
    pdperror(name);
    err = TRUE;
  }
  if(err){
    printfinfo("%s: relating was over%s", name, fatal ? " with fatal error" : "");
  } else {
    printfinfo("%s: relating completed successfully", name);
  }
  if(ifp && ifp != stdin) fclose(ifp);
  return err ? 1 : 0;
}


/* Purge documents whose files no longer exist.
   `name' specifies the name of the index.
   `lfile' specifies a list file whose entries are read with `fgetl' and treated as file
   paths.  "-" means the standard input.  If it is `NULL', the target URIs come from
   `estiternext' using the global `prefix' and are converted with `uritofile'.
   A document is kept when `cbfilestat' succeeds on its file, unless the global `force' is set.
   Otherwise it is removed from the index and the modified time database, and, when they could
   be opened, from the score database and the date database.
   The return value is 0 if successful, else 1. */
int procpurge(const char *name, const char *lfile){
  ODEUM *odeum;
  VILLA *rdocs;
  VILLA *mtdb;
  CURIA *scdb;
  DEPOT *dtdb;
  FILE *ifp;
  char path[ESTPATHBUFSIZ], *uri, *file, *vbuf;
  int err, vsiz, hit, fatal, failed;
  ifp = NULL;
  if(lfile){
    if(!strcmp(lfile, "-")){
      ifp = stdin;
    } else if(!(ifp = fopen(lfile, "rb"))){
      printferror("%s: file cannot be opened", lfile);
      return 1;
    }
  }
  printfinfo("%s: purging started", name);
  if(!(odeum = odopen(name, OD_OWRITER))){
    pdperror(name);
    if(ifp && ifp != stdin) fclose(ifp);
    return 1;
  }
  rdocs = odidbrdocs(odeum);
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTMTDBNAME);
  if(!(mtdb = vlopen(path, VL_OWRITER | VL_OZCOMP, VL_CMPLEX))){
    pdperror(name);
    odclose(odeum);
    if(ifp && ifp != stdin) fclose(ifp);
    return 1;
  }
  /* the score and date databases are optional here: a failure to open them is not an error
     and simply disables the corresponding removal below */
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTSCDBNAME);
  scdb = cropen(path, CR_OWRITER, -1, -1);
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTDTDBNAME);
  dtdb = dpopen(path, DP_OWRITER, -1);
  printfinfo("%s: database opened: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  err = FALSE;
  if(!estiterinit(odeum, prefix)){
    pdperror(name);
    err = TRUE;
  } else {
    while(TRUE){
      if(sigterm){
        printferror("aborting due to a termination signal");
        err = TRUE;
        break;
      }
      /* obtain the next pair of a file path and its URI; both are released per iteration */
      if(ifp){
        if(!(file = fgetl(ifp))) break;
        uri = filetouri(file);
      } else {
        if(!(uri = estiternext(odeum, prefix))){
          if(dpecode != DP_ENOITEM){
            pdperror(name);
            err = TRUE;
          }
          break;
        }
        file = uritofile(uri);
      }
      if(!force && cbfilestat(file, NULL, NULL, NULL)){
        printfinfo("%s: passed", uri);
        relaxsystem(RLXPASSINC);
      } else {
        /* the value recorded for the URI in `rdocs' is the key of the score and date records */
        if((scdb || dtdb) && (vbuf = vlget(rdocs, uri, -1, &vsiz)) != NULL){
          if(scdb) crout(scdb, vbuf, vsiz);
          if(dtdb) dpout(dtdb, vbuf, vsiz);
          free(vbuf);
        }
        hit = FALSE;
        /* `failed' covers this document only, so that an error on an earlier document does
           not make every later successful purge be reported as "no such document" */
        failed = FALSE;
        if(odout(odeum, uri)){
          hit = TRUE;
        } else if(dpecode != DP_ENOITEM){
          pdperror(file);
          failed = TRUE;
        }
        if(!estiterresurge(odeum, uri) && dpecode != DP_ENOITEM){
          pdperror(file);
          failed = TRUE;
        }
        if(!vlout(mtdb, uri, -1) && dpecode != DP_ENOITEM){
          pdperror(file);
          failed = TRUE;
        }
        if(failed){
          /* the error itself has already been reported by `pdperror' */
          err = TRUE;
        } else if(hit){
          printfinfo("%s: purged", uri);
          relaxsystem(RLXPRGINC);
        } else {
          printfinfo("%s: no such document", uri);
        }
      }
      free(file);
      free(uri);
    }
  }
  /* sample the fatal flag before the handle is closed */
  fatal = odfatalerror(odeum);
  printfinfo("%s: database closing: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  if(dtdb && !dpclose(dtdb)){
    pdperror(name);
    err = TRUE;
  }
  if(scdb && !crclose(scdb)){
    pdperror(name);
    err = TRUE;
  }
  if(!vlclose(mtdb)){
    pdperror(name);
    err = TRUE;
  }
  if(!odclose(odeum)){
    pdperror(name);
    err = TRUE;
  }
  if(err){
    printfinfo("%s: purging was over%s", name, fatal ? " with fatal error" : "");
  } else {
    printfinfo("%s: purging completed successfully", name);
  }
  if(ifp && ifp != stdin) fclose(ifp);
  return err ? 1 : 0;
}


/* Optimize a database.
   `name' specifies the name of the index.
   `small' specifies whether to also shrink the inner databases of the index and the
   auxiliary databases (modified time, score and date) that can be opened under it.
   The return value is 0 if successful, else 1. */
int procoptimize(const char *name, int small){
  ODEUM *odeum;
  CURIA *docsdb, *indexdb, *scdb;
  VILLA *rdocsdb, *mtdb;
  DEPOT *dtdb;
  char path[ESTPATHBUFSIZ];
  odeum = odopen(name, OD_OWRITER);
  if(!odeum){
    pdperror(name);
    return 1;
  }
  printfinfo("%s: optimizing started: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  if(!odoptimize(odeum)) goto failure;
  if(small){
    docsdb = odidbdocs(odeum);
    indexdb = odidbindex(odeum);
    rdocsdb = odidbrdocs(odeum);
    /* inner databases of the index itself */
    printfinfo("%s: shrinking the document database: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
               name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
    crsetalign(docsdb, 0);
    if(!croptimize(docsdb, oddnum(odeum) * 2 + 1)) goto failure;
    printfinfo("%s: shrinking the inverted index: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
               name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
    crsetalign(indexdb, 0);
    if(!croptimize(indexdb, odwnum(odeum) * 2 + 1)) goto failure;
    printfinfo("%s: shrinking the reverse dictionary: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
               name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
    if(!vloptimize(rdocsdb)) goto failure;
    /* auxiliary databases: each one is skipped silently when it cannot be opened */
    sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTMTDBNAME);
    mtdb = vlopen(path, VL_OWRITER | VL_OZCOMP, VL_CMPLEX);
    if(mtdb){
      printfinfo("%s: shrinking the modified time database: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
                 name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
      if(!vloptimize(mtdb)){
        vlclose(mtdb);
        goto failure;
      }
      if(!vlclose(mtdb)) goto failure;
    }
    sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTSCDBNAME);
    scdb = cropen(path, CR_OWRITER, -1, -1);
    if(scdb){
      printfinfo("%s: shrinking the score database: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
                 name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
      if(!croptimize(scdb, oddnum(odeum) * 2 + 1)){
        crclose(scdb);
        goto failure;
      }
      if(!crclose(scdb)) goto failure;
    }
    sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTDTDBNAME);
    dtdb = dpopen(path, DP_OWRITER, -1);
    if(dtdb){
      printfinfo("%s: shrinking the date database: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
                 name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
      if(!dpoptimize(dtdb, oddnum(odeum) * 2 + 1)){
        dpclose(dtdb);
        goto failure;
      }
      if(!dpclose(dtdb)) goto failure;
    }
  }
  printfinfo("%s: optimizing completed: fsiz=%.0f dnum=%d wnum=%d bnum=%d",
             name, odfsiz(odeum), oddnum(odeum), odwnum(odeum), odbnum(odeum));
  if(odclose(odeum)) return 0;
  pdperror(name);
  return 1;
 failure:
  /* common tail of every failure while the index handle is still open */
  pdperror(name);
  odclose(odeum);
  return 1;
}


/* Print information of a database.
   `name' specifies the name of the index, which is opened as a reader.
   The name, the file size, the bucket counts, the numbers of documents and words, the inode
   number and the modified time are written to the standard output, one item per line.
   The return value is 0 if successful, else 1. */
int procinform(const char *name){
  ODEUM *odeum;
  char *dbname;
  odeum = odopen(name, OD_OREADER);
  if(odeum == NULL){
    pdperror(name);
    return 1;
  }
  dbname = odname(odeum);
  if(dbname != NULL){
    printf("name: %s\n", dbname);
  } else {
    printf("name: %s\n", "(null)");
  }
  free(dbname);
  printf("file size: %.0f\n", odfsiz(odeum));
  printf("index buckets: %d\n", odbnum(odeum));
  printf("used buckets: %d\n", odbusenum(odeum));
  printf("all documents: %d\n", oddnum(odeum));
  printf("all words: %d\n", odwnum(odeum));
  printf("inode number: %d\n", odinode(odeum));
  printf("modified time: %.0f\n", (double)odmtime(odeum));
  if(odclose(odeum)) return 0;
  pdperror(name);
  return 1;
}


/* Merge some databases.
   `name' specifies the name of the index to be built by `odmerge'.
   `elems' specifies the list of the names of the source indexes.
   After the indexes are merged, the modified time database of the result is created anew and
   filled with the records of every source one.  A key that is already present is kept as is.
   The return value is 0 if successful, else 1. */
int procmerge(const char *name, const CBLIST *elems){
  VILLA *dest, *src;
  char path[ESTPATHBUFSIZ], *key, *val;
  int idx, failed, klen, vlen;
  printfinfo("%s: merging started", name);
  if(!odmerge(name, elems)){
    pdperror(name);
    return 1;
  }
  sprintf(path, "%s%c%s", name, ESTPATHCHR, ESTMTDBNAME);
  dest = vlopen(path, VL_OWRITER | VL_OCREAT | VL_OTRUNC | VL_OZCOMP, VL_CMPLEX);
  if(!dest){
    pdperror(name);
    return 1;
  }
  failed = FALSE;
  for(idx = 0; idx < cblistnum(elems); idx++){
    sprintf(path, "%s%c%s", cblistval(elems, idx, NULL), ESTPATHCHR, ESTMTDBNAME);
    src = vlopen(path, VL_OREADER, VL_CMPLEX);
    if(!src){
      pdperror(name);
      failed = TRUE;
      break;
    }
    /* walk the cursor over the source; the body runs at least once even if the cursor could
       not be positioned, in which case both buffers are `NULL' and nothing is stored */
    vlcurfirst(src);
    do {
      key = vlcurkey(src, &klen);
      val = vlcurval(src, &vlen);
      if(key && val) vlput(dest, key, klen, val, vlen, VL_DKEEP);
      free(val);
      free(key);
    } while(vlcurnext(src));
    if(!vlclose(src)){
      pdperror(name);
      failed = TRUE;
      break;
    }
  }
  if(!vlclose(dest)){
    pdperror(name);
    failed = TRUE;
  }
  if(!failed){
    printfinfo("%s: merging completed successfully", name);
    return 0;
  }
  printfinfo("%s: merging failed", name);
  return 1;
}


/* Print attributes and words extracted from a text.
   `path' specifies the path of the input file.  If it is `NULL', the standard input is read
   and "STDIN" is used as the path.
   `hb' specifies whether to treat the input as HTML.
   `mb' specifies whether to treat the input as MIME.  It is consulted only if `hb' is false.
   `wl' specifies whether to print the list of normalized words instead of the whole document.
   If neither `hb' nor `mb' is true, the extension command is used when both of the globals
   `xtype' and `xcmd' are set, else the input is treated as plain text.
   The return value is 0 if successful, else 1. */
int procpree(const char *path, int hb, int mb, int wl){
  ODDOC *doc;
  char *text, *uri;
  int size;
  if(!path){
    path = "STDIN";
    text = readstdin(&size);
  } else {
    text = cbreadfile(path, &size);
    if(!text){
      fprintf(stderr, "%s: %s: cannot open\n", progname, path);
      return 1;
    }
  }
  uri = filetouri(path);
  /* choose the parser according to the requested format */
  if(hb){
    doc = estdochtml(uri, text, size, enc);
  } else if(mb){
    doc = estdocmime(uri, text, enc, mnude);
  } else if(xtype && xcmd){
    doc = docext(uri, path, text, size);
  } else {
    doc = estdocplain(uri, text, size, enc);
  }
  docltitle(doc, path);
  docsettattrs(doc);
  free(uri);
  if(!wl){
    showdoc(doc);
  } else {
    showwlist(doc);
  }
  oddocclose(doc);
  free(text);
  return 0;
}


/* Make a document handle using an extension command.
   `uri' specifies the URI of the document.
   `path' specifies the path of the original file.  It is exported as the environment
   variable "ESTORIG" before the command runs.
   `ptr' specifies the pointer to the content of the file.
   `size' specifies the size of the content.
   The content is written into the temporary input file, and the filter named by the global
   `xcmd' is run on it: a dynamically linked function if `xcmd' begins with `DLFILTERPRE',
   else a shell command.  The temporary output file is then parsed as plain text, MIME or HTML
   according to the globals `xtext' and `xmime'.  If no output can be read, an empty document
   is returned.  Both temporary files are removed before returning.
   The return value is the document handle, with the "type" and "size" attributes added. */
ODDOC *docext(const char *uri, const char *path, const char *ptr, int size){
  ODDOC *doc;
  FILE *ofp;
  ESTFILTER filter;
  char cmd[ESTPATHBUFSIZ], *text, numbuf[ESTNUMBUFSIZ];
  int tsiz;
  if((ofp = fopen(EXTINFILE, "wb")) != NULL){
    /* write the whole content at once instead of byte by byte */
    if(size > 0) fwrite(ptr, 1, (size_t)size, ofp);
    fclose(ofp);
  }
  estputenv("ESTORIG", path);
  if(xcmd[0] == DLFILTERPRE){
    if((filter = estfilterget(xcmd + 1)) != NULL){
      filter(EXTINFILE, EXTOUTFILE);
    } else {
      fprintf(stderr, "%s: no valid function in %s\n", progname, xcmd + 1);
    }
  } else if(strlen(xcmd) + sizeof(EXTINFILE) + sizeof(EXTOUTFILE) + 1 > sizeof(cmd)){
    /* the command, two separators, both file names and the terminator would not fit into the
       buffer, so refuse to build the command line rather than overflow it */
    fprintf(stderr, "%s: %s: too long command\n", progname, xcmd);
  } else {
    sprintf(cmd, "%s %s %s", xcmd, EXTINFILE, EXTOUTFILE);
    system(cmd);
  }
  if((text = cbreadfile(EXTOUTFILE, &tsiz)) != NULL){
    if(xtext){
      doc = estdocplain(uri, text, tsiz, enc);
    } else if(xmime){
      doc = estdocmime(uri, text, enc, mnude);
    } else {
      doc = estdochtml(uri, text, tsiz, enc);
    }
    free(text);
  } else {
    /* no output: fall back to an empty document, identified by the URI like the documents
       made by the other branches */
    doc = oddocopen(uri);
  }
  oddocaddattr(doc, "type", xtype);
  sprintf(numbuf, "%d", size);
  oddocaddattr(doc, "size", numbuf);
  remove(EXTOUTFILE);
  remove(EXTINFILE);
  return doc;
}


/* Set the local path of a file as its title.
   `doc' specifies the document handle to be modified.
   `path' specifies the local path of the file.
   Nothing is done unless one of the globals `ptcode' and `ftcode' is set.  The path, without
   a leading current directory component, is converted into UTF-8 from the encoding named by
   `ptcode', or by `ftcode' if `ptcode' is not set.  If `ftcode' is set, only the part after
   the last path separator is used.  The result is set as the "title" attribute and is also
   added to the text of the document. */
void docltitle(ODDOC *doc, const char *path){
  char *conv, *base;
  const char *srcenc;
  if(!ptcode && !ftcode) return;
  if(cbstrfwmatch(path, ESTCDIRSTR) && path[sizeof(ESTCDIRSTR)-1] == ESTPATHCHR)
    path += sizeof(ESTCDIRSTR);
  srcenc = ptcode ? ptcode : ftcode;
  conv = cbiconv(path, -1, srcenc, "UTF-8", NULL, NULL);
  if(conv == NULL) return;
  /* with `ftcode', cut the directory part off when there is a separator */
  base = ftcode ? strrchr(conv, ESTPATHCHR) : NULL;
  if(base != NULL){
    base++;
  } else {
    base = conv;
  }
  oddocaddattr(doc, "title", base);
  estdocaddtext(doc, base, strlen(base), "UTF-8", ESTDOCNONLY);
  free(conv);
}


/* Merge attributes to the tail of the text.
   `doc' specifies the document handle to be modified.
   For each attribute name in the global list `tattrlist', the value of the attribute is added
   to the text of the document if the attribute exists and is not empty. */
void docsettattrs(ODDOC *doc){
  const char *attr;
  int idx;
  for(idx = 0; idx < cblistnum(tattrlist); idx++){
    attr = oddocgetattr(doc, cblistval(tattrlist, idx, NULL));
    if(attr && attr[0] != '\0'){
      estdocaddtext(doc, attr, strlen(attr), "UTF-8", ESTDOCNONLY);
    }
  }
}


/* Show list of normalized words in a document.
   `doc' specifies the document handle.
   The non-empty normalized words are written to the standard output on one line, separated
   by a space character and followed by a line feed. */
void showwlist(ODDOC *doc){
  const CBLIST *words;
  const char *word;
  int idx, printed;
  words = oddocnwords(doc);
  printed = 0;
  for(idx = 0; idx < cblistnum(words); idx++){
    word = cblistval(words, idx, NULL);
    if(word[0] != '\0'){
      /* the separator goes before every word except the first printed one */
      if(printed > 0) putchar(' ');
      printf("%s", word);
      printed++;
    }
  }
  putchar('\n');
}


/* Show attributes and words of a document.
   `doc' specifies the document handle.
   The URI and each existing attribute among title, author, date, type, encoding and size are
   written to the standard output as header lines.  After an empty line, each normalized word
   is written with the word at the same index of the appearance form list, separated by a
   tab character. */
void showdoc(ODDOC *doc){
  const char *value;
  const CBLIST *normals, *appears;
  int idx;
  printf("URI: %s\n", oddocuri(doc));
  value = oddocgetattr(doc, "title");
  if(value) printf("Title: %s\n", value);
  value = oddocgetattr(doc, "author");
  if(value) printf("Author: %s\n", value);
  value = oddocgetattr(doc, "date");
  if(value) printf("Date: %s\n", value);
  value = oddocgetattr(doc, "type");
  if(value) printf("Type: %s\n", value);
  value = oddocgetattr(doc, "encoding");
  if(value) printf("Encoding: %s\n", value);
  value = oddocgetattr(doc, "size");
  if(value) printf("Size: %s\n", value);
  printf("\n");
  normals = oddocnwords(doc);
  appears = oddocawords(doc);
  for(idx = 0; idx < cblistnum(normals); idx++){
    printf("%s\t%s\n", cblistval(normals, idx, NULL), cblistval(appears, idx, NULL));
  }
}



/* END OF FILE */
