/*************************************************************************************************
 * The test cases of the q-gram database API
 *                                                      Copyright (C) 2007-2008 Mikio Hirabayashi
 * This file is part of Tokyo Dystopia.
 * Tokyo Dystopia is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License or any later version.  Tokyo Dystopia is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 * You should have received a copy of the GNU Lesser General Public License along with Tokyo
 * Dystopia; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307 USA.
 *************************************************************************************************/


#include <tcqdb.h>
#include "myconf.h"

#define TEXTBUFSIZ     65536             // element count of the text and UCS-2 work buffers
#define DEFAVGLEN      16                // default average text length (initial value of `alen')


/* global variables */
const char *g_progname;                  // program name; set from argv[0], shown in messages
int g_dbgfd;                             // debugging output from TCDBGFD; negative means unset


/* function prototypes
   eprint() is not listed here because its definition precedes its first use. */
int main(int argc, char **argv);
// reporting and data generation helpers
static void usage(void);
static void iprintf(const char *format, ...);
static int myrand(int range);
static bool mysynccb(int total, int current, const char *msg, void *opq);
static void setrndtext(char *buf, int avg, int min, bool en);
static void setidtext(char *buf, uint64_t id, bool en);
// argument parsers, one per sub command
static int runwrite(int argc, char **argv);
static int runread(int argc, char **argv);
static int runwicked(int argc, char **argv);
// workers, one per sub command; each returns 0 on success and 1 on error
static int procwrite(const char *path, int rnum, int opts, int etnum, int64_t icsiz, int omode,
                     int alen, bool en, int topts);
static int procread(const char *path, int rnum, int omode, int alen, int mlen, bool en,
                    int smode, int topts);
static int procwicked(const char *path, int rnum, int opts, int etnum, int64_t icsiz, int omode,
                      int alen, bool en, int topts);


/* main routine
   Records the program name, reads the optional debugging descriptor from the environment
   variable TCDBGFD, seeds the random number generator from the current time, and dispatches
   to the parser of the sub command named by the first argument.
   `argc' specifies the number of command line arguments.
   `argv' specifies the command line arguments.
   The return value is the status of the sub command.  A missing or unknown sub command ends
   in usage(), which terminates the process. */
int main(int argc, char **argv){
  g_progname = argv[0];
  g_dbgfd = -1;
  const char *ebuf = getenv("TCDBGFD");
  if(ebuf) g_dbgfd = atoi(ebuf);
  // reduce the millisecond clock in 64-bit integer arithmetic before narrowing: converting
  // the raw double straight to `unsigned int' is undefined once it exceeds UINT_MAX
  uint64_t msec = (uint64_t)(tctime() * 1000);
  srand((unsigned int)(msec % UINT_MAX));
  if(argc < 2) usage();
  int rv = 0;
  if(!strcmp(argv[1], "write")){
    rv = runwrite(argc, argv);
  } else if(!strcmp(argv[1], "read")){
    rv = runread(argc, argv);
  } else if(!strcmp(argv[1], "wicked")){
    rv = runwicked(argc, argv);
  } else {
    usage();
  }
  return rv;
}


/* print the usage and exit
   Writes the synopsis of every sub command to the standard error output and then terminates
   the process with status 1, so this function never returns to the caller. */
static void usage(void){
  const char *prog = g_progname;
  fprintf(stderr, "%s: test cases of the q-gram database API of Tokyo Dystopia\n", prog);
  fputs("\n", stderr);
  fputs("usage:\n", stderr);
  fprintf(stderr,
          "  %s write [-tl] [-td|-tb] [-et num] [-ic num] [-nl|-nb]"
          " [-la num] [-en] [-rc] [-ra] [-rs] path rnum\n",
          prog);
  fprintf(stderr,
          "  %s read [-nl|-nb] [-la num] [-lm num] [-en] [-rc] [-ra] [-rs] [-sp|-ss|-sf]"
          " path rnum\n",
          prog);
  fprintf(stderr,
          "  %s wicked [-tl] [-td|-tb] [-et num] [-ic num] [-nl|-nb]"
          " [-la num] [-en] [-rc] [-ra] [-rs] path rnum\n",
          prog);
  fputs("\n", stderr);
  exit(1);
}


/* print formatted information string and flush the buffer
   `format' specifies a printf-like format string; the arguments it consumes follow.
   The text goes to the standard output, which is flushed before returning. */
static void iprintf(const char *format, ...){
  va_list args;
  va_start(args, format);
  vprintf(format, args);
  va_end(args);
  fflush(stdout);
}


/* print error message of q-gram database
   `qdb' specifies the q-gram database object to query for its path and error code.
   `func' specifies the name of the function that reported the failure.
   The message is written to the standard error output; "-" stands in for a missing path. */
static void eprint(TCQDB *qdb, const char *func){
  const char *path = tcqdbpath(qdb);
  int ecode = tcqdbecode(qdb);
  if(!path) path = "-";
  const char *emsg = tcqdberrmsg(ecode);
  fprintf(stderr, "%s: %s: %s: error: %d: %s\n", g_progname, path, func, ecode, emsg);
}


/* get a random number
   `range' specifies the exclusive upper bound of the result.
   The return value is rand() scaled by `range' / (RAND_MAX + 1) and truncated to an integer,
   so it lies in [0, range) for a positive `range' and is 0 when `range' is 0. */
static int myrand(int range){
  const double span = RAND_MAX + 1.0;
  double scaled = (double)range * rand();
  return (int)(scaled / span);
}


/* callback function for sync progression
   `total' specifies the total number of steps.
   `current' specifies the number of the current step.
   `msg' specifies the message text to show.
   `opq' is not used.
   A line is printed for every step when `total' is less than 10, otherwise only when
   `current' is a multiple of a tenth of `total'.
   The return value is always true (presumably meaning "continue"; confirm with the library). */
static bool mysynccb(int total, int current, const char *msg, void *opq){
  bool report = true;
  if(total >= 10) report = (current % (total / 10) == 0);
  if(report) printf("[sync:%d:%d:%s]\n", total, current, msg);
  return true;
}


/* fill a record buffer
   `buf' specifies the buffer to fill; callers in this file pass arrays of TEXTBUFSIZ bytes.
   `avg' specifies the average length used to draw the random text length.
   `min' specifies the minimum text length.  It is honored only up to the capacity of the
   buffer.
   `en' specifies the kind of text: if true, ASCII letters and digits are written directly;
   if false, random 16-bit codes are generated and converted with tcstrucstoutf. */
static void setrndtext(char *buf, int avg, int min, bool en){
  int len = myrand(avg * 2);
  if(len < 3 || len > myrand(avg * 2) + 1) len = myrand(avg + 1);
  if(len < 2) len = myrand(avg + 1);
  // apply the lower bound first and the capacity last, so that an oversized `min' cannot
  // push the text beyond the end of `buf'
  len = tclmin(tclmax(min, len), TEXTBUFSIZ - 1);
  if(en){
    char *wp = buf;
    if(len > 0){
      // the leading character is always a letter, upper case one time in four
      if(myrand(4) == 0){
        *(wp++) = 'A' + myrand('Z' - 'A' + 1);
      } else {
        *(wp++) = 'a' + myrand('z' - 'a' + 1);
      }
      len--;
    }
    for(int i = 0; i < len; i++){
      if(myrand(avg + 1) == 0){
        // occasional upper case letter or digit
        if(myrand(2) == 0){
          *(wp++) = 'A' + myrand('Z' - 'A' + 1);
        } else {
          *(wp++) = '0' + myrand('9' - '0' + 1);
        }
      } else {
        *(wp++) = 'a' + myrand('z' - 'a' + 1);
      }
    }
    *wp = '\0';
  } else {
    // only TEXTBUFSIZ / 4 - 1 codes are ever stored, so the scratch array is sized to match
    // (presumably the quarter leaves room for the multi-byte UTF-8 form in `buf')
    uint16_t ary[TEXTBUFSIZ/4];
    len = tclmin(len, TEXTBUFSIZ / 4 - 1);
    for(int i = 0; i < len; i++){
      ary[i] = myrand(UINT16_MAX) / (myrand(UINT8_MAX) + 1);
    }
    tcstrucstoutf(ary, len, buf);
  }
}


/* fill a record buffer corresponding to an ID number
   `buf' specifies the buffer to fill; at most 32 characters or 32 16-bit codes are generated.
   `id' specifies the ID number.  Its low bits seed rand_r, so the same ID always yields the
   same text.
   `en' specifies the kind of text: if true, characters are picked from a 64-entry table of
   letters, digits, '+' and space; if false, 16-bit codes are generated and converted with
   tcstrucstoutf. */
static void setidtext(char *buf, uint64_t id, bool en){
  enum { IDTEXTMAX = 0x1f + 1 };         // upper bound of `len' implied by the mask below
  unsigned int tid = (unsigned int)id;
  id += rand_r(&tid);
  int len = (((rand_r(&tid) ^ rand_r(&tid)) >> 4) & 0x1f) + 1;
  if(en){
    static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+ ";
    char *wp = buf;
    for(int i = 0; i < len; i++){
      // six bits select the character while only four are consumed per step
      *(wp++) = tbl[id&0x3f];
      id >>= 4;
      if(id < 1) id = rand_r(&tid);
    }
    *wp = '\0';
  } else {
    // sized to the maximum length instead of TEXTBUFSIZ to keep the stack frame small
    uint16_t ary[IDTEXTMAX];
    len = tclmin(len, IDTEXTMAX);
    for(int i = 0; i < len; i++){
      ary[i] = (rand_r(&tid) >> 4) & UINT16_MAX;
    }
    tcstrucstoutf(ary, len, buf);
  }
}


/* parse arguments of write command
   `argc' specifies the number of command line arguments.
   `argv' specifies the command line arguments; parsing starts after the sub command name.
   Arguments beginning with '-' are options until the path has been seen; after that every
   argument is positional (the path, then the record number).
   The return value is the status of procwrite.  Invalid arguments end in usage(). */
static int runwrite(int argc, char **argv){
  char *path = NULL;
  char *rstr = NULL;
  int opts = 0;
  int etnum = 0;
  int64_t icsiz = 0;
  int omode = 0;
  int alen = DEFAVGLEN;
  bool en = false;
  int topts = TCTNLOWER | TCTNNOACC | TCTNSPACE;
  for(int i = 2; i < argc; i++){
    const char *arg = argv[i];
    if(path || arg[0] != '-'){
      // positional argument
      if(!path){
        path = argv[i];
      } else if(!rstr){
        rstr = argv[i];
      } else {
        usage();
      }
      continue;
    }
    if(strcmp(arg, "-tl") == 0){
      opts |= QDBTLARGE;
    } else if(strcmp(arg, "-td") == 0){
      opts |= QDBTDEFLATE;
    } else if(strcmp(arg, "-tb") == 0){
      opts |= QDBTTCBS;
    } else if(strcmp(arg, "-et") == 0){
      if(++i >= argc) usage();
      etnum = atoi(argv[i]);
    } else if(strcmp(arg, "-ic") == 0){
      if(++i >= argc) usage();
      // a number optionally followed by a k/m/g unit suffix (powers of 1024)
      char *suffix;
      icsiz = strtoll(argv[i], &suffix, 10);
      switch(*suffix){
        case 'k': case 'K':
          icsiz *= 1024LL;
          break;
        case 'm': case 'M':
          icsiz *= 1024LL * 1024LL;
          break;
        case 'g': case 'G':
          icsiz *= 1024LL * 1024LL * 1024LL;
          break;
        default:
          break;
      }
    } else if(strcmp(arg, "-nl") == 0){
      omode |= QDBONOLCK;
    } else if(strcmp(arg, "-nb") == 0){
      omode |= QDBOLCKNB;
    } else if(strcmp(arg, "-la") == 0){
      if(++i >= argc) usage();
      alen = atoi(argv[i]);
    } else if(strcmp(arg, "-en") == 0){
      en = true;
    } else if(strcmp(arg, "-rc") == 0){
      topts &= ~TCTNLOWER;
    } else if(strcmp(arg, "-ra") == 0){
      topts &= ~TCTNNOACC;
    } else if(strcmp(arg, "-rs") == 0){
      topts &= ~TCTNSPACE;
    } else {
      usage();
    }
  }
  if(!path || !rstr) usage();
  int rnum = atoi(rstr);
  if(rnum < 1 || alen < 1) usage();
  return procwrite(path, rnum, opts, etnum, icsiz, omode, alen, en, topts);
}


/* parse arguments of read command
   `argc' specifies the number of command line arguments.
   `argv' specifies the command line arguments; parsing starts after the sub command name.
   Arguments beginning with '-' are options until the path has been seen; after that every
   argument is positional (the path, then the record number).
   The return value is the status of procread.  Invalid arguments end in usage(). */
static int runread(int argc, char **argv){
  char *path = NULL;
  char *rstr = NULL;
  int omode = 0;
  int alen = DEFAVGLEN;
  int mlen = 1;
  bool en = false;
  int topts = TCTNLOWER | TCTNNOACC | TCTNSPACE;
  int smode = QDBSSUBSTR;
  for(int i = 2; i < argc; i++){
    const char *arg = argv[i];
    if(path || arg[0] != '-'){
      // positional argument
      if(!path){
        path = argv[i];
      } else if(!rstr){
        rstr = argv[i];
      } else {
        usage();
      }
      continue;
    }
    if(strcmp(arg, "-nl") == 0){
      omode |= QDBONOLCK;
    } else if(strcmp(arg, "-nb") == 0){
      omode |= QDBOLCKNB;
    } else if(strcmp(arg, "-la") == 0){
      if(++i >= argc) usage();
      alen = atoi(argv[i]);
    } else if(strcmp(arg, "-lm") == 0){
      if(++i >= argc) usage();
      mlen = atoi(argv[i]);
    } else if(strcmp(arg, "-en") == 0){
      en = true;
    } else if(strcmp(arg, "-rc") == 0){
      topts &= ~TCTNLOWER;
    } else if(strcmp(arg, "-ra") == 0){
      topts &= ~TCTNNOACC;
    } else if(strcmp(arg, "-rs") == 0){
      topts &= ~TCTNSPACE;
    } else if(strcmp(arg, "-sp") == 0){
      smode = QDBSPREFIX;
    } else if(strcmp(arg, "-ss") == 0){
      smode = QDBSSUFFIX;
    } else if(strcmp(arg, "-sf") == 0){
      smode = QDBSFULL;
    } else {
      usage();
    }
  }
  if(!path || !rstr) usage();
  int rnum = atoi(rstr);
  if(rnum < 1 || alen < 1 || mlen < 0) usage();
  return procread(path, rnum, omode, alen, mlen, en, smode, topts);
}


/* parse arguments of wicked command
   `argc' specifies the number of command line arguments.
   `argv' specifies the command line arguments; parsing starts after the sub command name.
   Arguments beginning with '-' are options until the path has been seen; after that every
   argument is positional (the path, then the record number).
   The return value is the status of procwicked.  Invalid arguments end in usage(). */
static int runwicked(int argc, char **argv){
  char *path = NULL;
  char *rstr = NULL;
  int opts = 0;
  int etnum = 0;
  int64_t icsiz = 0;
  int omode = 0;
  int alen = DEFAVGLEN;
  bool en = false;
  int topts = TCTNLOWER | TCTNNOACC | TCTNSPACE;
  for(int i = 2; i < argc; i++){
    const char *arg = argv[i];
    if(path || arg[0] != '-'){
      // positional argument
      if(!path){
        path = argv[i];
      } else if(!rstr){
        rstr = argv[i];
      } else {
        usage();
      }
      continue;
    }
    if(strcmp(arg, "-tl") == 0){
      opts |= QDBTLARGE;
    } else if(strcmp(arg, "-td") == 0){
      opts |= QDBTDEFLATE;
    } else if(strcmp(arg, "-tb") == 0){
      opts |= QDBTTCBS;
    } else if(strcmp(arg, "-et") == 0){
      if(++i >= argc) usage();
      etnum = atoi(argv[i]);
    } else if(strcmp(arg, "-ic") == 0){
      if(++i >= argc) usage();
      // a number optionally followed by a k/m/g unit suffix (powers of 1024)
      char *suffix;
      icsiz = strtoll(argv[i], &suffix, 10);
      switch(*suffix){
        case 'k': case 'K':
          icsiz *= 1024LL;
          break;
        case 'm': case 'M':
          icsiz *= 1024LL * 1024LL;
          break;
        case 'g': case 'G':
          icsiz *= 1024LL * 1024LL * 1024LL;
          break;
        default:
          break;
      }
    } else if(strcmp(arg, "-nl") == 0){
      omode |= QDBONOLCK;
    } else if(strcmp(arg, "-nb") == 0){
      omode |= QDBOLCKNB;
    } else if(strcmp(arg, "-la") == 0){
      if(++i >= argc) usage();
      alen = atoi(argv[i]);
    } else if(strcmp(arg, "-en") == 0){
      en = true;
    } else if(strcmp(arg, "-rc") == 0){
      topts &= ~TCTNLOWER;
    } else if(strcmp(arg, "-ra") == 0){
      topts &= ~TCTNNOACC;
    } else if(strcmp(arg, "-rs") == 0){
      topts &= ~TCTNSPACE;
    } else {
      usage();
    }
  }
  if(!path || !rstr) usage();
  int rnum = atoi(rstr);
  if(rnum < 1 || alen < 1) usage();
  return procwicked(path, rnum, opts, etnum, icsiz, omode, alen, en, topts);
}


/* perform write command
   `path' specifies the path of the database, which is created or truncated.
   `rnum' specifies the number of records to store, using the IDs 1 to `rnum'.
   `opts' and `etnum' are passed to tcqdbtune, `icsiz' to tcqdbsetcache.
   `omode' specifies additional open mode flags.
   `alen' specifies the average length of the random texts.
   `en' specifies whether the texts are ASCII.
   `topts' specifies the options for tctextnormalize.
   The return value is 0 on success or 1 if any database call reported an error. */
static int procwrite(const char *path, int rnum, int opts, int etnum, int64_t icsiz, int omode,
                     int alen, bool en, int topts){
  iprintf("<Writing Test>\n  path=%s  rnum=%d  opts=%d  etnum=%d  icsiz=%lld  omode=%d"
          "  alen=%d  en=%d  topts=%d\n\n",
          path, rnum, opts, etnum, (long long)icsiz, omode, alen, en, topts);
  bool err = false;
  double stime = tctime();
  TCQDB *qdb = tcqdbnew();
  if(g_dbgfd >= 0) tcqdbsetdbgfd(qdb, g_dbgfd);
  tcqdbsetsynccb(qdb, mysynccb, NULL);
  // a failed setup step is reported but does not stop the test
  if(!tcqdbtune(qdb, etnum, opts)){
    eprint(qdb, "tcqdbtune");
    err = true;
  }
  if(!tcqdbsetcache(qdb, icsiz, -1)){
    eprint(qdb, "tcqdbsetcache");
    err = true;
  }
  if(!tcqdbopen(qdb, path, QDBOWRITER | QDBOCREAT | QDBOTRUNC | omode)){
    eprint(qdb, "tcqdbopen");
    err = true;
  }
  // the progress meter is shown only for runs of more than 250 records
  const bool meter = rnum > 250;
  const int dotstep = rnum / 250;
  const int linestep = rnum / 10;
  char text[TEXTBUFSIZ];
  for(int id = 1; id <= rnum; id++){
    setrndtext(text, alen, 1, en);
    tctextnormalize(text, topts);
    if(!tcqdbput(qdb, id, text)){
      eprint(qdb, "tcqdbput");
      err = true;
      break;
    }
    if(!meter || id % dotstep != 0) continue;
    putchar('.');
    fflush(stdout);
    if(id == rnum || id % linestep == 0) iprintf(" (%08d)\n", id);
  }
  iprintf("token number: %llu\n", (unsigned long long)tcqdbtnum(qdb));
  iprintf("size: %llu\n", (unsigned long long)tcqdbfsiz(qdb));
  if(!tcqdbclose(qdb)){
    eprint(qdb, "tcqdbclose");
    err = true;
  }
  tcqdbdel(qdb);
  iprintf("time: %.3f\n", tctime() - stime);
  iprintf("%s\n\n", err ? "error" : "ok");
  if(err) return 1;
  return 0;
}


/* perform read command
   `path' specifies the path of the database, which is opened as a reader.
   `rnum' specifies the number of searches to perform.
   `omode' specifies additional open mode flags.
   `alen' specifies the average length of the random search texts.
   `mlen' specifies the minimum length of the random search texts.
   `en' specifies whether the texts are ASCII.
   `smode' specifies the search mode passed to tcqdbsearch.
   `topts' specifies the options for tctextnormalize.
   The return value is 0 on success or 1 if any database call reported an error. */
static int procread(const char *path, int rnum, int omode, int alen, int mlen, bool en,
                    int smode, int topts){
  iprintf("<Reading Test>\n  path=%s  rnum=%d  omode=%d  alen=%d  mlen=%d  en=%d"
          "  smode=%d  topts=%d\n\n", path, rnum, omode, alen, mlen, en, smode, topts);
  bool err = false;
  double stime = tctime();
  TCQDB *qdb = tcqdbnew();
  if(g_dbgfd >= 0) tcqdbsetdbgfd(qdb, g_dbgfd);
  // a failed open is reported but does not stop the test
  if(!tcqdbopen(qdb, path, QDBOREADER | omode)){
    eprint(qdb, "tcqdbopen");
    err = true;
  }
  // the progress meter is shown only for runs of more than 250 searches
  const bool meter = rnum > 250;
  const int dotstep = rnum / 250;
  const int linestep = rnum / 10;
  char text[TEXTBUFSIZ];
  for(int cnt = 1; cnt <= rnum; cnt++){
    setrndtext(text, alen, mlen, en);
    tctextnormalize(text, topts);
    int num;
    uint64_t *res = tcqdbsearch(qdb, text, smode, &num);
    if(!res){
      eprint(qdb, "tcqdbsearch");
      err = true;
      break;
    }
    // only the success of the search matters; the result itself is discarded
    tcfree(res);
    if(!meter || cnt % dotstep != 0) continue;
    putchar('.');
    fflush(stdout);
    if(cnt == rnum || cnt % linestep == 0) iprintf(" (%08d)\n", cnt);
  }
  iprintf("token number: %llu\n", (unsigned long long)tcqdbtnum(qdb));
  iprintf("size: %llu\n", (unsigned long long)tcqdbfsiz(qdb));
  if(!tcqdbclose(qdb)){
    eprint(qdb, "tcqdbclose");
    err = true;
  }
  tcqdbdel(qdb);
  iprintf("time: %.3f\n", tctime() - stime);
  iprintf("%s\n\n", err ? "error" : "ok");
  if(err) return 1;
  return 0;
}


/* perform wicked command
   `path' specifies the path of the database, which is created or truncated.
   `rnum' specifies the number of operations, using the IDs 1 to `rnum'.
   `opts' and `etnum' are passed to tcqdbtune, `icsiz' to tcqdbsetcache.
   `omode' specifies additional open mode flags.
   `alen' specifies the average text length; it is only echoed in the banner.
   `en' specifies whether the texts are ASCII.
   `topts' specifies the options for tctextnormalize.
   Each step prints 'P', 'O' or 'S' for a put, out or search operation and the loop stops
   after the first error.
   The return value is 0 on success or 1 if any database call reported an error. */
static int procwicked(const char *path, int rnum, int opts, int etnum, int64_t icsiz, int omode,
                      int alen, bool en, int topts){
  iprintf("<Wicked Writing Test>\n  path=%s  rnum=%d  opts=%d  etnum=%d  icsiz=%lld  omode=%d"
          "  alen=%d  en=%d  topts=%d\n\n",
          path, rnum, opts, etnum, (long long)icsiz, omode, alen, en, topts);
  bool err = false;
  double stime = tctime();
  TCQDB *qdb = tcqdbnew();
  if(g_dbgfd >= 0) tcqdbsetdbgfd(qdb, g_dbgfd);
  if(!tcqdbtune(qdb, etnum, opts)){
    eprint(qdb, "tcqdbtune");
    err = true;
  }
  if(!tcqdbsetcache(qdb, icsiz, -1)){
    eprint(qdb, "tcqdbsetcache");
    err = true;
  }
  if(!tcqdbopen(qdb, path, QDBOWRITER | QDBOCREAT | QDBOTRUNC | omode)){
    eprint(qdb, "tcqdbopen");
    err = true;
  }
  // `rnd' selects the operation and is redrawn on average every fifth step only, so the same
  // kind of operation tends to repeat in runs
  int rnd = 0;
  char text[TEXTBUFSIZ];
  for(int id = 1; !err && id <= rnum; id++){
    setidtext(text, id, en);
    tctextnormalize(text, topts);
    if(myrand(5) == 0) rnd = myrand(100);
    if(rnd >= 95){
      putchar('S');
      int num;
      uint64_t *res = tcqdbsearch(qdb, text, QDBSSUBSTR, &num);
      if(!res){
        eprint(qdb, "tcqdbsearch");
        err = true;
      } else {
        tcfree(res);
      }
    } else if(rnd >= 90){
      putchar('O');
      if(!tcqdbout(qdb, id, text)){
        eprint(qdb, "tcqdbout");
        err = true;
      }
    } else {
      putchar('P');
      if(!tcqdbput(qdb, id, text)){
        eprint(qdb, "tcqdbput");
        err = true;
      }
    }
    if(id % 50 == 0) iprintf(" (%08d)\n", id);
  }
  // terminate the last, partially filled line of operation letters
  if(rnum % 50 > 0) iprintf(" (%08d)\n", rnum);
  iprintf("token number: %llu\n", (unsigned long long)tcqdbtnum(qdb));
  iprintf("size: %llu\n", (unsigned long long)tcqdbfsiz(qdb));
  if(!tcqdbclose(qdb)){
    eprint(qdb, "tcqdbclose");
    err = true;
  }
  tcqdbdel(qdb);
  iprintf("time: %.3f\n", tctime() - stime);
  iprintf("%s\n\n", err ? "error" : "ok");
  if(err) return 1;
  return 0;
}



// END OF FILE
