/*
 * Builds a dictionary file from files in cannadic format.
 *
 * Funded by the IPA Exploratory Software Project 2001 8/22
 */
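/*
 * The dictionary is structured so that a reading serves as the index
 * used to look up the part of speech and the converted word (= entry).
 * The dictionary file uses network byte order.
 */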

/*
 * Funded by the IPA Exploratory Software Project 2002 1/1
 * Copyright (C) 2000-2003 TABATA Yusuke
 * Copyright (C) 2001-2002 TAKAI Kousuke
 */
/* $Id: mkdic.c,v 1.10 2002/03/28 11:56:07 yusuke Exp $ */

#include <sys/types.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <xstr.h>
#include <wtype.h>

/* Size of the buffer that receives one line of input (see read_line()). */
#define MAX_LINE_LEN 1024
/* Number of readings emitted before do_output() starts a new page. */
#define WORDS_PER_PAGE 64
/* Number of 4-byte slots in the header written by link_dics(). */
#define NR_HEADER_SECTIONS 16
/* Each section of the output file starts at a multiple of this offset. */
#define SECTION_ALIGNMENT 8

/* Output file name used unless "-o <file>" is given on the command line. */
#define DEFAULT_FN "anthy.dic"
static const char *output_fn = DEFAULT_FN;

/* argv[0], used as the prefix of diagnostics. */
static const char *progname;
/* Temporary files holding the four sections until link_dics() joins them. */
static FILE *page_out, *page_index_out, *entry_index_out, *entry_out;

/** ñ */
struct word_entry {
  char *wt;
  int freq;
  char *word;
};

/** All words that share one reading; nodes form a singly linked list. */
struct entry_stat {
  /* The reading (index string) of this node. */
  xstr *index_str;
  /* Number of valid elements in entries. */
  int nr_entries;
  /* Array of words, grown one element at a time with realloc(). */
  struct word_entry *entries;
  /* Next reading in input order, or NULL at the end of the list. */
  struct entry_stat *next;
};

/*
 * Create the temporary files that collect the four output sections.
 * The files are created in the order page index, page, entry index,
 * entry.  On the first failure a diagnostic is printed and the program
 * exits with status 2.
 */
static void open_output_files(void)
{
  FILE **slots[] = {
    &page_index_out, &page_out, &entry_index_out, &entry_out
  };
  size_t k;

  for (k = 0; k < sizeof slots / sizeof slots[0]; k++) {
    *slots[k] = tmpfile();
    if (*slots[k] == NULL) {
      fprintf(stderr, "%s: cannot open temporary file: %s\n",
              progname, strerror(errno));
      exit(2);
    }
  }
}

static void flush_output_files (void)
{
  if (ferror (page_index_out)  ||
      ferror (page_out)        ||
      ferror (entry_index_out) ||
      ferror (entry_out))
    {
      fprintf (stderr, "%s: write error\n", progname);
      exit (1);
    }
  if (fflush (page_index_out)  ||
      fflush (page_out)	       ||
      fflush (entry_index_out) ||
      fflush (entry_out))
    {
      fprintf (stderr, "%s: write error: %s\n", progname, strerror (errno));
      exit (1);
    }
}

/*
 * Length of the common prefix of two strings, in characters.
 * Returns 0 when either argument is NULL.
 */
static int common_len(xstr *s1, xstr *s2)
{
  int limit;
  int pos;

  if (s1 == NULL || s2 == NULL) {
    return 0;
  }
  /* The prefix cannot be longer than the shorter string. */
  limit = (s1->len < s2->len) ? s1->len : s2->len;
  for (pos = 0; pos < limit; pos++) {
    if (s1->str[pos] != s2->str[pos]) {
      break;
    }
  }
  return (pos < limit) ? pos : limit;
}

/*
 * Write the integer i to fp in network byte order (sizeof(int) bytes).
 * The result of fwrite() is not checked here; callers detect failures
 * through the stream's error indicator.
 */
static void write_nl(FILE *fp, int i)
{
  int wire = htonl(i);

  fwrite(&wire, sizeof wire, 1, fp);
}

/*
 * Write the difference between two readings to page_out.
 *
 * p is the previous reading (NULL at the start of a page) and c is the
 * current one.  The output is a single byte holding (number of trailing
 * characters of p that are not shared with c) + 1, followed by the
 * characters of c after the common prefix.  For p = "AAA" and
 * c = "ABBB" this is the byte 3 followed by "BBB".
 *
 * Returns 1 (for the leading byte) plus the sum of the values returned
 * by anthy_sputxchar() -- presumably the number of bytes written.
 *
 * NOTE(review): buf holds 3 bytes, which assumes anthy_sputxchar()
 * never produces more than 2 bytes plus the terminator -- confirm.
 */
static int output_diff(xstr *p, xstr *c)
{
  int shared = common_len(p, c);
  int lead = 1;
  int written = 1;
  int pos;

  if (p && p->len > shared) {
    lead = p->len - shared + 1;
  }
  fprintf(page_out, "%c", lead);

  for (pos = shared; pos < c->len; pos++) {
    char buf[3];
    written += anthy_sputxchar(buf, c->str[pos]);
    fputs(buf, page_out);
  }
  return written;
}

/* Print a hint pointing at the mkanthydic front end, then exit with 0. */
static void print_usage(void)
{
  puts("please use mkanthydic command.");
  exit(0);
}

/*
 * Process the command line.
 *   --help      print the usage hint and exit
 *   -o FILE     set the output file name
 *   -uc VALUE   echo VALUE to stdout
 * "-o" and "-uc" are only recognized when another argument follows;
 * anything else is silently ignored.
 */
static void parse_args(int argc, char **argv)
{
  int pos = 1;

  while (pos < argc) {
    const char *arg = argv[pos];
    int has_value = (pos + 1 < argc);

    if (strcmp(arg, "--help") == 0) {
      print_usage();
    }
    if (has_value && strcmp(arg, "-o") == 0) {
      output_fn = argv[pos + 1];
      pos += 2;
      continue;
    }
    if (has_value && strcmp(arg, "-uc") == 0) {
      printf("uc = %s\n", argv[pos + 1]);
      pos += 2;
      continue;
    }
    pos++;
  }
}

/*
 * Read the next line from stdin that does not start with '#'.
 *
 * buf must have room for MAX_LINE_LEN bytes.  A trailing newline, if
 * present, is removed.  Returns buf, or NULL at end of input or on a
 * read error.
 */
static char *read_line(char *buf)
{
  while (fgets(buf, MAX_LINE_LEN, stdin)) {
    size_t len;

    if (buf[0] == '#') {
      /* comment line */
      continue;
    }
    len = strlen(buf);
    /* len is 0 when the line starts with a NUL byte; in that case
       there is no last character to inspect. */
    if (len > 0 && buf[len - 1] == '\n') {
      buf[len - 1] = '\0';
    }
    return buf;
  }
  return NULL;
}

/*
 * Extract the reading, i.e. the text before the first space of buf,
 * as a newly created xstr.  buf is modified temporarily and restored
 * before returning.
 *
 * Returns NULL when buf contains no space (the line has no entry part),
 * or whatever anthy_cstr_to_xstr() returns otherwise.
 */
static xstr *get_index(char *buf)
{
  char *sp;
  xstr *xs;

  sp = strchr(buf, ' ');
  if (!sp) {
    /* No separator: there is no reading/entry pair on this line. */
    return NULL;
  }
  *sp = 0;
  xs = anthy_cstr_to_xstr(buf);
  *sp = ' ';
  return xs;
}

/*
 * Return a pointer to the entry part of a line: the first character
 * after the run of spaces that follows the reading.  The result points
 * into buf.  When buf contains no space at all, a pointer to its
 * terminating NUL (an empty entry) is returned.
 */
static char *get_entry(char *buf)
{
  char *sp = strchr(buf, ' ');

  if (!sp) {
    return buf + strlen(buf);
  }
  while (*sp == ' ') {
    sp++;
  }
  return sp;
}

/*
 * Start a new page: terminate the current page in page_out with a NUL
 * byte and append i to the page index.  The caller passes in i the
 * running page offset it maintains.
 */
static void begin_new_page(int i)
{
  fputc(0, page_out);
  write_nl(page_index_out, i);
}

/* Append i (the caller's running entry offset) to the entry index. */
static void output_entry_index(int i)
{
  write_nl(entry_index_out, i);
}

static struct entry_stat *begin_new_entry()
{
  struct entry_stat *es;
  es = malloc(sizeof(struct entry_stat));
  es->nr_entries = 0;
  es->entries = 0;
  es->next = NULL;
  return es;
}

/** κǸˡȥĤɲä */
static void
push_back_word_entry(struct entry_stat *es, const char *wt,
		     int freq, const char *word)
{
  es->entries = realloc(es->entries,
			sizeof(struct word_entry) *
			(es->nr_entries + 1));

  es->entries[es->nr_entries].wt = strdup(wt);
  es->entries[es->nr_entries].freq = freq;
  es->entries[es->nr_entries].word = strdup(word);
  es->nr_entries ++;
}

/** ȥʬ䤷ơ */
static void push_back_entry(struct entry_stat *es, const char *ent)
{
  char *buf = alloca(strlen(ent) + 1);
  char *cur = buf;
  char *n;
  char wtbuf[20];
  int freq = 0;
  strcpy(buf, ent);

  while (1) {
    /* ȡڤ */
    n = strchr(cur, ' ');
    if (n) {
      *n = 0;
    }
    if (cur[0] == '#') {
      /* ʻ */
      char *t;
      strcpy(wtbuf, cur);
      t = strchr(wtbuf, '*');
      freq = 0;
      if (t) {
	*t = 0;
	t++;
	freq = atoi(t);
      }
    } else {
      /* ȥ */
      push_back_word_entry(es, wtbuf, freq, cur);
    }
    if (!n) {
      return ;
    }
    cur = n;
    cur ++;
  }
}

/**
 * Check whether an identical word already appears before position idx.
 *
 * Two entries are identical when frequency, word type and word all
 * match.  The search walks backwards from idx - 1 and stops as soon as
 * the frequency differs, so it relies on entries with equal frequency
 * being adjacent (output_entry() sorts by frequency before calling).
 *
 * Returns 1 if a duplicate was found, 0 otherwise.
 */
static int check_same_word(struct entry_stat *es, int idx)
{
  struct word_entry *base = &es->entries[idx];
  int i;
  for (i = idx - 1; i >= 0; i--) {
    struct word_entry *cur = &es->entries[i];
    if (base->freq != cur->freq) {
      /* Left the run of equal frequencies: no duplicate can follow. */
      return 0;
    }
    if (strcmp(base->wt, cur->wt)) {
      continue;
    }
    if (strcmp(base->word, cur->word)) {
      continue;
    }
    /* Same frequency, word type and word. */
    return 1;
  }
  return 0;
}

/**
 * Comparison function for qsort(): orders word entries by descending
 * frequency.  Uses comparisons instead of subtraction so that extreme
 * frequency values cannot overflow.
 */
static int compare_word_entry(const void *p1, const void *p2)
{
  const struct word_entry *e1 = p1;
  const struct word_entry *e2 = p2;
  return (e2->freq > e1->freq) - (e2->freq < e1->freq);
}

/**
 * Write all words of one reading to entry_out and release them.
 *
 * The words are sorted by descending frequency, duplicates are dropped,
 * and the remaining ones are written as "wt[*freq] word" items
 * separated by single spaces, followed by a NUL byte.  Afterwards the
 * word strings and the entries array are freed and es is left with no
 * entries.
 *
 * Returns the number of bytes written including the terminating NUL,
 * or 0 (writing nothing) when es is NULL.
 */
static int output_entry(struct entry_stat *es)
{
  int i;
  int count = 0;

  if (!es) {
    return 0;
  }
  /* Sort by frequency; skip the call when there is no array to sort. */
  if (es->nr_entries > 0) {
    qsort(es->entries, es->nr_entries,
          sizeof(struct word_entry),
          compare_word_entry);
  }
  /* Write each entry. */
  for (i = 0; i < es->nr_entries; i++) {
    struct word_entry *we = &es->entries[i];
    /* Drop duplicates. */
    if (check_same_word(es, i)) {
      continue;
    }
    if (i) {
      /* Everything after the first item starts with a space. */
      count += fprintf(entry_out, " ");
    }
    /* Word type and frequency.
       NOTE(review): an entry that passed the duplicate check always
       differs from its predecessor, so this condition holds for every
       entry written and the word type is never omitted. */
    if (i == 0 ||
        (strcmp(es->entries[i-1].word, we->word) ||
         strcmp(es->entries[i-1].wt, we->wt) ||
         es->entries[i-1].freq != we->freq)) {
      count += fprintf(entry_out, "%s", we->wt);
      if (we->freq) {
        count += fprintf(entry_out, "*%d", we->freq);
      }
      count += fprintf(entry_out, " ");
    }
    /* The word itself. */
    count += fprintf(entry_out, "%s", we->word);
  }

  /* Release the words and the array that held them. */
  for (i = 0; i < es->nr_entries; i++) {
    struct word_entry *we = &es->entries[i];
    free(we->wt);
    free(we->word);
  }
  free(es->entries);
  es->entries = NULL;
  es->nr_entries = 0;

  fputc(0, entry_out);
  return count + 1;
}

/**
 * Read the dictionary source from stdin and build the list of readings.
 *
 * Each line has the form "<reading> <entry...>".  Consecutive lines with
 * the same reading are merged into one entry_stat node.  Lines without
 * a space (no entry part) and readings longer than 30 characters are
 * skipped.  Reading stops at end of input or when get_index() fails.
 *
 * Returns the head of the list, or NULL if no reading was accepted.
 */
static struct entry_stat * parse_dict(void)
{
  xstr *cur, *prev = NULL;
  char buf[MAX_LINE_LEN];
  struct entry_stat *es = NULL;
  struct entry_stat *es_head = NULL;

  while (read_line(buf)) {
    if (!strchr(buf, ' ')) {
      /* No separator, hence no entry part: ignore the line. */
      continue;
    }
    cur = get_index(buf);
    if (!cur) {
      break;
    }
    if (cur->len > 30) {
      /* Readings longer than 30 characters are ignored. */
      anthy_free_xstr(cur);
      continue;
    }
    if (prev && !anthy_xstrcmp(prev, cur)) {
      /* Same reading as the previous line: add to the current node. */
      push_back_entry(es, get_entry(buf));
    } else {
      /* A new reading: create a node and link it to the list. */
      struct entry_stat *es_new = begin_new_entry();
      if (!es) {
        es_head = es_new;
      } else {
        es->next = es_new;
      }
      es = es_new;
      es->index_str = anthy_xstr_dup(cur);
      push_back_entry(es, get_entry(buf));
    }
    if (prev) {
      anthy_free_xstr(prev);
    }
    prev = cur;
  }
  /* The list nodes hold their own copies; release the last reading. */
  if (prev) {
    anthy_free_xstr(prev);
  }

  return es_head;
}

static void do_output(struct entry_stat *es)
{
  int count = 0;
  xstr *prev = NULL;
  int entry_index = 0;
  int page_index = 0;

  write_nl(page_index_out, page_index);

  for (; es; es = es->next) {
    if ((count % WORDS_PER_PAGE) == 0 && count) {
      /* ڡ */
      page_index ++;
      prev = NULL;
      begin_new_page(page_index);
    }
    page_index += output_diff(prev, es->index_str);
    output_entry_index(entry_index);
    entry_index += output_entry(es);
    count ++;
    prev = es->index_str;
  }
  /* ǸΥȥλ */
  entry_index += output_entry(es);
  write_nl(entry_index_out, entry_index);
  write_nl(page_index_out, 0);
  printf("Total %d words (%d pages).\n", count, count / WORDS_PER_PAGE + 1);
}

/*
 * Current position of fp rounded up to a multiple of SECTION_ALIGNMENT,
 * or 0 when fp is NULL.  Because the position is taken from ftell(),
 * this is the section size only while the stream is still positioned
 * at the end of the written data.
 */
static int get_size (FILE *fp)
{
  long pos;

  if (fp == NULL) {
    return 0;
  }
  pos = ftell (fp);
  return (pos + (SECTION_ALIGNMENT - 1)) & ~(long) (SECTION_ALIGNMENT - 1);
}

/*
 * Append the whole contents of IN to OUT, first padding OUT with NUL
 * bytes up to the next multiple of SECTION_ALIGNMENT.  IN is rewound
 * before copying.  Any failure (position query, write, read) prints a
 * diagnostic and exits with status 1.
 */
static void copy_file (FILE *in, FILE *out)
{
  long pos;
  size_t nread;
  char buf[BUFSIZ];

  /* Pad OUT to the next aligned offset.  */
  pos = ftell (out);
  if (pos < 0)
    {
      fprintf (stderr, "%s: %s: cannot get file position: %s\n",
               progname, output_fn, strerror (errno));
      exit (1);
    }
  for (; pos & (SECTION_ALIGNMENT - 1); pos++)
    if (fputc (0, out) == EOF)
      {
        fprintf (stderr, "%s: %s: write error: %s\n",
                 progname, output_fn, strerror (errno));
        exit (1);
      }

  /* Copy the contents.  */
  rewind (in);
  while ((nread = fread (buf, 1, sizeof buf, in)) > 0)
    if (fwrite (buf, 1, nread, out) < nread)
      {
        /* Handle short write (maybe disk full).  */
        fprintf (stderr, "%s: %s: write error: %s\n",
                 progname, output_fn, strerror (errno));
        exit (1);
      }

  /* fread returns 0 both at end of file and on error; tell them apart
     so a failed read does not silently truncate the section.  */
  if (ferror (in))
    {
      fprintf (stderr, "%s: read error: %s\n", progname, strerror (errno));
      exit (1);
    }
}

/*
 * Join the four temporary files into the final dictionary file.
 *
 * The file starts with a header of NR_HEADER_SECTIONS integers in
 * network byte order: slot 0 is the header size, slots 2-5 are the
 * offsets of the entry index, entry, page and page index sections, and
 * all other slots are 0.  The sections follow in that order, each
 * aligned by copy_file().  Any failure prints a diagnostic and exits
 * with status 1.
 */
static void link_dics(void)
{
  FILE *fp;
  int buf[NR_HEADER_SECTIONS];
  int i;

  /* The file holds binary data, so open it in binary mode.  */
  fp = fopen (output_fn, "wb");
  if (!fp)
    {
      fprintf (stderr, "%s: %s: cannot create: %s\n",
               progname, output_fn, strerror (errno));
      exit (1);
    }

  buf[0] = NR_HEADER_SECTIONS * sizeof(int);
  buf[1] = 0;
  buf[2] = buf[0];                              /* entry index offset */
  buf[3] = buf[2] + get_size (entry_index_out); /* entry offset */
  buf[4] = buf[3] + get_size (entry_out);       /* page offset */
  buf[5] = buf[4] + get_size (page_out);        /* page index offset */
  for (i = 6; i < NR_HEADER_SECTIONS; i++) {
    buf[i] = 0;
  }

  for (i = 0; i < NR_HEADER_SECTIONS; i++) {
    write_nl(fp, buf[i]);
  }
  copy_file (entry_index_out, fp);
  copy_file (entry_out, fp);
  copy_file (page_out, fp);
  copy_file (page_index_out, fp);

  /* write_nl does not report failures, so check the error indicator
     before closing.  */
  if (ferror (fp))
    {
      fprintf (stderr, "%s: %s: write error\n", progname, output_fn);
      exit (1);
    }
  if (fclose (fp))
    {
      fprintf (stderr, "%s: %s: write error: %s\n",
               progname, output_fn, strerror (errno));
      exit (1);
    }
}


/*
 * Entry point: read the dictionary source from stdin, write the four
 * sections to temporary files, then join them into the output file.
 */
int main(int argc, char **argv)
{
  struct entry_stat *es;
  progname = argv[0];
  parse_args(argc, argv);
  open_output_files();
  es = parse_dict();
  do_output(es);
  /* Make sure the temporary files are complete before they are copied. */
  flush_output_files();

  link_dics();
  return 0;
}
