/*
 * Encoding: EUC-JP (Japanese)
 *
 * same license as uim
 *
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "main.h"

/* A growable, NUL-terminated character buffer holding one lexical token. */
struct token {
  char *str;    /* heap buffer; NULL until the first character is appended */
  int str_len;  /* number of characters stored, not counting the terminator */
  int buf_len;  /* allocated size of str in bytes */
};

/* One table row: an array of cell tokens plus a link to the next row. */
struct row {
  int index;              /* row number, copied from parser_stat.row */
  int nr_tokens;          /* number of slots in tokens[] */
  struct token **tokens;  /* cell tokens indexed by column; unset slots are NULL */
  /**/
  struct row *next;       /* next row in the owning table's list */
};

/* A parsed table, kept as a singly linked list of rows. */
struct table {
  struct row *head;  /* most recently read row; new rows are pushed at the front */
  int tmp;           /* not referenced anywhere in the visible code */
};

/* Parser state shared by the read_* functions. */
struct parser_stat {
  int row;     /* index of the row being read in the current table */
  int column;  /* index of the cell being read in the current row */
  int fatal;   /* cleared by read_file(); never read in the visible code */
};

/*
 * Allocate an empty token (no buffer, zero length).
 *
 * Returns the new token; the caller owns it and releases it with
 * token_free().  Running out of memory is fatal: a message is written
 * to stderr and the program exits, so the result is never NULL.
 */
static struct token *
token_new(void)
{
  struct token *t = malloc(sizeof(*t));
  if (!t) {
    fprintf(stderr, "token_new: out of memory\n");
    exit(EXIT_FAILURE);
  }
  t->str = NULL;
  t->str_len = 0;
  t->buf_len = 0;
  return t;
}

/*
 * Allocate an empty row with no cells.
 *
 * Every field is initialised (index 0, no tokens, no successor) so the
 * row is safe to inspect before the caller fills it in.  Running out
 * of memory is fatal: a message is written to stderr and the program
 * exits, so the result is never NULL.
 */
static struct row *
row_new(void)
{
  struct row *r = malloc(sizeof(*r));
  if (!r) {
    fprintf(stderr, "row_new: out of memory\n");
    exit(EXIT_FAILURE);
  }
  r->index = 0;
  r->nr_tokens = 0;
  r->tokens = NULL;
  r->next = NULL;
  return r;
}

/*
 * Store token t in column nth of row r, growing the cell array as
 * needed.  Newly created slots between the old end and nth are set to
 * NULL.
 *
 * A negative nth is ignored (nothing is stored).  Running out of
 * memory while growing the array is fatal: a message is written to
 * stderr and the program exits.
 */
static void
row_set(struct row *r, int nth, struct token *t)
{
  if (nth < 0) {
    return ;
  }
  if (nth >= r->nr_tokens) {
    int i;
    /* keep r->tokens intact until the reallocation is known to succeed */
    struct token **grown = realloc(r->tokens,
				   sizeof(struct token *) * ((size_t)nth + 1));
    if (!grown) {
      fprintf(stderr, "row_set: out of memory\n");
      exit(EXIT_FAILURE);
    }
    r->tokens = grown;
    for (i = r->nr_tokens; i <= nth; i++) {
      r->tokens[i] = NULL;
    }
    r->nr_tokens = nth + 1;
  }
  r->tokens[nth] = t;
}

/*
 * Append character c to token t and keep the buffer NUL-terminated.
 *
 * The buffer grows in steps of 4 bytes whenever there is no room for
 * the new character plus the terminator.  Running out of memory is
 * fatal: a message is written to stderr and the program exits.
 */
static void
token_append(struct token *t, char c)
{
  if (t->str_len + 2 > t->buf_len) {
    int new_len = t->buf_len + 4;
    /* keep t->str intact until the reallocation is known to succeed */
    char *grown = realloc(t->str, (size_t)new_len);
    if (!grown) {
      fprintf(stderr, "token_append: out of memory\n");
      exit(EXIT_FAILURE);
    }
    t->str = grown;
    t->buf_len = new_len;
  }
  t->str[t->str_len] = c;
  t->str_len ++;
  t->str[t->str_len] = 0;
}

/*
 * Debug dump of a token to stdout.
 *
 * Prints "<str_len>(/<buf_len>):" followed by the text in parentheses.
 * "{null}" is printed in place of the text when the token has no
 * buffer, and on its own when t itself is NULL.
 */
static void
token_print(struct token *t)
{
  const char *text = NULL;

  if (t != NULL) {
    printf("%d(/%d):", t->str_len, t->buf_len);
    text = t->str;
  }
  if (text == NULL) {
    printf("{null}\n");
  } else {
    printf("(%s)\n", text);
  }
}

/*
 * Release a token and its character buffer.  A NULL token is accepted
 * and ignored.
 */
static void
token_free(struct token *t)
{
  if (t != NULL) {
    free(t->str);
    free(t);
  }
}

/*
 * Test whether token t is a tag whose name is s (case-insensitive).
 *
 * s is the tag name without the leading '<', e.g. "td" or "/td".
 * The name in the token must end where s ends: the next character may
 * be '>', whitespace, end of string or any other non-alphanumeric
 * character, so "tr" matches "<tr>" and "<TR class=x>" but not
 * "<track>".
 *
 * Returns 0 on a match and non-zero otherwise (strcmp-like), including
 * when t is NULL, has no text, or is not a tag.
 */
static int
tag_match(struct token *t, const char *s)
{
  size_t len;
  if (!t || !t->str || t->str[0] != '<') {
    return 1;
  }
  len = strlen(s);
  if (strncasecmp(&t->str[1], s, len) != 0) {
    return 1;
  }
  /* str[1 + len] is in bounds: the compare above proved that len
     non-NUL characters follow the '<' */
  if (isalnum((unsigned char)t->str[1 + len])) {
    return 1;
  }
  return 0;
}

/*
 * Read the remainder of a tag from fp into t.
 *
 * Characters are appended up to and including the closing '>'.
 * Reading also stops at end of input, in which case the tag is left
 * without its '>'.
 */
static void
read_tag(FILE *fp, struct token *t)
{
  int ch;
  while ((ch = fgetc(fp)) != EOF) {
    token_append(t, (char)ch);
    if (ch == '>') {
      break;
    }
  }
}

/*
 * Read a run of text from fp into t.
 *
 * Characters are appended until the next '<' or end of input.  The
 * '<' is pushed back onto the stream so the following read sees the
 * tag from its first character.
 */
static void
read_str(FILE *fp, struct token *t)
{
  int ch;
  while ((ch = fgetc(fp)) != EOF) {
    if (ch == '<') {
      ungetc(ch, fp);
      break;
    }
    token_append(t, (char)ch);
  }
}

/*
 * Read the next token from fp.
 *
 * Leading whitespace is skipped.  A token starting with '<' is a tag
 * and extends through the matching '>'; anything else is a text run
 * that extends up to (not including) the next '<', so text tokens keep
 * their interior and trailing whitespace.
 *
 * Returns a newly allocated token owned by the caller, or NULL at end
 * of input.
 */
static struct token *
read_token(FILE *fp)
{
  struct token *t;
  int ch;

  do {
    ch = fgetc(fp);
  } while (ch != EOF && isspace(ch));
  if (ch == EOF) {
    return NULL;
  }

  t = token_new();
  if (!t) {
    return NULL;
  }
  token_append(t, (char)ch);
  if (ch == '<') {
    read_tag(fp, t);
  } else {
    read_str(fp, t);
  }
  return t;
}

/*
 * Read the contents of one table cell; the caller has already consumed
 * the opening <td> tag.
 *
 * Returns the body token when the input has the shape "body </td>";
 * ownership of the token passes to the caller.  Returns NULL
 *  - at end of input,
 *  - for an empty cell ("</td>" comes first); only that closing tag is
 *    consumed, so the caller still sees the next cell's <td>,
 *  - when the token after the body is not </td>.
 * Every token that is not returned is freed here.
 *
 * ps is unused.
 */
static struct token *
read_data(struct parser_stat *ps, FILE *fp)
{
  struct token *body, *td;
  int closed;
  (void)ps;

  body = read_token(fp);
  if (!body) {
    return NULL;
  }
  if (!tag_match(body, "/td")) {
    /* empty cell: do not swallow the token that follows </td> */
    token_free(body);
    return NULL;
  }

  td = read_token(fp);
  closed = !tag_match(td, "/td");
  token_free(td);
  if (!closed) {
    token_free(body);
    return NULL;
  }
  return body;
}

/*
 * Read one table row and push it onto the front of tab's row list.
 *
 * The caller has already consumed the opening <tr>.  Cells are read
 * for as long as the next token is a <td> tag; each cell body is stored
 * at the current column, and ps->column advances for every <td> seen
 * whether or not a body was stored.  The first token that is not <td>
 * ends the row and is consumed.  ps->column is reset to 0 on return.
 *
 * Tag tokens read here are freed; cell bodies are owned by the row.
 */
static void
read_row(struct parser_stat *ps, struct table *tab, FILE *fp)
{
  struct token *t;
  struct row *r = row_new();
  r->index = ps->row;
  t = read_token(fp);
  while (!tag_match(t, "td")) {
    struct token *cell;
    token_free(t);  /* the <td> tag itself is no longer needed */
    cell = read_data(ps, fp);
    if (cell) {
      row_set(r, ps->column, cell);
    }
    t = read_token(fp);
    ps->column ++;
  }
  token_free(t);  /* row terminator (or NULL at end of input) */
  /* push the row at the head of the list */
  r->next = tab->head;
  tab->head = r;
  ps->column = 0;
}

/*
 * Print one dictionary entry for row r to stdout: a header line built
 * from the row's second cell, the constant 1 and the row's first cell
 * (in that order), followed by a fixed set of attribute lines and a
 * blank line.
 *
 * r->tokens[0] and r->tokens[1] are dereferenced without a NULL check,
 * so the caller must make sure both cells are present.
 *
 * NOTE(review): the non-ASCII string literals below look like EUC-JP
 * text decoded with the wrong character set (the file header names
 * EUC-JP); verify them against the original bytes before editing.
 */
static void
output_word(struct row *r)
{
  printf("%s %d %s\n", r->tokens[1]->str, 1,
	 r->tokens[0]->str);
  printf("ʻ = ̾\n");
  printf("³ = n\n");
  printf("³ = n\n");
  printf("³ = n\n");
  printf("촴Τߤʸ = y\n");
  printf("ʽ³ = y\n");
  printf("\n");
}

static void
read_table(struct parser_stat *ps, FILE *fp)
{
  struct token *t;
  struct table tab;
  struct row *r;
  /**/
  tab.head = NULL;
  /**/
  t = read_token(fp);
  ps->row = 0;
  ps->column = 0;
  while (tag_match(t, "tr")) {
    t = read_token(fp);
  }
  while (!tag_match(t, "tr")) {
    read_row(ps, &tab, fp);
    t = read_token(fp);
    ps->row ++;
    ps->column = 0;
  }
  /**/
  for (r = tab.head; r; r = r->next) {
    if (r->nr_tokens < 4) {
      continue;
    }
    if (r->index == 0) {
      /* exclude 1st one */
      continue;
    }
    output_word(r);
  }
}

/*
 * Parse the file named fn and process every table found in it.
 *
 * Tokens are read one at a time; each <table> tag hands control to
 * read_table(), which consumes the table's contents.  Every token read
 * here is freed after it has been inspected.
 *
 * If the file cannot be opened, the reason is reported on stderr and
 * nothing else happens.  ps->fatal is cleared before parsing.
 */
static void
read_file(struct parser_stat *ps, const char *fn)
{
  struct token *t;
  FILE *fp;
  fp = fopen(fn, "r");
  if (!fp) {
    perror(fn);
    return ;
  }
  ps->fatal = 0;
  while ((t = read_token(fp))) {
    int is_table = !tag_match(t, "table");
    token_free(t);
    if (is_table) {
      read_table(ps, fp);
    }
  }
  fclose(fp);
}

/*
 * Strip trailing whitespace (including the newline) from buf in place.
 *
 * A string that consists only of whitespace becomes the empty string.
 * Returns buf, cast to char *.
 */
static char *
chomp_buf(unsigned char *buf)
{
  size_t len = strlen((char *)buf);
  while (len > 0 && isspace(buf[len - 1])) {
    len--;
    buf[len] = 0;
  }
  return (char *)buf;
}

/*
 * Read a list of URLs from the file named arg, one per line, fetch
 * each with http_get() into the file "tmp-tmp" in the current
 * directory, and parse the result with read_file().
 *
 * Lines starting with '#' and lines containing only whitespace are
 * skipped.  Trailing whitespace is stripped from each URL.  A line is
 * read in pieces of at most sizeof(buf) - 1 characters, so longer
 * lines are split.  The downloaded file is parsed only when it was
 * written and closed without error.
 *
 * If the list file cannot be opened the function returns silently.
 *
 * NOTE(review): "tmp-tmp" is a fixed name in the working directory and
 * is left behind after the run.
 */
static void
read_url_list(struct parser_stat *ps, char *arg)
{
  char buf[1000];
  FILE *fp;
  fp = fopen(arg, "r");
  if (!fp) {
    return ;
  }
  while (fgets(buf, sizeof(buf), fp)) {
    char *url;
    FILE *tmp_fp;
    if (buf[0] == '#' ) {
      continue;
    }
    url = chomp_buf((unsigned char *)buf);
    if (url[strspn(url, " \t\r\n\v\f")] == '\0') {
      /* blank line: nothing to fetch */
      continue;
    }
    tmp_fp = fopen("tmp-tmp", "w");
    if (!tmp_fp) {
      continue;
    }
    http_get(url, tmp_fp);
    if (fclose(tmp_fp) == 0) {
      read_file(ps, "tmp-tmp");
    }
  }
  fclose(fp);
}

/*
 * Print the package name (the PACKAGE macro) followed by a newline to
 * stdout.  The name is passed as an argument, not as part of the
 * format string, so a '%' in it is printed literally.
 */
static void
print_usage(void)
{
  printf("%s\n", PACKAGE);
}

int
main(int argc, char **argv)
{
  struct parser_stat ps;
  int i;
  const char *prev_arg = "";
  for (i = 1; i < argc; i++) {
    char *arg = argv[i];
    if (arg[0] != '-') {
      if (!strcmp(prev_arg, "-f")) {
	read_url_list(&ps, arg);
      } else {
	read_file(&ps, arg);
      }
    } else if (!strcmp("--version", arg)) {
      print_usage();
      exit(0);
    }
    /**/
    prev_arg = arg;
  }
  /**/
  return 0;
}
