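/*
 * A metaword is a set of words that can be used together grammatically
 * as one phrase (bunsetsu); this file contains the code that handles them.
 * The various kinds of metaword are generated here.
 *
 * init_metaword_tab() builds the information needed for metaword processing
 * anthy_make_metaword_all() builds the metawords of a context
 * anthy_print_metaword() prints the given metaword
 *
 * Funded by IPA (Exploratory Software Project) 2001 10/29
 * Copyright (C) 2000-2001 TABATA Yusuke, UGAWA Tomoharu
 */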
#include <stdlib.h>
#include <stdio.h>

#include <record.h>
#include <splitter.h>
#include <xstr.h>
#include "wordborder.h"

/**
 * Look up, and optionally create, the extent that covers `len`
 * characters starting at character index `from`.
 *
 * sc:    splitter context that owns the per-character nodes
 * from:  index of the first character of the range
 * len:   number of characters in the range
 * force: when non-zero, a missing extent is created and registered
 *
 * Returns the extent, or NULL when `len` is not positive, when the
 * extent does not exist and `force` is zero, or when the per-node
 * pointer table cannot be enlarged.
 */
struct extent *
anthy_find_extent(struct splitter_context *sc,
		  int from, int len, int force)
{
  int i;
  struct char_node *cnode;
  struct extent *ex;

  /* a range needs at least one character; this also keeps len-1 a
     valid table index below */
  if (len <= 0) {
    return NULL;
  }
  cnode = &sc->word_split_info->cnode[from];
  if (len <= cnode->max_len && cnode->ex[len-1]) {
    return cnode->ex[len-1];
  }
  if (!force) {
    return NULL;
  }
  /* enlarge the pointer table so that index len-1 exists */
  if (len > cnode->max_len) {
    struct extent **grown;
    grown = realloc(cnode->ex, sizeof(*grown) * (size_t)len);
    if (!grown) {
      /* the old table is untouched and still owned by cnode */
      return NULL;
    }
    cnode->ex = grown;
    for (i = cnode->max_len; i < len; i++) {
      cnode->ex[i] = NULL;
    }
  }

  /* create and initialise the new extent */
  ex = anthy_smalloc(sc->word_split_info->ExAllocator);
  cnode->ex[len-1] = ex;
  ex->mw = 0;
  ex->best = 0;
  ex->score = 0;
  ex->from = from;
  ex->len = len;
  ex->is_dummy_char = 0;
  if (ex->len == 1) {
    /* a one-character extent whose character has type XCT_NONE is
       flagged as a dummy character */
    xchar xc;
    xc = *sc->ce[ex->from].c;
    if (anthy_get_xchar_type(xc) == XCT_NONE) {
      ex->is_dummy_char = 1;
    }
  }
  /* flag extents that end exactly at the end of the input */
  if (ex->from + ex->len == sc->char_count) {
    ex->is_right_end = 1;
  } else {
    ex->is_right_end = 0;
  }

  /* the table length is recorded only after the slot has been filled */
  if (cnode->max_len < len) {
    cnode->max_len = len;
  }
  return ex;
}
/*
 * Register a metaword with the context.
 *
 * The metaword is pushed onto the front of two singly linked lists:
 * the list of metawords starting at character mw->from (linked through
 * `next`) and the list of metawords sharing the same extent (linked
 * through `same_extent`).  The extent is created on demand.
 *
 * A metaword for which no extent can be obtained (anthy_find_extent()
 * returns NULL, e.g. for a zero-length range) is left unregistered.
 */
static void
commit_metaword(struct splitter_context *sc,
		struct meta_word *mw)
{
  struct word_split_info_cache *info = sc->word_split_info;
  struct extent *ex = anthy_find_extent(sc, mw->from, mw->len, 1);

  if (!ex) {
    /* nothing to attach the metaword to; linking it only into the
       per-character list would leave it without an extent */
    return ;
  }

  /* list of metawords that start at the same character */
  mw->next = info->cnode[mw->from].mw;
  info->cnode[mw->from].mw = mw;

  /* list of metawords that cover the same range */
  mw->same_extent = ex->mw;
  ex->mw = mw;
}

/*
 * Print one metaword to standard output: its type, start index, length
 * and score, followed by its word list when it has one, or by a bare
 * newline when it has none.
 */
void
anthy_print_metaword(struct splitter_context *sc,
		     struct meta_word *mw)
{
  printf("*meta word %d(%d-%d):%d* ",
	 mw->type, mw->from, mw->len, mw->score);
  if (!mw->wl) {
    /* no word list: just terminate the line */
    printf("\n");
    return ;
  }
  anthy_print_word_list(sc, mw->wl);
}

/*
 * Take a fresh metaword from the context's metaword allocator and give
 * it default contents: type MW_SINGLE, score 0 and every reference
 * cleared.  The fields from, len, next and same_extent are not set
 * here; callers fill in from/len and commit_metaword() sets the links.
 */
static struct meta_word *
alloc_metaword(struct splitter_context *sc)
{
  struct meta_word *fresh = anthy_smalloc(sc->word_split_info->MwAllocator);

  fresh->type = MW_SINGLE;
  fresh->score = 0;

  /* no word list, no children, no parent */
  fresh->wl = NULL;
  fresh->mw1 = NULL;
  fresh->mw2 = NULL;
  fresh->parent = 0;

  /* no candidate hint and no si */
  fresh->cand_hint = NULL;
  fresh->si = 0;

  return fresh;
}

/*
 * Commit one MW_SINGLE metaword for every word_list hanging off every
 * character position of the context.  Each metaword copies the
 * position, length and score of its word_list.
 */
static void
make_simple_metaword(struct splitter_context *sc)
{
  int pos = 0;

  while (pos < sc->char_count) {
    struct word_list *cur = sc->word_split_info->cnode[pos].wl;

    while (cur) {
      struct meta_word *single = alloc_metaword(sc);

      single->type = MW_SINGLE;
      single->wl = cur;
      single->from = cur->from;
      single->len = cur->len;
      single->score = cur->score;
      commit_metaword(sc, single);

      cur = cur->next;
    }
    pos++;
  }
}

/*
 * Commit a new metaword of the given type that joins two metawords.
 * The result starts where `mw` starts, spans both lengths, keeps both
 * parts as mw1/mw2 and scores one point more than the sum of the two
 * scores.
 */
static void
do_combine(struct splitter_context *sc,
	   enum metaword_type type,
	   struct meta_word *mw, struct meta_word *mw2)
{
  struct meta_word *joined = alloc_metaword(sc);

  joined->type = type;
  joined->mw1 = mw;
  joined->mw2 = mw2;
  joined->from = mw->from;
  joined->len = mw->len + mw2->len;
  joined->score = mw->score + mw2->score + 1;
  commit_metaword(sc, joined);
}

/*
 * Verb in renyou form followed by an adjective: combine the two as
 * MW_V_RENYOU_A when the adjective's entry has a non-zero frequency
 * for the word type anthy_wtype_a_tail_of_v_renyou.
 * Both metawords must carry a word list.
 */
static void
try_combine_v_renyou_a(struct splitter_context *sc,
		       struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t left_wt = mw->wl->core_wt;
  wtype_t right_wt = mw2->wl->core_wt;

  if (anthy_wtype_get_pos(left_wt) != POS_V
      || anthy_wtype_get_ct(left_wt) != CT_RENYOU
      || anthy_wtype_get_pos(right_wt) != POS_A) {
    return ;
  }
  /* it is an adjective; now check that it can follow a renyou verb */
  if (!anthy_get_seq_ent_wtype_freq(mw2->wl->core_seq,
				    anthy_wtype_a_tail_of_v_renyou)) {
    return ;
  }
  do_combine(sc, MW_V_RENYOU_A, mw, mw2);
}

/*
 * Verb in renyou form followed by a noun whose sub-class is SCOS_T40:
 * combine the two as MW_V_RENYOU_T.
 * Both metawords must carry a word list.
 */
static void
try_combine_v_renyou_t(struct splitter_context *sc,
		       struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t left_wt = mw->wl->core_wt;
  wtype_t right_wt = mw2->wl->core_wt;

  if (anthy_wtype_get_pos(left_wt) != POS_V
      || anthy_wtype_get_ct(left_wt) != CT_RENYOU) {
    return ;
  }
  if (anthy_wtype_get_pos(right_wt) != POS_NOUN
      || anthy_wtype_get_scos(right_wt) != SCOS_T40) {
    return ;
  }
  do_combine(sc, MW_V_RENYOU_T, mw, mw2);
}

/*
 * Family name followed by first name: combine the two as MW_NAMEPAIR.
 * The left entry must be flagged NF_FAMNAME and typed SCOS_FAMNAME,
 * the right entry flagged NF_FSTNAME and typed SCOS_FSTNAME.
 * Both metawords must carry a word list.
 */
static void
try_combine_name(struct splitter_context *sc,
		 struct meta_word *mw, struct meta_word *mw2)
{
  int left_flag = anthy_get_seq_flag(mw->wl->core_seq);
  int right_flag = anthy_get_seq_flag(mw2->wl->core_seq);

  /* the entry flags have to allow the name reading ... */
  if (!(left_flag & NF_FAMNAME) || !(right_flag & NF_FSTNAME)) {
    return ;
  }
  /* ... and the word types actually chosen have to be names */
  if (anthy_wtype_get_scos(mw->wl->core_wt) != SCOS_FAMNAME ||
      anthy_wtype_get_scos(mw2->wl->core_wt) != SCOS_FSTNAME) {
    return ;
  }
  do_combine(sc, MW_NAMEPAIR, mw, mw2);
}

/*
 * Two numeric entries: combine them as MW_NUM_XX when the left one has
 * a non-zero frequency for anthy_wtype_n10 and the right one for
 * anthy_wtype_n1.  Both entries must be flagged NF_NUM.
 * Both metawords must carry a word list.
 */
static void
try_combine_10_1(struct splitter_context *sc,
		 struct meta_word *mw, struct meta_word *mw2)
{
  int left_flag = anthy_get_seq_flag(mw->wl->core_seq);
  int right_flag = anthy_get_seq_flag(mw2->wl->core_seq);

  if (!(left_flag & NF_NUM) || !(right_flag & NF_NUM)) {
    return ;
  }
  if (!anthy_get_seq_ent_wtype_freq(mw->wl->core_seq, anthy_wtype_n10) ||
      !anthy_get_seq_ent_wtype_freq(mw2->wl->core_seq, anthy_wtype_n1)) {
    return ;
  }
  do_combine(sc, MW_NUM_XX, mw, mw2);
}

/* ٤metawordȷǤ뤫å */
static void
try_combine_metaword(struct splitter_context *sc,
		     struct meta_word *mw, struct meta_word *mw2)
{
  if (!mw->wl || !mw2->wl) {
    return ;
  }
  if (mw->wl->postfix_len + mw->wl->dep_len == 0 
      && mw->wl->prefix_len == 0
      && mw2->wl->prefix_len == 0) {
    try_combine_name(sc, mw, mw2);
    try_combine_v_renyou_a(sc, mw, mw2);
    try_combine_v_renyou_t(sc, mw, mw2);
    try_combine_10_1(sc, mw, mw2);
  }
}


/*
 * Try to join every metaword with every metaword that starts directly
 * after it.  Metawords that already reach the end of the input have no
 * right neighbour and are skipped.
 */
static void
combine_metaword(struct splitter_context *sc)
{
  struct word_split_info_cache *info = sc->word_split_info;
  int start;

  /* loop over the left edge of the left metaword */
  for (start = 0; start < sc->char_count; start++) {
    struct meta_word *left;

    /* loop over the metawords starting there */
    for (left = info->cnode[start].mw; left; left = left->next) {
      struct meta_word *right;
      int next_start = left->len + start;

      if (next_start >= sc->char_count) {
	/* reaches the right end: nothing can follow */
	continue;
      }
      /* loop over the metawords starting right after `left` */
      for (right = info->cnode[next_start].mw; right; right = right->next) {
	try_combine_metaword(sc, left, right);
      }
    }
  }
}

/*
 * Commit an MW_DUMMY metaword covering `len` characters from `from`.
 *
 * Its score is derived from the best score among the metawords that
 * already cover `orig_len` characters from the same position (0 when
 * there are none or all scores are non-positive): three times that
 * score, scaled by len / orig_len in integer arithmetic.
 * `orig_len` must not be zero.
 */
static void
make_dummy_metaword(struct splitter_context *sc, int from,
		    int len, int orig_len)
{
  int best = 0;
  struct extent *ex = anthy_find_extent(sc, from, orig_len, 0);
  struct meta_word *cur = ex ? ex->mw : NULL;
  struct meta_word *dummy;

  /* highest score on the original extent */
  while (cur) {
    if (cur->score > best) {
      best = cur->score;
    }
    cur = cur->same_extent;
  }

  dummy = alloc_metaword(sc);
  dummy->type = MW_DUMMY;
  dummy->from = from;
  dummy->len = len;
  dummy->score = 3 * best * len / orig_len;
  commit_metaword(sc, dummy);
}

/*
 * Use the "EXPANDPAIR" record section to add dummy metawords.
 *
 * Every substring of the input that does not reach the end of the input
 * is looked up as a key.  For each string stored under a matching key
 * that fits into the remaining input and equals the input at the same
 * start position, a dummy metaword of the stored string's length is
 * committed.  Does nothing when the section cannot be selected.
 */
static void
make_expanded_metaword_all(struct splitter_context *sc)
{
  int start, sub_len;

  if (anthy_select_section("EXPANDPAIR", 0) == -1) {
    return ;
  }
  for (start = 0; start < sc->char_count; start++) {
    for (sub_len = 1; sub_len < sc->char_count - start; sub_len++) {
      /* look the substring up in the record */
      xstr key;
      int idx, nr;

      key.len = sub_len;
      key.str = sc->ce[start].c;
      if (anthy_select_column(&key, 0) != 0) {
	continue;
      }

      /* the substring is a key: walk the strings stored for it */
      nr = anthy_get_nr_values();
      for (idx = 0; idx < nr; idx++) {
	xstr *stored = anthy_get_nth_xstr(idx);
	xstr head;

	if (!stored || stored->len > sc->char_count - start) {
	  continue;
	}
	/* compare against the input prefix of the same length */
	head.str = sc->ce[start].c;
	head.len = stored->len;
	if (anthy_xstrcmp(&head, stored) == 0) {
	  make_dummy_metaword(sc, start, head.len, sub_len);
	}
      }
    }
  }
}

/*
 * Build the chain of metawords described by the currently selected
 * record entry (the caller selects it in the "OCHAIRE" section).
 *
 * Values read from the entry:
 *   value 0        number of segments
 *   value 2*j + 1  length of segment j
 *   xstr  2*j + 2  candidate hint of segment j
 *
 * One MW_OCHAIRE_LEAF metaword is committed per segment; each leaf's
 * mw1 points at the leaf of the segment to its right.  Finally one
 * MW_OCHAIRE metaword covering `len` characters from `from`, scored
 * OCHAIRE_SCORE, is committed with mw1 pointing at the leftmost leaf.
 *
 * sc:   splitter context
 * from: index of the first character of the matched text
 * len:  length of the matched text
 *
 * An entry with fewer than one segment is ignored.
 */
static void
make_ochaire_metaword(struct splitter_context *sc,
		      int from, int len)
{
  struct meta_word *mw, *mw0;
  int count;
  int s;
  int j;
  int seg_len;

  count = anthy_get_nth_value(0);
  if (count < 1) {
    /* malformed entry: there is no last segment to start the chain */
    return ;
  }

  /* offset of the last segment = total length of all the others */
  for (s = 0, j = 0; j < count - 1; j++) {
    s += anthy_get_nth_value(j * 2 + 1);
  }

  /* leaf for the rightmost segment */
  seg_len = anthy_get_nth_value((count - 1) * 2 + 1);
  mw = alloc_metaword(sc);
  mw->type = MW_OCHAIRE_LEAF;
  mw->from = from + s;
  mw->len = seg_len;
  mw->cand_hint = anthy_get_nth_xstr((count - 1) * 2 + 2);

  /* walk leftwards; j is count-1 here, so start at count-2 */
  for (j-- ; j >= 0; j--) {
    struct meta_word *n;
    seg_len = anthy_get_nth_value(j * 2 + 1);
    s -= seg_len;
    n = alloc_metaword(sc);
    n->type = MW_OCHAIRE_LEAF;
    n->mw1 = mw;
    n->from = from + s;
    n->len = seg_len;
    n->cand_hint = anthy_get_nth_xstr(j * 2 + 2);
    /* the leaf to the right is complete now */
    commit_metaword(sc, mw);
    mw = n;
  }
  /* leftmost leaf */
  commit_metaword(sc, mw);

  /* metaword spanning the whole match */
  mw0 = alloc_metaword(sc);
  mw0->type = MW_OCHAIRE;
  mw0->mw1 = mw;
  mw0->from = from;
  mw0->len = len;
  mw0->score = OCHAIRE_SCORE;
  commit_metaword(sc, mw0);
}

/*
 * Search the "OCHAIRE" record section for an entry matching the input.
 *
 * Start positions are tried from left to right; at each one the longest
 * key matching the rest of the input is looked up.  The first match is
 * marked as used and turned into metawords, and the search stops.
 * Does nothing when the section cannot be selected.
 *
 * NOTE(review): an earlier form of this loop advanced the start
 * position past the match immediately before leaving the loop, which
 * hints that continuing the scan after a match may have been intended;
 * confirm before changing.
 */
static void
make_ochaire_metaword_all(struct splitter_context *sc)
{
  int start;

  if (anthy_select_section("OCHAIRE", 0) == -1) {
    return ;
  }
  for (start = 0; start < sc->char_count; start++) {
    xstr rest;
    xstr *key;

    /* everything from `start` to the end of the input */
    rest.len = sc->char_count - start;
    rest.str = sc->ce[start].c;
    if (anthy_select_longest_column(&rest) != 0) {
      continue;
    }

    anthy_mark_column_used();
    key = anthy_get_index_xstr();
    make_ochaire_metaword(sc, start, key->len);
    /* only the first match is processed */
    return ;
  }
}

/*
 * Commit an MW_DUMMY metaword that extends `mw` over the run of
 * XCT_PART characters directly following it.  The new metaword keeps
 * the start and score of `mw`.  Nothing is committed when no such
 * character follows.
 */
static void
make_metaword_with_depchar(struct splitter_context *sc,
			   struct meta_word *mw)
{
  int tail = mw->from + mw->len;
  int extra = 0;
  struct meta_word *longer;

  /* count the XCT_PART characters right behind the metaword */
  while (tail + extra < sc->char_count
	 && (anthy_get_xchar_type(*sc->ce[tail + extra].c) & XCT_PART)) {
    extra++;
  }
  if (extra <= 0) {
    return ;
  }

  /* cover the metaword plus those characters */
  longer = alloc_metaword(sc);
  longer->type = MW_DUMMY;
  longer->from = mw->from;
  longer->len = mw->len + extra;
  longer->score = mw->score;
  commit_metaword(sc, longer);
}

/*
 * Run make_metaword_with_depchar() on every metaword of the context.
 * Metawords committed during the walk go to the front of the list
 * being walked, so they are not visited themselves.
 */
static void 
make_metaword_with_depchar_all(struct splitter_context *sc)
{
  struct word_split_info_cache *info = sc->word_split_info;
  int pos = 0;

  while (pos < sc->char_count) {
    struct meta_word *cur = info->cnode[pos].mw;

    while (cur) {
      make_metaword_with_depchar(sc, cur);
      cur = cur->next;
    }
    pos++;
  }
}

/*
 * Favour metawords that cover the whole input on their own.
 *
 * Among the metawords on the extent spanning all characters, the score
 * is doubled for MW_SINGLE metawords whose word list has neither prefix
 * nor postfix, and for MW_V_RENYOU_A / MW_V_RENYOU_T metawords.
 * Does nothing when that extent does not exist.
 */
static void 
bias_to_single_segment_metaword(struct splitter_context *sc)
{
  struct extent *whole = anthy_find_extent(sc, 0, sc->char_count, 0);
  struct meta_word *cur;

  if (!whole) {
    return ;
  }
  for (cur = whole->mw; cur; cur = cur->same_extent) {
    int favoured = 0;

    if (cur->type == MW_SINGLE &&
	cur->wl &&
	cur->wl->prefix_len == 0 &&
	cur->wl->postfix_len == 0) {
      /* plain word without prefix or postfix */
      favoured = 1;
    } else if (cur->type == MW_V_RENYOU_A ||
	       cur->type == MW_V_RENYOU_T) {
      /* renyou-verb compound */
      favoured = 1;
    }

    if (favoured) {
      cur->score *= 2;
    }
  }
}

/*
 * Penalise one-character metawords: the score of every metaword on
 * every existing one-character extent is divided by ten.
 */
static void 
bias_to_single_char_metaword(struct splitter_context *sc)
{
  int pos;

  for (pos = 0; pos < sc->char_count; pos++) {
    struct extent *one = anthy_find_extent(sc, pos, 1, 0);
    struct meta_word *cur;

    if (!one) {
      continue;
    }
    cur = one->mw;
    while (cur) {
      cur->score = cur->score / 10;
      cur = cur->same_extent;
    }
  }
}

void
anthy_make_metaword_all(struct splitter_context *sc)
{
  /* ޤword_listämetaword */
  make_simple_metaword(sc);

  /* metaword礹 */
  combine_metaword(sc);

  /* 礵줿ʸ */
  make_expanded_metaword_all(sc);

  /* Ĺʤɤε桢¾ε */
  make_metaword_with_depchar_all(sc);

  /* 򤤤 */
  make_ochaire_metaword_all(sc);

  /* ñʸǤʸ򥫥СǤʤ顤ʸ˲ */
  bias_to_single_segment_metaword(sc);

  /* ʸʸϸ */
  bias_to_single_char_metaword(sc);
}
