/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* voca_load_htkdict.c --- read in vocabulary data */

/* $Id: voca_load_htkdict.c,v 1.7 2002/10/15 07:17:46 ri Exp $ */

/* format is HTK Dictionary format */

/* word-internal context dependency is considered in this function */

#include <sent/stddefs.h>
#include <sent/vocabulary.h>
#include <sent/htk_hmm.h>

/* 
 * dictionary format:
 * 
 * 1 word per line.
 * 
 * fields: GrammarEntry [OutputString] phone1 phone2 ....
 * 
 *     GrammarEntry
 *		   (for N-gram)
 *		   word name in N-gram
 *                 (for DFA)
 *                 terminal symbol ID
 *
 *     [OutputString]
 *		   String to output when the word is recognized.
 *
 *     {OutputString}
 *		   String to output when the word is recognized.
 *                 Braces instead of brackets also mark this word
 *                 as transparent.
 * 
 *     phone1 phone2 ....
 *		   sequence of logical HMM names (normally phonemes)
 *                 that expresses the pronunciation
 */

/* work area shared by cycle_triphone() and cycle_triphone_flush() */
static char trbuf[3][20];	/* the three most recently given phone names, used in rotation */
static char chbuf[30];		/* composed triphone name; cycle_triphone() returns a pointer to this */
static char nophone[1];		/* empty string, fed by cycle_triphone_flush() as the end marker */
static int  trp_l, trp, trp_r;	/* indices into trbuf: left context, center phone, right context */

/*
 * Append `src' to the NUL-terminated string held in `dst', never writing
 * beyond `dstsize' bytes (terminator included).  Characters that do not
 * fit are dropped.
 */
static void
triphone_append(char *dst, size_t dstsize, const char *src)
{
  size_t used = strlen(dst);

  if (used + 1 < dstsize) {
    strncat(dst, src, dstsize - used - 1);
  }
}

/*
 * Compose a triphone name from the phones given in the last three calls.
 *
 *   p == NULL : reset the internal state; always returns NULL.
 *   otherwise : `p' is stored as the newest (right context) phone, and the
 *               name for the phone given one call earlier is built as
 *               [left HMM_LC_DLIM] center [HMM_RC_DLIM right], where empty
 *               contexts are omitted.
 *
 * Returns NULL when there is no center phone yet (the first call after a
 * reset) or when the center is the empty string.  Otherwise returns a
 * pointer to a static buffer that the next call overwrites.
 *
 * Phone names come from dictionary files, so every copy is bounded by the
 * size of its destination.  A name that does not fit is truncated instead
 * of overrunning the static buffers; names that fit give the same result
 * as an unbounded copy.
 */
char *
cycle_triphone(char *p)
{
  int i;
  int have_center;

  if (p == NULL) {		/* initialize */
    nophone[0]='\0';
    for(i=0;i<3;i++) trbuf[i][0] = '\0';
    trp_l = 0;
    trp   = 1;
    trp_r = 2;
    return NULL;
  }

  /* store the new phone as the right context */
  trbuf[trp_r][0] = '\0';
  triphone_append(trbuf[trp_r], sizeof(trbuf[trp_r]), p);

  chbuf[0]='\0';
  if (trbuf[trp_l][0] != '\0') {
    triphone_append(chbuf, sizeof(chbuf), trbuf[trp_l]);
    triphone_append(chbuf, sizeof(chbuf), HMM_LC_DLIM);
  }

  have_center = (trbuf[trp][0] != '\0');
  if (have_center) {
    triphone_append(chbuf, sizeof(chbuf), trbuf[trp]);
    if (trbuf[trp_r][0] != '\0') {
      triphone_append(chbuf, sizeof(chbuf), HMM_RC_DLIM);
      triphone_append(chbuf, sizeof(chbuf), trbuf[trp_r]);
    }
  }

  /* rotate: center becomes left, right becomes center, old left slot is reused */
  i = trp_l;
  trp_l = trp;
  trp = trp_r;
  trp_r = i;

  return(have_center ? chbuf : NULL);
}
/*
 * Feed the empty phone to cycle_triphone() so that the name for the last
 * real phone (which has no right context) is composed.
 * Returns whatever cycle_triphone() returns for that call.
 */
char *
cycle_triphone_flush(void)
{
  return(cycle_triphone(nophone));
}


/* scan all words in winfo and store the largest per-word state total in winfo->maxwn */
static void
set_maxwn(WORD_INFO *winfo)
{
  int word, pos;
  int total;
  int largest = 0;

  for (word = 0; word < winfo->num; word++) {
    /* sum over the phones of this word; each logical HMM counts as
       its state number minus 2 */
    total = 0;
    pos = 0;
    while (pos < winfo->wlen[word]) {
      total += hmm_logical_state_num(winfo->wseq[word][pos]) - 2;
      pos++;
    }
    if (total > largest) {
      largest = total;
    }
  }

  winfo->maxwn = largest;
}

/* scan all words in winfo and store the largest phone count in winfo->maxwlen */
static void
set_maxwlen(WORD_INFO *winfo)
{
  WORD_ID word;
  int longest = 0;

  for (word = 0; word < winfo->num; word++) {
    if (winfo->wlen[word] > longest) {
      longest = winfo->wlen[word];
    }
  }

  winfo->maxwlen = longest;
}


#define PHONEMELEN_STEP  10	/* malloc base */

/* read in vocabulary file: every line read from fp is handed to
   voca_load_htkdict_line() until input ends or that function returns FALSE */
boolean				/* TRUE on success, FALSE on any error word */
voca_load_htkdict(
     FILE *fp,
     WORD_INFO *winfo,
     HTK_HMM_INFO *hmminfo,	/* if NULL, phonemes are ignored */
     boolean ignore_tri_conv)	/* TRUE if convert to triphone should be ignored */
{
  static char linebuf[MAXLINELEN];
  boolean all_ok = TRUE;
  boolean to_triphone;
  WORD_ID count = 0;

  /* convert to triphone only with a triphone model, and only when not disabled */
  to_triphone = (hmminfo != NULL && hmminfo->is_triphone && !ignore_tri_conv) ? TRUE : FALSE;

  winfo_init(winfo);

  for (;;) {
    if (getl(linebuf, sizeof(linebuf), fp) == NULL) break;	/* no more input */
    if (voca_load_htkdict_line(linebuf, count, winfo, hmminfo, ignore_tri_conv, to_triphone, &all_ok) == FALSE) break;
    count++;
    /* keep room for the next entry */
    if (count >= winfo->maxnum) winfo_expand(winfo);
  }
  winfo->num = count;

  /* compute maxwn and maxwlen over the loaded words */
  set_maxwn(winfo);
  set_maxwlen(winfo);

  return(all_ok);
}


/* read in vocabulary file (file descriptor version): every line read from fd
   is handed to voca_load_htkdict_line() until input ends or that function
   returns FALSE */
boolean				/* TRUE on success, FALSE on any error word */
voca_load_htkdict_fd(
     int fd,
     WORD_INFO *winfo,
     HTK_HMM_INFO *hmminfo,	/* if NULL, phonemes are ignored */
     boolean ignore_tri_conv)	/* TRUE if convert to triphone should be ignored */
{
  static char linebuf[MAXLINELEN];
  boolean all_ok = TRUE;
  boolean to_triphone;
  WORD_ID count = 0;

  /* convert to triphone only with a triphone model, and only when not disabled */
  to_triphone = (hmminfo != NULL && hmminfo->is_triphone && !ignore_tri_conv) ? TRUE : FALSE;

  winfo_init(winfo);

  for (;;) {
    if (getl_fd(linebuf, MAXLINELEN, fd) == NULL) break;	/* no more input */
    if (voca_load_htkdict_line(linebuf, count, winfo, hmminfo, ignore_tri_conv, to_triphone, &all_ok) == FALSE) break;
    count++;
    /* keep room for the next entry */
    if (count >= winfo->maxnum) winfo_expand(winfo);
  }
  winfo->num = count;

  /* compute maxwn and maxwlen over the loaded words */
  set_maxwn(winfo);
  set_maxwlen(winfo);

  return(all_ok);
}

/*
 * Parse one dictionary line in `buf' and store it as entry `vnum' of `winfo'.
 *
 *   buf             line to parse; it is tokenized through mystrtok()
 *   vnum            index of the entry to fill; also used (as vnum+1) as the
 *                   line number in messages
 *   winfo           word information to store into
 *   hmminfo         HMM definitions for phone lookup; if NULL the phone
 *                   sequence is not read and the entry gets wseq NULL, wlen 0
 *   ignore_tri_conv not referenced here; kept for interface compatibility
 *   do_conv         TRUE to build word-internal triphone names from the
 *                   phones, falling back to the center phone name when the
 *                   triphone is not defined
 *   ok_flag         set to FALSE when the line is erroneous; never set to TRUE
 *
 * Returns FALSE only when strmatch() reports the line as "DICEND", telling
 * the caller to stop reading.  Returns TRUE otherwise, including for
 * erroneous lines (see ok_flag).
 *
 * On an erroneous line everything allocated for the entry is released,
 * including the temporary phone sequence.  The entry is left with
 * wname/woutput/wseq == NULL and wlen == 0, so that callers which still
 * count the entry never read dangling or unset fields.
 */
boolean
voca_load_htkdict_line(char *buf, int vnum,
		       WORD_INFO *winfo,
		       HTK_HMM_INFO *hmminfo,
		       boolean ignore_tri_conv,
		       boolean do_conv,
		       boolean *ok_flag)
{
  char *ptmp, *lp = NULL, *p;
  static char cbuf[50];
  char *wname = NULL;		/* owned here until stored into winfo */
  char *woutput = NULL;		/* owned here until stored into winfo */
  int tmpmaxlen, len;
  HMM_Logical **tmpwseq = NULL;	/* owned here until stored into winfo */
  HMM_Logical *tmplg;

  if (strmatch(buf, "DICEND")) return FALSE;
  
  /* GrammarEntry */
  if ((ptmp = mystrtok(buf, " \t\n")) == NULL) {
    j_printerr("line %d: corrupted data\n", vnum+1);
    goto error_word;
  }
  wname = strcpy((char *)mymalloc(strlen(ptmp)+1), ptmp);

  /* OutputString */
  if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
    j_printerr("line %d: corrupted data\n", vnum+1);
    goto error_word;
  }
  switch(ptmp[0]) {
  case '[':			/* not transparent word */
    winfo->is_transparent[vnum] = FALSE;
    ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
    break;
  case '{':			/* transparent word */
    winfo->is_transparent[vnum] = TRUE;
    ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
    break;
  default:
    j_printerr("line %d: word %s has no output string\n", vnum+1, wname);
    goto error_word;
  }
  if (ptmp == NULL) {
    j_printerr("line %d: corrupted data\n", vnum+1);
    goto error_word;
  }
  woutput = strcpy((char *)mymalloc(strlen(ptmp)+1), ptmp);
    
  /* phoneme sequence */
  if (hmminfo == NULL) {
    /* don't read: tmpwseq stays NULL */
    len = 0;
  } else {
    tmpmaxlen = PHONEMELEN_STEP;
    tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
    len = 0;
      
    if (do_conv) {
      /* convert phoneme to triphone expression (word-internal) */
      cycle_triphone(NULL);
      if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
	j_printerr("line %d: word %s has no phoneme.\n", vnum+1, wname);
	goto error_word;
      }
      /* prime the cycler: the first call only stores the phone */
      cycle_triphone(lp);
    }

    for (;;) {
      if (do_conv) {
	/* once the tokens run out (lp == NULL), flush the last phone */
	if (lp != NULL) lp = mystrtok(NULL, " \t\n");
	if (lp != NULL) p = cycle_triphone(lp);
	else p = cycle_triphone_flush();
      } else {
	p = mystrtok(NULL, " \t\n");
      }
      if (p == NULL) break;

      /* both defined/pseudo phone is allowed */
      tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
      if (tmplg == NULL && do_conv) {
	/* fall to (pseudo-)monophone */
	tmplg = htk_hmmdata_lookup_logical(hmminfo, center_name(p,cbuf));
	if (tmplg != NULL) {
	  /* use monophone instead */
	  if (len == 0) {
	    j_printerr("Warning: line %d: no triphone matches \"*-%s\",", vnum+1, p);
	  } else if (lp == NULL) {
	    j_printerr("Warning: line %d: no triphone matches \"%s+*\",", vnum+1, p);
	  } else {
	    j_printerr("Warning: line %d: word-internal triphone \"%s\" is missing,\n", vnum+1, p);
	  }
	  if (tmplg->is_pseudo) {
	    j_printerr(" use pseudo monophone \"%s\"\n", cbuf);
	  } else {
	    j_printerr(" use monophone \"%s\"\n", cbuf);
	  }
	}
      }
      if (tmplg == NULL) {	/* not found */
	if (do_conv) {
	  j_printerr("line %d: logical phone for both \"%s\" and \"%s\" is missing\n", vnum+1, p, cbuf);
	} else {
	  j_printerr("line %d: logical phone \"%s\" not found\n", vnum+1, p);
	}
	goto error_word;
      }

      tmpwseq[len] = tmplg;
      len++;
      if (len >= tmpmaxlen) {
	/* expand wseq area by PHONEMELEN_STEP */
	tmpmaxlen += PHONEMELEN_STEP;
	tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
      }
    }
    if (len == 0) {
      j_printerr("line %d: no phone specified.\n", vnum+1);
      goto error_word;
    }
  }

  /* success: hand ownership of the allocated data to winfo */
  winfo->wname[vnum] = wname;
  winfo->woutput[vnum] = woutput;
  winfo->wseq[vnum] = tmpwseq;
  winfo->wlen[vnum] = len;
  
  return(TRUE);

 error_word:
  /* release whatever was allocated for this entry (free(NULL) is a no-op) */
  free(tmpwseq);
  free(woutput);
  free(wname);
  /* leave the slot in a defined, empty state */
  winfo->wname[vnum] = NULL;
  winfo->woutput[vnum] = NULL;
  winfo->wseq[vnum] = NULL;
  winfo->wlen[vnum] = 0;
  *ok_flag = FALSE;
  return(TRUE);
}


/*
 * convert monophone dictionary to word-internal triphone
 *
 * For every word in winfo, the names of its current phones are combined
 * into triphone names with cycle_triphone(), and each phone is replaced by
 * the logical HMM of that name looked up in hmminfo.
 *
 * A phone whose triphone is not found is reported and left unchanged.
 * Words without phones (wlen <= 0 or wseq == NULL, e.g. entries loaded
 * with hmminfo == NULL) are skipped, since there is no first phone to read.
 *
 * Returns TRUE if every lookup succeeded, FALSE if any triphone was missing.
 */
boolean
voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
{
  WORD_ID w;
  int ph;
  char *p;
  HMM_Logical *tmplg;
  boolean ok_flag = TRUE;
  
  for (w=0;w<winfo->num;w++) {
    /* nothing to convert, and wseq[w][0] must not be touched */
    if (winfo->wlen[w] <= 0 || winfo->wseq[w] == NULL) continue;

    /* reset the cycler and prime it with the first phone */
    cycle_triphone(NULL);
    cycle_triphone(winfo->wseq[w][0]->name);

    for (ph = 0; ph < winfo->wlen[w] ; ph++) {
      /* feeding phone ph+1 yields the triphone name for phone ph;
	 the last phone has no successor, so flush instead */
      if (ph == winfo->wlen[w] - 1) {
	p = cycle_triphone_flush();
      } else {
	p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
      }
      if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
	j_printerr("voca_mono2tri: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
	ok_flag = FALSE;
	continue;
      }
      winfo->wseq[w][ph] = tmplg;
    }
  }
  return (ok_flag);
}

/*
 * append whole winfo to other
 * assume both use the same hmminfo
 *
 * Every word of srcinfo is copied into dstinfo, starting at index woffset.
 * Name and output strings and the phone pointer array are newly allocated
 * for the copy; the HMM_Logical objects themselves are shared, not
 * duplicated.  Category IDs (wton) are shifted by coffset.
 *
 * After copying, dstinfo->num is set to woffset + srcinfo->num and
 * maxwn / maxwlen of dstinfo are recomputed.  Nothing is returned.
 *
 * NOTE(review): the slot at woffset is written before any capacity check,
 * so the caller presumably must ensure woffset < dstinfo->maxnum -- confirm.
 */
void
voca_append(
	    WORD_INFO *dstinfo,	/* append to this word_info */
	    WORD_INFO *srcinfo,	/* append this to dst */
	    int coffset,	/* category ID offset */
	    int woffset)	/* word ID offset */
{
  WORD_ID n, w;
  int i;

  n = woffset;
  for(w=0;w<srcinfo->num;w++) {
    /* copy data */
    dstinfo->wlen[n] = srcinfo->wlen[w];
    dstinfo->wname[n] = strcpy((char *)mymalloc(strlen(srcinfo->wname[w])+1), srcinfo->wname[w]);
    dstinfo->woutput[n] = strcpy((char *)mymalloc(strlen(srcinfo->woutput[w])+1), srcinfo->woutput[w]);
    dstinfo->wseq[n] = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * srcinfo->wlen[w]);
    for(i=0;i<srcinfo->wlen[w];i++) {
      dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
    }
    dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
    /* offset category ID by coffset */
    dstinfo->wton[n] = srcinfo->wton[w] + coffset;
    
    n++;
    /* keep room for the next entry */
    if (n >= dstinfo->maxnum) winfo_expand(dstinfo);
  }
  dstinfo->num = n;

  /* compute maxwn and maxwlen over the merged words */
  set_maxwn(dstinfo);
  set_maxwlen(dstinfo);
}

