/**************************************************************************
*   Copyright (C) 2003 by Hideki Ikemoto , (c)2004 by 421                 *
*   ikemo@wakaba.jp                                                       *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
***************************************************************************/

/*--------------------------------------*/
/*  miscellaneous utilities for parsing */

#include <qmutex.h>
#include <qregexp.h>
#include <qdatetime.h>
#include <dom/html_document.h>
#include <dom/html_element.h>
#include <dom/dom_text.h>
#include <klocale.h>
#include <kdebug.h>

#include "qcp932codec.h"
#include "parsemisc.h"
#include "datinfo.h"  /* struct RESDAT is defined. */
#include "kita-utf8.h"
#include "kita-utf16.h"

/* Bounds the digit-scanning loop in parseResAnchor(): the loop counter
   is limited to KITA_RESDIGIT + 1 and is restarted after a hyphen. */
#define KITA_RESDIGIT 4

using namespace Kita;


/*---------------------------------------------------*/

/* Text codec */

/* Codec objects used by qcpToUnicode() and utf8ToUnicode(). Both start
   out NULL and are filled in on the first call of the matching function. */
QCp932Codec* ParseMisc::m_qcpCodec = NULL;
QTextCodec* ParseMisc::m_utf8Codec = NULL;
/* Locked by qcpToUnicode() and utf8ToUnicode() for the duration of each call. */
QMutex ParseMisc:: m_codexMutex;

/* Convert str to Unicode with the shared QCp932Codec instance.
   The codec object is created on the first call. */
QString ParseMisc::qcpToUnicode( const QString& str )
{
    /* QTextCodec is not reentrant, so the mutex is held for the whole call. */
    QMutexLocker guard( &m_codexMutex );

    if ( m_qcpCodec == NULL ) {
        m_qcpCodec = new QCp932Codec();
    }

    return m_qcpCodec->toUnicode( str );
}


/* Convert str to Unicode with the codec that QTextCodec::codecForName
   returns for "utf8". The codec is looked up on the first call. */
QString ParseMisc::utf8ToUnicode( const QString& str )
{
    /* QTextCodec is not reentrant, so the mutex is held for the whole call. */
    QMutexLocker guard( &m_codexMutex );

    if ( m_utf8Codec == NULL ) {
        m_utf8Codec = QTextCodec::codecForName( "utf8" );
    }

    return m_utf8Codec->toUnicode( str );
}



/*------------------------------------------------------------*/
/*------------------------------------------------------------*/

/* parsing functions                                          */



/*------------------------------------------*/
/*                                          */
/*      Parsing Engine for Title            */
/*                                          */
/*------------------------------------------*/

/*
  Build the title line of one res: number, name (and mail address),
  date and ID.

  struct RESDAT resdat should be parsed by parseResDat before
  calling this function. struct RESDAT is defined in datinfo.h.
  If resdat.parsed is not set, titletext is set to QString::null and
  nothing else is done.

  If mode = PARSEMODE_DOM, the title is appended to titlenode as DOM
  nodes created from hdoc. titletext is only reset to QString::null.

  If mode = PARSEMODE_HTML, titletext is HTML text of title.
  DOM tree is not created. Both hdoc and titlenode are ignored.

  If mode = PARSEMODE_TEXT, titletext is plain text of title.
  DOM tree is not created. Both hdoc and titlenode are ignored.

  num is printed as the res number and linked to "#write<num>".

  If showMailAddress is TRUE (or the res has no mail address), the name
  is printed as text, followed by " [address]" when showMailAddress is
  TRUE and an address exists. Otherwise the name becomes a "mailto:"
  link to the address.
*/

/* public */
void ParseMisc::parseTITLEdat(

    /* input */
    int mode,                /* mode */
    DOM::HTMLDocument& hdoc, /* root node of DOM document*/
    int num,                 /* number of res */
    bool showMailAddress,
    const RESDAT& resdat,    /* RESDAT is defined in datinfo.h */

    /* output */
    DOM::Element &titlenode, /* DOM tree of title */
    QString& titletext       /* HTML or plain text of title */

    ){

    /*-----------------------------*/
    /* init                        */
    /*-----------------------------*/

    if( !resdat.parsed ){
        titletext = QString::null;
        return;
    }

    const QString& str_name = resdat.name;
    const QString& str_address = resdat.address;
    const QString& str_id = resdat.id;
    const QString& str_dateId = resdat.dateId;
    const QDateTime& dt_dateTime = resdat.dateTime;

    unsigned int i;
    DOM::Element tmpelm;
    QString linkstr,linkstr2,linkurl,tmpstr;

    QString colonstr = utf8ToUnicode( KITAUTF8_COLON );
    QString colonnamestr = utf8ToUnicode( KITAUTF8_NAME );

    titletext = QString::null;


    /*-----------------------------*/
    /* number                      */
    /*-----------------------------*/

    linkstr = QString( "%1" ).arg( num );
    linkurl = "#write"+linkstr;

    switch( mode ){

    case PARSEMODE_DOM:

        /* anchor whose text is the number and whose href is "#write<num>" */
        tmpelm = titlenode.appendChild( hdoc.createElement( "A" ) );
        tmpelm.setAttribute( "href", linkurl );
        tmpelm.appendChild( hdoc.createTextNode( linkstr ));
        break;

    case PARSEMODE_HTML:
        titletext += "<a href=\"" + linkurl +"\">";
        titletext += linkstr;
        titletext +="</a>";
        break;

    case PARSEMODE_TEXT:
        titletext += linkstr;
        break;

    }


    /*-----------------------------*/
    /* name & mail address         */
    /*-----------------------------*/

    DOM::Element namenode;
    linkurl = QString::null;

    /* parse name strings; from here on linkstr is the plain text name */
    parseBODYdatText(PARSEMODE_TEXT,str_name,linkstr);

    tmpstr = " " + colonnamestr;
    switch( mode ){

    case PARSEMODE_DOM:
        titlenode.appendChild( hdoc.createTextNode( tmpstr ) );
        namenode = titlenode.appendChild( hdoc.createElement( "B" ) );
        break;

    case PARSEMODE_HTML:
    case PARSEMODE_TEXT:
        titletext += tmpstr;
        break;
    }

    /* show name with mail address, or show name only */
    if ( showMailAddress || str_address == QString::null ){

        const QChar *chpt = linkstr.unicode();
        unsigned int length = linkstr.length();

        /* Initialised because parseResAnchor may return before it has
           written its outputs (empty name). */
        unsigned int pos = 0;
        int refNum[2];
        i = 0;

        bool ancChain = TRUE;
        /* ancChain is chain for anchor. For example, if anchor "2"
           appeared, ancChain is set to TRUE. Moreover, if next strings
           are "=5", anchor for 5 is also set. Thus, we can obtain anchors
           for strings "2=5" as follows:

           <a href="#2">2</a><a href="#5">=5</a>
        */

        /* show name */
        for(;;){

            linkurl = QString::null;
            linkstr2 = QString::null;

            /* get strings & anchor for digits */
            if(showMailAddress && ancChain){

                pos = 0;
                if( ( ancChain = parseResAnchor(chpt+i,length-i,linkstr2,refNum,pos) ) ) {

                    linkurl = QString("#%1").arg(refNum[0]);
                    if(refNum[1]) linkurl += QString("-%1").arg(refNum[1]);
                }

                i += pos;
            }

            else{ /* get strings for non-digits: take the rest of the name */

                while( i < length ) linkstr2 += chpt[ i++ ];
            }

            if( linkstr2 != QString::null){

                switch(mode){

                case PARSEMODE_DOM:

                    if( linkurl != QString::null){ /* create anchor */

                        tmpelm = namenode.appendChild( hdoc.createElement( "A" ) );
                        tmpelm.setAttribute( "href", linkurl );
                        tmpelm.appendChild( hdoc.createTextNode( linkstr2  ) );

                    } else { /* plain name part, shown in green */

                        tmpelm = namenode.appendChild( hdoc.createElement( "SPAN" ) );
                        tmpelm.setAttribute( "style", "color: green" );
                        tmpelm.appendChild( hdoc.createTextNode( linkstr2 ) );
                    }

                    break;

                case PARSEMODE_HTML:
                case PARSEMODE_TEXT:

                    if( mode == PARSEMODE_HTML
                        && linkurl != QString::null){ /* create anchor */

                        titletext += "<a href=\"" + linkurl +"\">";
                        titletext += linkstr2;
                        titletext +="</a>";

                    }
                    else titletext += linkstr2;

                    break;
                }
            }

            if( i >= length ) break;

        } /* for(;;) */


        /* show mail address */
        if ( showMailAddress && str_address != QString::null ) {

            tmpstr = QString( " [" ) + str_address + "]";

            switch(mode){

            case PARSEMODE_DOM:
                titlenode.appendChild( hdoc.createTextNode( tmpstr ) );
                break;

            case PARSEMODE_HTML:
            case PARSEMODE_TEXT:
                titletext += tmpstr;
                break;
            }
        }

    } /* if ( showMailAddress || str_address == QString::null ) */

    /* don't show mail address (i.e. showMailAddress == FALSE):
       the whole name becomes a mailto link */
    else {

        linkurl = QString( "mailto:" ) + str_address;

        switch(mode){

        case PARSEMODE_DOM:
            tmpelm = namenode.appendChild( hdoc.createElement( "A" ) );
            tmpelm.setAttribute( "href", linkurl );
            tmpelm.setAttribute( "title", str_address );
            tmpelm.appendChild( hdoc.createTextNode( linkstr ) );
            break;

        case PARSEMODE_HTML:

            /* the href value must be closed with a quote before the
               title attribute starts */
            titletext += "<a href=\"" + linkurl + "\"";
            titletext += " title=\"" + str_address + "\">";
            titletext += linkstr;
            titletext +="</a>";

            break;

        case PARSEMODE_TEXT:
            titletext += linkstr;
            break;
        }
    }


    /*-----------------------------*/
    /* date & ID                   */
    /*-----------------------------*/

    if ( str_dateId != QString::null ) {

        /* show date & ID */
        tmpstr = colonstr + str_dateId;
    }
    else {

        /* show date only */
        QString weekstr;
        switch( dt_dateTime.date().dayOfWeek() ){
        case 1: weekstr = utf8ToUnicode( KITAUTF8_MONDAY ); break;
        case 2: weekstr = utf8ToUnicode( KITAUTF8_TUESDAY ); break;
        case 3: weekstr = utf8ToUnicode( KITAUTF8_WEDNESDAY ); break;
        case 4: weekstr = utf8ToUnicode( KITAUTF8_THURSDAY ); break;
        case 5: weekstr = utf8ToUnicode( KITAUTF8_FRIDAY ); break;
        case 6: weekstr = utf8ToUnicode( KITAUTF8_SATURDAY ); break;
        case 7: weekstr = utf8ToUnicode( KITAUTF8_SUNDAY ); break;
        }
        tmpstr = " " + colonstr
            + dt_dateTime.toString( i18n( "yyyy/MM/dd" ) )
            + weekstr
            + dt_dateTime.toString( i18n( " hh:mm" ) ) +" ";
    }

    switch(mode){

    case PARSEMODE_DOM:
        titlenode.appendChild( hdoc.createTextNode( tmpstr ) );
        break;

    case PARSEMODE_HTML:
    case PARSEMODE_TEXT:
        titletext += tmpstr;
        break;

    }

    /* show ID */
    if ( str_id != QString::null){

        /* The ID "???" is printed as plain "ID:???". Every other ID is
           printed as "ID" (a link to "#idpop<id>" in DOM and HTML mode)
           followed by ":<id>". */
        const bool unknownId = ( str_id == "???" );

        if ( unknownId ) tmpstr = "ID:" + str_id;
        else  tmpstr = ":" + str_id;

        switch(mode){

        case PARSEMODE_DOM:

            if( !unknownId ){
                tmpelm = titlenode.appendChild( hdoc.createElement( "A" ) );
                tmpelm.setAttribute( "href", "#idpop" + str_id );
                tmpelm.appendChild( hdoc.createTextNode( "ID" ) );
            }

            titlenode.appendChild( hdoc.createTextNode( tmpstr ) );
            break;

        case PARSEMODE_HTML:

            if( !unknownId )
                titletext += "<a href=\"#idpop" + str_id + "\">ID</a>";
            titletext += tmpstr;
            break;

        case PARSEMODE_TEXT:
            if( !unknownId ) titletext += "ID";
            titletext += tmpstr;
            break;
        }
    }

}


/*-----------------------------------------------------*/
/* For convenience.
   mode can be set to PARSEMODE_HTML or PARSEMODE_TEXT */ /* public */
void ParseMisc::parseTITLEdatText(

    /* input */
    int mode,                /* mode = PARSEMODE_HTML or PARSEMODE_TEXT */
    int num,
    bool showMailAddress,
    const RESDAT& resdat,          /* RESDAT is defined in datinfo.h */

    /* output */
    QString& titletext       /* HTML or plain text of title */
    
    ){
    
    /* dummy */
    DOM::HTMLDocument hdoc;
    DOM::Element titlenode;

    parseTITLEdat(mode,hdoc,num,showMailAddress,resdat,titlenode,titletext);
}

    



/*------------------------------------------*/
/*                                          */
/*      Parsing Engine for Body             */
/*                                          */
/*------------------------------------------*/

/*
  If mode = PARSEMODE_DOM, bodynode is DOM tree of body.
  bodytext is ignored.

  If mode = PARSEMODE_HTML, bodytext is HTML text of body.
  DOM tree is not created. Both hdoc and bodynode are ignored.
  
  If mode = PARSEMODE_TEXT, bodytext is plain text of body.
  DOM tree is not created. Both hdoc and bodynode are ignored.
*/

/* public */
void ParseMisc::parseBODYdat(

    /* input */
    int mode,                /* mode */
    const QString &rawStr,   /* raw strings of body text */
    DOM::HTMLDocument& hdoc, /* root node of DOM document*/
    bool showAA,             /* show AA (for KDE3.1x) */
    
    /* output */
    DOM::Element &bodynode, /* DOM tree of body */
    QString& bodytext       /* HTML or plain text of body */

    ){

    /*-----------------------------------------*/
    /* init                                    */
    
    unsigned int i, i2, index, pos, length = rawStr.length();
    DOM::Element tmpelm;
    QString linkstr, linkurl;
	    
    const QChar *chpt = rawStr.unicode();
    QString lineStr = QString::null;
    bodytext = QString::null;
    
    bool ancChain = FALSE;
    /* ancChain is chain for anchor. For examle, if anchor "&gt;2"
       appeared, ancChain is set to TRUE. Moreover, if next strings
       are "=5", anchor for 5 is also set. Thus, we can obtain anchors
       for strings "&gt;2=5" as follows:

       <a href="#2">&gt;2</a><a href="#5">=5</a>
    */


    /*-----------------------------------------*/
    
    for ( i = index = 0 ; i < length ; i++ ) {

        switch ( chpt[ i ].unicode() ) {

        case '<':

	    /* " <br> " */
	    if (chpt[i + 1] == 'b' && chpt[i + 2] == 'r' && chpt[i + 3]  == '>'){

		i2 = i - index;
		if(i  > 0 && chpt[i-1] == ' ') i2--; /* remove space before <br> */
		lineStr += rawStr.mid( index, i2 );

		switch ( mode ){

		case PARSEMODE_DOM:
		
		    /* add BR node */
		    bodynode.appendChild( hdoc.createTextNode( lineStr ) );
		    bodynode.appendChild( hdoc.createElement( "BR" ) );

		    /* show Ascii Art (for KDE3.1*) */
		    if ( showAA ) {

			/* put the span node after BR node */
			tmpelm = bodynode.appendChild( hdoc.createElement( "SPAN" ) );
			{
			    tmpelm.setAttribute( "style", "color: white" );
			    tmpelm.appendChild( hdoc.createTextNode( "" ) );
			}
		    }
		    
		    break;
		    
		case PARSEMODE_HTML:

		    bodytext += lineStr;
		    bodytext += "<br>";
		    /* show Ascii Art (for KDE3.1*) */
		    if ( showAA ) {
			bodytext += "<span style=\"color: white\"></span>";
		    }
		    break;
		    
		case PARSEMODE_TEXT:

		    bodytext += lineStr;
		    bodytext += '\n';

		    break;
		}

		index = i + 4;
		if( chpt[index] == ' ') index++; /* remove space after <br> */
		i = index - 1;
		lineStr = QString::null;
		ancChain = FALSE;
	    }

	    /*----------------------------------------*/
	    
	    /* remove HTML tags <[^>]*>  */
	    else{
		lineStr += rawStr.mid( index, i - index );
		while( chpt[i] != '>' && i < length ) i++;
		index = i+1;
	    }

	    break;

            /*----------------------------------------*/

	case 'h': /* "http://" or "ttp://" or "tp:" */
	case 't':

	    if( mode != PARSEMODE_TEXT
		&& parseLink( chpt + i, length - i,linkstr, linkurl, pos) ){

		lineStr += rawStr.mid( index, i - index );
		
		switch ( mode ){

		case PARSEMODE_DOM:

		    /* create A node */
		    bodynode.appendChild( hdoc.createTextNode( lineStr ) );

		    tmpelm = bodynode.appendChild( hdoc.createElement( "A" ) );
		    {
			tmpelm.setAttribute( "href", linkurl );
			tmpelm.appendChild( hdoc.createTextNode( linkurl ));
		    }

		    break;

		case PARSEMODE_HTML:
		    bodytext += lineStr;
		    bodytext += "<a href=\"" + linkurl +"\">";
		    bodytext += linkstr;
		    bodytext +="</a>";
		    break;
		}

		index = i + pos;
		i = index -1;
		lineStr = QString::null;
            }
 
	    break;

            /*----------------------------------*/

        case '&':
	    
	    /* &gt; */
            if ( ( mode == PARSEMODE_DOM || mode == PARSEMODE_HTML )
		 && chpt[i + 1] == 'g' && chpt[i + 2] == 't' && chpt[i + 3]  == ';')
                ancChain = createResAnchor( mode, rawStr, hdoc, bodynode, bodytext, chpt, i, index, lineStr);
	    /* special char */
	    else if ( mode == PARSEMODE_DOM || mode == PARSEMODE_TEXT ){

		QString tmpstr;
		tmpstr = parseSpecialChar(chpt + i, pos);

		if ( tmpstr != QString::null ){
		    lineStr += rawStr.mid( index, i - index ) + tmpstr;
		    index = i + pos;
		    i = index - 1;
		}
	    }

            break;

            /*----------------------------------------*/
	    
            /* unicode '>'  */
        case UTF16_BRACKET:
	    if( mode != PARSEMODE_TEXT )
                ancChain = createResAnchor( mode, rawStr, hdoc, bodynode, bodytext, chpt, i, index, lineStr);
            break;
	    
            /*----------------------------------*/

	default:
	    if(mode != PARSEMODE_TEXT && ancChain)
                ancChain = createResAnchor( mode, rawStr, hdoc, bodynode, bodytext, chpt, i, index, lineStr);
        }
    }


    /*---------------------------*/

    lineStr += rawStr.mid( index );
	
    switch ( mode ){

    case PARSEMODE_DOM:
	bodynode.appendChild( hdoc.createTextNode( lineStr ) );
	break;

    case PARSEMODE_HTML:
    case PARSEMODE_TEXT:	
	bodytext += lineStr;
	break;
    }
    
}


/*-----------------------------------------------------*/
/* For convenience.
   mode can be set to PARSEMODE_HTML or PARSEMODE_TEXT */ /* public */
void ParseMisc::parseBODYdatText(

    /* input */
    int mode,                /* mode = PARSEMODE_HTML or PARSEMODE_TEXT */
    const QString &rawStr,   /* raw strings of body text */
    
    /* output */
    QString& bodytext       /* HTML or plain text of body */

    ){

    /* dummy */
    DOM::HTMLDocument hdoc;
    DOM::Element bodynode;

    parseBODYdat(mode,rawStr,hdoc,FALSE,bodynode,bodytext);
}



/*--------------------------------------------*/
/* get HTML from RESDAT                       */
/*
  resdat should be parsed by parseResDat before
  calling this function.

  The title (number num, mail address shown according to showAddr)
  goes into a <dt> element and the body into the following <dd>
  element of one <dl> list.

  struct RESDAT is defined in datinfo.h .     */ /* public */
QString ParseMisc::ResDatToHtml(const RESDAT& resdat, int num, bool showAddr )
{
    QString titleHtml;
    ParseMisc::parseTITLEdatText( PARSEMODE_HTML, num, showAddr, resdat, titleHtml );

    QString bodyHtml;
    ParseMisc::parseBODYdatText( PARSEMODE_HTML, resdat.body, bodyHtml );

    QString html( "<dl><dt>" );
    html += titleHtml;
    html += "</dt><dd>";
    html += bodyHtml;
    html += "<br/><br/></dd></dl>";

    return html;
}



/*--------------------------------------------*/
/* get HTML from raw data                     */ /* public */

/* rawData is one raw dat line. It is split by parseResDat and then
   rendered by ResDatToHtml. The subject that parseResDat may report
   is not used. */
QString ParseMisc::DatToHtml(const QString& rawData, int num, bool showAddr )
{
    RESDAT resdat;
    resdat.linestr = rawData;
    resdat.set = TRUE;
    resdat.parsed = FALSE;

    QString unusedSubject;
    parseResDat( resdat, unusedSubject );

    return ResDatToHtml( resdat, num, showAddr );
}




/*-------------------------------------------------------*/
/*-------------------------------------------------------*/

/* public utils                                          */


/*------------------------------------------*/
/* parsing function for anchor (>>digits)   */
/*------------------------------------------*/ /* public */

/* This function parses res anchor.

   For example, if cdat = "&gt;12-20", then

   linkstr = ">12-20",
   refNum[0] = 12,
   refNum[1] = 20,
   pos (= length of cdat ) = 9,
   ret = TRUE;

   Accepted input: up to two leading '>' marks (UTF16_BRACKET or
   "&gt;"), or, if there is none, one ',' or one '=' (ASCII or
   UTF16_COMMA / UTF16_EQ); then digits (ASCII or UTF16_0..UTF16_9),
   optionally followed by one hyphen and more digits.

   cdat must point to at least length characters; nothing beyond that
   is read. All outputs are written on every call: if no digit is
   found, FALSE is returned, pos is the number of characters consumed
   by the leading marks and linkstr holds their text.
*/

bool ParseMisc::parseResAnchor(

    /* input */
    const QChar *cdat, const unsigned int length,

    /* output */
    QString& linkstr, int* refNum, unsigned int& pos ){

    struct LocalFunc {
        static bool isHYPHEN( unsigned short c )
        {

            /* UTF-16 */
            if ( c == '-'
                    || ( c >= 0x2010 && c <= 0x2015 )
                    || ( c == 0x2212 )
                    || ( c == 0xFF0D )      /* UTF8: 0xEFBC8D */
               ) {
                return TRUE;
            }

            return FALSE;
        }
    };

    bool ret = FALSE;
    int i;

    /* Reset the outputs before any return: callers read pos and
       linkstr even when FALSE is returned. */
    linkstr = QString::null;
    refNum[0] = 0;
    refNum[1] = 0;
    pos = 0;

    if( length == 0 ) return FALSE;

    /* check '>' twice */
    for ( i = 0;i < 2;i++ ) {

        if ( pos < length && cdat[ pos ].unicode() == UTF16_BRACKET ) {
            linkstr += cdat[ pos ];
            pos++;
        } else if ( pos + 3 < length
                    && cdat[ pos ] == '&' && cdat[ pos + 1 ] == 'g'  /* &gt; */
                    && cdat[ pos + 2 ] == 't' && cdat[ pos + 3 ] == ';' ) {
            linkstr += ">";
            pos += 4;
        }

    }

    /* check ',' (pos == 0 is inside the buffer because length > 0) */
    if ( !pos ) {
        if ( cdat[ pos ] == ',' || cdat[ pos ].unicode() == UTF16_COMMA ) {
            linkstr += ",";
            pos ++;
        }
    }

    /* check '=' */
    if ( !pos ) {
        if ( cdat[ pos ] == '=' || cdat[ pos ].unicode() == UTF16_EQ ) {
            linkstr += "=";
            pos ++;
        }
    }

    /* check digits */
    int hyphen = 0;   /* 0: reading refNum[0], 1: a hyphen was seen, reading refNum[1] */

    for ( i = 0 ; i < KITA_RESDIGIT + 1 && pos < length ; i++, pos++ ) {

        unsigned short c = cdat[ pos ].unicode();

        const bool isDigit = ( c >= UTF16_0 && c <= UTF16_9 )
                             || ( c >= '0' && c <= '9' );
        const bool isHyphen = LocalFunc::isHYPHEN( c );

        /* Stop at anything that is not a digit, except for a single
           hyphen that follows at least one digit. */
        if ( !isDigit && ( !isHyphen || i == 0 || hyphen ) ) break;

        linkstr += cdat[ pos ];

        if ( isHyphen ) {
            hyphen = 1;
            i = -1;     /* restart the digit count for the second number */
        } else {
            if ( c >= UTF16_0 ) c = '0' + cdat[ pos ].unicode() - UTF16_0;
            refNum[hyphen] *= 10;
            refNum[hyphen] += c - '0';
        }

        ret = TRUE;
    }

    return ret;
}


/*-----------------------------------------------------*/
/* parsing function for special char (such as &hearts; */
/*-----------------------------------------------------*/ /* public */

/* Checks whether cdat starts with one of the known character
   references and returns its replacement.

   For example, if cdat = "&amp;", then

   pos (= length of the reference) = 5,
   return value = "&".

   If nothing matches, QString::null is returned and pos is 0.
   cdat is compared character by character without a length limit.
*/

QString ParseMisc::parseSpecialChar(

    /* input */
    const QChar *cdat,

    /* output */
    unsigned int& pos ){

    struct Entity {
        /* returns name.length() if text starts with name, otherwise 0 */
        static int match( const QChar *text, const QString& name )
        {
            int n;
            for ( n = 0; name.at( n ) != '\0'; n++ ) {
                if ( text[ n ] != name.at( n ) ) return 0;
            }
            return n;
        }
    };

    pos = Entity::match( cdat , "&gt;" );
    if ( pos ) return QString( ">" );

    pos = Entity::match( cdat , "&lt;" );
    if ( pos ) return QString( "<" );

    pos = Entity::match( cdat , "&nbsp;" );
    if ( pos ) return QString( " " );

    pos = Entity::match( cdat , "&amp;" );
    if ( pos ) return QString( "&" );

    pos = Entity::match( cdat , "&quot;" );
    if ( pos ) return QString( "\"" );

    pos = Entity::match( cdat , "&hearts;" );
    if ( pos ) return utf8ToUnicode( KITAUTF8_HEART );

    pos = Entity::match( cdat , "&diams;" );
    if ( pos ) return utf8ToUnicode( KITAUTF8_DIA );

    pos = Entity::match( cdat , "&clubs;" );
    if ( pos ) return utf8ToUnicode( KITAUTF8_CLUB );

    pos = Entity::match( cdat , "&spades;" );
    if ( pos ) return utf8ToUnicode( KITAUTF8_SPADE );

    /* no match: pos is 0 here */
    return QString::null;
}



/* copied from comment.cpp  */ /* public */

/* Splits the date field of a dat line.

   str is searched for "yy/MM/dd hh:mm" or "yyyy/MM/dd hh:mm", with
   optional ":ss" and optional " ID:...".

   - No match: str is copied to dateIdstr; idstr and dTime are left
     untouched.
   - Match: dTime receives the date and time, idstr the text after
     " ID:" (empty if there is none); dateIdstr is left untouched.

   Two-digit years 70..99 are read as 19xx, 00..69 as 20xx.
*/
void ParseMisc::parseDateId( const QString& str ,
                               QString &idstr,
                               QString &dateIdstr, QDateTime &dTime )

{
    QRegExp regexp( "(\\d\\d(\\d\\d)?)/(\\d\\d)/(\\d\\d) (\\d\\d):(\\d\\d)(:\\d\\d)?( ID:(.*))?" );

    if ( regexp.search( str ) == -1 ) {
        dateIdstr = str;
        return ;
    }

    int year = regexp.cap( 1 ).toInt();
    if ( year >= 70 && year < 100 ) {
        year += 1900;
    } else if ( year < 70 ) {
        year += 2000;
    }

    /* Group 7 is ":ss" including the colon, which toInt() cannot
       convert; skip the colon. An absent group gives 0 seconds. */
    const int second = regexp.cap( 7 ).mid( 1 ).toInt();

    QDateTime
    dateTime( QDate( year, regexp.cap( 3 ).toInt(), regexp.cap( 4 ).toInt() ),
              QTime( regexp.cap( 5 ).toInt(), regexp.cap( 6 ).toInt(), second ) );

    dTime = dateTime;
    idstr = regexp.cap( 9 );
}


/*--------------------------------------*/
/* split raw data, then get ID, name,
   date, body text, subject, etc.

   (input) "resdat.linestr","resdat.set"

   "resdat.linestr" is raw data, and
   "resdat.set" should be set to TRUE
   before calling this.

   The line is split at "<>". If that gives exactly five fields
   (name, mail address, date/ID, body, subject) they are stored in
   resdat and resdat.broken is set to FALSE; otherwise only
   resdat.broken is set to TRUE. subject is written only when the
   fifth field is not null.

   Returns FALSE if resdat.set is not set, otherwise TRUE (also for
   broken lines and for data that was already parsed).

   struct RESDAT is defined in datinfo.h.
   see also datinfo.h.                   */ /* public */
bool ParseMisc::parseResDat(RESDAT& resdat, QString& subject)
{
    if( resdat.parsed ) return TRUE;
    if( !resdat.set ) return FALSE;

    resdat.parsed = TRUE;

    /* split dat (empty fields are kept) */
    QStringList list = QStringList::split( "<>", resdat.linestr, true );

    if ( list.size() == 5 ) {

        resdat.broken = FALSE;

        resdat.name = list[ 0 ];
        parseBODYdatText( PARSEMODE_TEXT, resdat.name, resdat.parsedName );
        resdat.address = list[ 1 ];
        parseDateId(list[ 2 ],resdat.id,resdat.dateId,resdat.dateTime );
        resdat.body = list[ 3 ].mid( 1 ); /* remove space after <> */

        /* get subject */
        if (list[ 4 ] != QString::null ) {
            subject = list[ 4 ];
        }

    }
    else resdat.broken = TRUE;

    return TRUE;
}



/*-----------------------------*/
/* parsing function for link   */

/* Recognises a link that starts at cdat[0] with "http://", "ttp://"
   or "tp://".

   For example,

   cdat = "ttp://foo.com ",

   then

   linkstr = "ttp://foo.com",
   linkurl = "http://foo.com",
   pos (= length of the link) = 13,

   and return TRUE.

   The link ends at the first character outside '!'..'~' or at '<' or
   '>'. cdat must point to at least length characters; nothing beyond
   that is read. If FALSE is returned the outputs must not be used.
                                */ /* public */
bool ParseMisc::parseLink(

    /* input */
    const QChar *cdat, const unsigned int length,

    /* output */
    QString& linkstr, QString& linkurl, unsigned int& pos
    ){

    /*-----------------------------*/

    linkstr = QString::null;
    linkurl = QString::null;

    /* A prefix is only compared when it fits into the buffer, because
       isEqual has no length limit of its own. */
    int i = 0;
    if( length >= 7 ) i = isEqual( cdat  , "http://" );
    if( !i && length >= 6 ) i = isEqual( cdat  , "ttp://" );
    if( !i && length >= 5 ) i = isEqual( cdat  , "tp://" );
    if(!i) return FALSE;

    linkurl = "http://";
    pos = 0;

    /* copy the scheme as written ("http:", "ttp:" or "tp:") and skip "//" */
    while ( pos < length && cdat[ pos ] != '/' ) linkstr += cdat[ pos++ ];
    linkstr += "//"; pos += 2;

    /* copy the rest of the link */
    while ( pos < length
            && cdat[ pos ] >= '!' && cdat[ pos ] <= '~'
            && cdat[ pos ] != ' ' && cdat[ pos ] != '<' && cdat[ pos ] != '>' ){
        linkurl += cdat[ pos ];
        linkstr += cdat[ pos++ ];
    }

    /* NOTE(review): a link that runs up to the very end of the buffer
       is rejected. This is the original behaviour and is kept here;
       confirm whether it is intended. */
    if ( pos >= length ) return FALSE;

    return TRUE;
}



/*----------------------------------------------------------*/
/* parse URL to get correct URL of dat file.

(ex.1)

http://pc5.2ch.net/linux/dat/1069738960.dat#20-30

->

newURL = http://pc5.2ch.net/linux/dat/1069738960.dat
refstr = 20-30

(ex.2)


http://pc5.2ch.net/test/read.cgi/linux/1069738960/-100

->

newURL = http://pc5.2ch.net/linux/dat/1069738960.dat
refstr = 1-100                                                */ /* public */

KURL ParseMisc::parseURL( const KURL& url , QString& refstr )
{
    KURL datURL = url;
    refstr = QString::null;

    const bool isReadCgi = url.path().contains( "/test/read.cgi" );

    if ( !isReadCgi ) {

        /* (ex.1) move the reference part of the URL into refstr */
        if ( datURL.hasRef() ) {
            refstr = datURL.ref();
            datURL = datURL.protocol() + "://" + datURL.host() + datURL.path();
        }

    } else {

        /* (ex.2) everything behind "/test/read.cgi": "/<board>/<key>/<range>" */
        QString tail = url.path().section( "/test/read.cgi", 1 );

        QString board = tail.section( '/', 1, 1 );
        QString key = tail.section( '/', 2, 2 );
        datURL.setPath( QString( "/%1/dat/%2.dat" ).arg( board ).arg( key ) );

        QString range = tail.section( '/', 3 );
        if ( ! range.isEmpty() ) {

            /* a range that starts with '-' starts at res 1 */
            if ( range.at( 0 ) == '-' ) refstr = "1" + range;
            else refstr = range;
        }
    }

    kdDebug() << "newURL: " << datURL.url() << endl;
    return datURL;
}


/* for convenience: same as parseURL, but the reference string is dropped */
KURL ParseMisc::parseURLonly( const KURL& url )
{
    QString ignoredRef;
    KURL datURL = parseURL( url, ignoredRef );

    return datURL;
}


/*--------------------------------------------*/
/* private functions                          */


/* if cdat starts with str, return str.length(), otherwise return 0.
   cdat is read character by character up to the first difference. */ /* private */
int ParseMisc::isEqual( const QChar *cdat, const QString& str)
{
    int matched;

    for ( matched = 0; str.at( matched ) != '\0'; matched++ ) {
        if ( cdat[ matched ] != str.at( matched ) ) return 0;
    }

    return matched;
}





/*------------------------*/
/* create node of anchor  */

/* This function is called from parseBODYdat
   internally. So, see also parseBODYdat */   /* private */
bool ParseMisc::createResAnchor(

    /* input */
    int mode,
    const QString &rawStr,
    DOM::HTMLDocument& hdoc ,

    /* output */
    DOM::Element &bodynode,
    QString& bodytext,

    /* internal variables */
    /* They are the same variables that ara used in parseBODYdat. */
    const QChar *chpt, unsigned int &i, unsigned int &index, QString& lineStr
    )
{
    /*-----------------------*/

    QString linkstr,linkurl;
    DOM::Element tmpelm;
    int refNum[2];
    unsigned int pos;
    unsigned int length = rawStr.length();

    /* parse anchor */
    if( !parseResAnchor(chpt+i,length-i,linkstr,refNum,pos) ){
	lineStr += rawStr.mid( index, i - index ) + linkstr;
	index = i + pos;
	i = index - 1;
	return FALSE;
    }

    /* create anchor */
    lineStr += rawStr.mid( index, i - index );
    linkurl = QString("#%1").arg(refNum[0]);
    if(refNum[1]) linkurl += QString("-%1").arg(refNum[1]);
	    
    switch ( mode ){

    case PARSEMODE_DOM:
    
	/* create 'A' element */
	bodynode.appendChild( hdoc.createTextNode( lineStr ) );
    
	tmpelm = bodynode.appendChild( hdoc.createElement( "A" ) );
	{
	    tmpelm.setAttribute( "href", linkurl );
	    tmpelm.appendChild( hdoc.createTextNode( linkstr ) );
	}

	break;

    case PARSEMODE_HTML:
	bodytext += lineStr;
	bodytext += "<a href=\"" + linkurl +"\">";
	bodytext += linkstr;
	bodytext +="</a>";
	break;	
    }
    
    index = i + pos;
    i = index - 1;
    lineStr = QString::null;
	
    return TRUE;
}



