/* -*- c++ -*- */
#include "formatter.h"
#include "exception.h"

#ifdef USE_XERCESC
#include <xercesc/util/PlatformUtils.hpp> /* {Xerces-C++2.2} */
#include <xercesc/util/XMLString.hpp>
#endif

using namespace aka2;

namespace {
  /**
   * One row of the escape table: a character and the text that
   * replaces it on output.
   */
  struct escape_entry {
    /* Character to be replaced; a value of 0 marks the end of the table. */
    uchar_t uchar_;
    /* Null-terminated replacement text (the entity reference). */
    uchar_t escaped_[16];
  };

  /**
   * Characters that formatter::write_entity() replaces by entity references.
   * Row order matters: write_entity() starts scanning at the row index it is
   * given, so index 0 covers '"', '&' and '<', while index 1 skips the '"'
   * row.  The row whose uchar_ is 0 terminates the scan.
   */
  const escape_entry escape_table[] = {
    {'"', {'&', 'q', 'u', 'o', 't', ';', 0}},
    {'&', {'&', 'a', 'm', 'p', ';',      0}},
    {'<', {'&', 'l', 't', ';',           0}},
    {0,   {                              0}}
  };

#ifdef __USING_EUC__
  /* Source encoding handed to babel when converting std::string input to
   * ustring (see babel_formatter::set_encoding()); EUC variant.
   * NOTE(review): no definition is visible in this file when neither
   * __USING_EUC__ nor __USING_SJIS__ is defined. */
  const int default_encoding_ = babel::base_encoding::euc;
#endif

#ifdef __USING_SJIS__
  /* Same constant for builds that define __USING_SJIS__. */
  const int default_encoding_ = babel::base_encoding::sjis;
#endif


#ifdef __LITTLE_ENDIAN_COMPUTER__
  /* Two-byte UTF-16 byte-order mark returned by check_bom(); bytes FF FE
   * for builds that define __LITTLE_ENDIAN_COMPUTER__. */
  static const bom utf16_bom = {{ '\xff', '\xfe' }, 2};
  //static const bom utf32_bom = {{ 0xff, 0xfe, 0, 0 }, 4};
#endif

#ifdef __BIG_ENDIAN_COMPUTER__
  /* Same mark with the bytes in the opposite order (FE FF) for builds that
   * define __BIG_ENDIAN_COMPUTER__. */
  static const bom utf16_bom = {{ '\xfe', '\xff' }, 2};
  //static const bom utf32_bom = {{ 0, 0, 0xfe, 0xff }, 4};
#endif

  /**
   * Selects the byte-order mark that goes with an output encoding.
   * @param encoding encoding name; compared with stricmp() against
   *        "utf-16" and "utf16" only.
   * @return address of utf16_bom for those two names, 0 for every other
   *         name (no mark is associated with it).
   */
  const bom* check_bom(const std::string &encoding) {
    const char *name = encoding.c_str();
    const bool is_utf16 =
      (stricmp("utf-16", name) == 0) || (stricmp("utf16", name) == 0);
    return is_utf16 ? &utf16_bom : 0;
  }
  

  /**
   * Encoding name entries.
   */

  /* Each list below holds the accepted spellings of one encoding and ends
   * with a 0 entry.  canonicalize_encoding_name() returns the first entry
   * of the list in which a name is found. */

  /* Spellings accepted for EUC-JP. */
  const char *euc_names[] = {
    "EUC-JP",
    "EUCJP",
    0
  };

  /* Spellings for ISO-2022-JP.
   * NOTE(review): this list is not consulted by any function visible in
   * this file. */
  const char *jis_names[] = {
    "JIS",
    "ISO-2022-JP",
    "ISO2022-JP",
    "ISO-2022JP",
    "ISO2022JP",
    0
  };

  /* Spellings accepted for Shift_JIS. */
  const char *sjis_names[] = {
    "ShiftJIS",
    "SJIS",
    "Shift_JIS",
    "Shift-JIS",
    0
  };

  /* Spellings accepted for UTF-16 without an explicit byte order. */
  const char* utf16_names[] = {
    "UTF-16",
    "UTF16",
    0
  };

  /* Spellings accepted for big-endian UTF-16. */
  const char* utf16be_names[] = {
    "UTF-16BE",
    "UTF16BE",
    0
  };

  /* Spellings accepted for little-endian UTF-16. */
  const char* utf16le_names[] = {
    "UTF-16LE",
    "UTF16LE",
    0
  };

  /* Spellings accepted for UTF-8. */
  const char* utf8_names[] = {
    "UTF-8",
    "UTF8",
    0
  };


  /**
   * Tests whether an encoding name occurs in a list of names.
   * @param names array of C strings terminated by a 0 entry.
   * @param encoding name to look for; each entry is compared to it with
   *        stricmp().
   * @return true as soon as an entry compares equal, false when the end of
   *         the list is reached without a match.
   */
  bool find_encoding_name(const char **names, const std::string &encoding) {
    const char *wanted = encoding.c_str();
    const char **candidate = names;
    while (*candidate != 0) {
      if (stricmp(*candidate, wanted) == 0)
        return true;
      ++candidate;
    }
    return false;
  }

  /**
   * Maps any accepted spelling of an encoding name to its canonical form.
   * The name lists are tried in the order EUC, Shift_JIS, UTF-16, UTF-16BE,
   * UTF-16LE, UTF-8; jis_names is not among them.
   * @param encoding encoding name as supplied by the caller.
   * @return the first entry of the list that contains the name.
   * @throws internal_error when no list contains the name.
   */
  std::string canonicalize_encoding_name(const std::string &encoding) {
    const char **name_lists[] = {
      euc_names,
      sjis_names,
      utf16_names,
      utf16be_names,
      utf16le_names,
      utf8_names
    };
    const unsigned int list_count = sizeof(name_lists) / sizeof(name_lists[0]);

    for (unsigned int i = 0; i < list_count; ++i) {
      if (find_encoding_name(name_lists[i], encoding))
        return name_lists[i][0];
    }

    throw internal_error();
    return ""; /* unreachable */
  }

  /**
   * Tells whether an encoding is one of the UTF-16 family (UTF-16,
   * UTF-16BE or UTF-16LE, in any spelling listed in the name tables).
   * formatter::prepare() switches standard streams to binary mode for
   * these encodings.
   * @param encoding encoding name as supplied by the caller.
   * @return true for a UTF-16 family name, false otherwise.
   */
  bool is_binary_encoding(const std::string &encoding) {
    return find_encoding_name(utf16_names, encoding)
      || find_encoding_name(utf16be_names, encoding)
      || find_encoding_name(utf16le_names, encoding);
  }


} // namespace


#ifdef _WIN32

#include <stdio.h>
#include <io.h>
#include <fcntl.h> 
#include <iostream>

void formatter::set_binary_mode_for_stdio() {
  int fd = -1;
  if (ostm_ == static_cast<std::ostream*>(&std::cout))
    fd_ = _fileno(stdout);
  else if (ostm_ == static_cast<std::ostream*>(&std::cerr))
    fd_= _fileno(stderr);
  else if (ostm_ == static_cast<std::ostream*>(&std::clog))
    fd_ = _fileno(stderr);
  if (fd_ != -1) {
    *ostm_ << std::flush;
#ifdef __BORLANDC__
    saved_stream_mode_ = setmode(fd_, _O_BINARY);
#else
    saved_stream_mode_ = _setmode(fd_, _O_BINARY);
#endif
  }
}

/**
 * Restores the mode recorded in saved_stream_mode_ on the descriptor fd_.
 * Does nothing when fd_ is -1, i.e. when no descriptor was recorded.
 */
void formatter::revert_stream_mode() {
  if (fd_ == -1)
    return;

  /* Flush first so pending output is written before the mode changes. */
  ostm_->flush();
#ifdef __BORLANDC__
  setmode(fd_, saved_stream_mode_);
#else
  _setmode(fd_, saved_stream_mode_);
#endif
}

#else

/* Builds without _WIN32: switching the stream mode is a no-op. */
void formatter::set_binary_mode_for_stdio() {}
/* Builds without _WIN32: nothing was changed, so nothing is restored. */
void formatter::revert_stream_mode() {}

#endif


/* Definition of the static flag; initially false.
 * NOTE(review): presumably selects the Xerces-C++ transcoder over babel;
 * it is not read anywhere in this file - confirm against its users. */
bool formatter::use_xerces_transcoder_ = false;

/**
 * Readies the formatter for output in the given encoding.
 * For UTF-16 family encodings the stream mode is switched first (see
 * set_binary_mode_for_stdio()); then set_encoding() is called.
 * @param encoding name of the output encoding.
 */
void formatter::prepare(const std::string &encoding) {
  const bool binary_output = is_binary_encoding(encoding);
  if (binary_output) {
    set_binary_mode_for_stdio();
  }
  set_encoding(encoding);
}


void formatter::write(const std::string &value) {
  ustring unistr = lcp_to_ucs2(value);
  write(unistr);
}

/**
 * Writes text destined for an attribute value.
 * @param entity text to escape and write.
 */
void formatter::write_attribute_entity(const std::string &entity) {
  /* Scanning from row 0 of escape_table escapes '"', '&' and '<'. */
  const int first_row = 0;
  write_entity(entity, first_row);
}

/**
 * Writes text destined for element content.
 * @param entity text to escape and write.
 */
void formatter::write_text_entity(const std::string &entity) {
  /* Scanning from row 1 of escape_table skips the '"' row, so only '&'
   * and '<' are escaped. */
  const int first_row = 1;
  write_entity(entity, first_row);
}

void formatter::write_entity(const std::string &value, const int escape_index) {
  ustring unistr = lcp_to_ucs2(value);
  ustring escaped;
  for (ustring::const_iterator it = unistr.begin(); it != unistr.end(); ++it) {
    const uchar_t uchar = *it;
    const escape_entry *entry = &escape_table[escape_index];
    while (entry->uchar_ != 0) {
      if (entry->uchar_ == uchar) {
        escaped += entry->escaped_;
        break;
      }
      ++entry;
    }
    if (entry->uchar_ == 0)
      escaped += uchar;
  }
  write(escaped);
}


void babel_formatter::set_encoding(const std::string &encoding) {

  std::string canonicalized = canonicalize_encoding_name(encoding);
  int enc_out = yggdrasil::get_encoding_from_label(canonicalized.c_str());
  if (enc_out == babel::base_encoding::unknown)
    throw aka2::internal_error();
  in_translator_ = 
    babel::manual_translate_engine<std::string, ustring>::
    create(default_encoding_, babel::base_encoding::unicode);
  out_translator_ = 
    babel::manual_translate_engine<ustring, std::string>::
    create(babel::base_encoding::unicode, enc_out);

  bom_ = check_bom(encoding);
}

/**
 * Converts a std::string to a ustring by running it through
 * in_translator_, which set_encoding() creates as a translator from
 * default_encoding_ to unicode.
 * @param source text to convert.
 * @return the string obtained from the translator after the conversion.
 */
ustring babel_formatter::lcp_to_ucs2(const std::string &source) {
  /* presumably discards state left over from an earlier call - confirm
   * against the babel documentation */
  in_translator_.clear();
  in_translator_.translate(source);
  /* presumably forces out input the translator is still holding back -
   * confirm against the babel documentation */
  in_translator_.flush();
  return in_translator_.get_string();
}

/**
 * Translates a ustring with out_translator_ and writes the resulting
 * bytes to ostm_.  When bom_ is set, the byte-order mark is written first
 * and bom_ is cleared, so the mark is emitted only once.
 * @param entity text to write.
 */
void babel_formatter::write(const ustring &entity) {
  const bool bom_pending = (bom_ != 0);
  if (bom_pending) {
    ostm_->write(bom_->chars_, bom_->length_);
    bom_ = 0;
  }

  out_translator_.clear();
  out_translator_.translate(entity);
  out_translator_.flush();

  /* The translator converts to the std::string holding its output. */
  const std::string &encoded = out_translator_;
  ostm_->write(encoded.c_str(), encoded.size());
}

#ifdef USE_XERCESC

void xerces_formatter::set_encoding(const std::string &encoding) {
  xercesc::XMLTransService::Codes resValue;
  out_transcoder_.reset(xercesc::XMLPlatformUtils::fgTransService->
			makeNewTranscoderFor(encoding.c_str(), resValue, 4096));
  if (resValue != xercesc::XMLTransService::Ok)
    throw aka2::internal_error();

  in_transcoder_.reset(xercesc::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder());
  bom_ = check_bom(encoding);
}

/**
 * Converts a std::string to a ustring with in_transcoder_.
 * @param source text to convert.
 * @return copy of the transcoded, null-terminated XMLCh string.
 */
ustring xerces_formatter::lcp_to_ucs2(const std::string &source) {
  XMLCh *transcoded = in_transcoder_->transcode(source.c_str());
  ustring unicode(transcoded);
  /* The buffer was allocated inside Xerces-C++ and has to be handed back
   * to it once its contents have been copied. */
  xercesc::XMLString::release(&transcoded);
  return unicode;
}

void xerces_formatter::write(const ustring &entity) {
  const int maxBytes = 4096;
  unsigned int charsEaten;

  /** [g++2.96] std::basic_string<unsigned short>::c_str() does not work with g++2.96 STL */
  const XMLCh *toConvert = reinterpret_cast<const XMLCh*>(entity.data());

  XMLByte buffer[maxBytes];

  for (unsigned int offset = 0; offset < entity.size(); ) {
    unsigned int numBytes = out_transcoder_->
      transcodeTo(toConvert + offset,
                  entity.size(),
                  buffer,
                  maxBytes,
                  charsEaten,
                  xercesc::XMLTranscoder::UnRep_RepChar);

    /** For Xerces-C++.
     * Xerces-C++ 2.5 does not recognize 'UTF16' as UTF-16,
     * causing lack of BOM.
     * If BOM not found, add BOM.
     */
    if (bom_ != 0) {
      if (strncmp(bom_->chars_, reinterpret_cast<const char*>(buffer), 2) != 0) {
	ostm_->write(bom_->chars_, bom_->length_);
	bom_ = 0;
      }
    }

    ostm_->write(reinterpret_cast<char*>(buffer), numBytes);
    offset = charsEaten;
  }
}

#endif /* USE_XERCESC */
