// sktoolslib - common files for SK tools

// Copyright (C) 2012, 2014 - Stefan Kueng

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software Foundation,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//

#include "stdafx.h"
#include "PathUtils.h"	//+[JOJO]
#include "TextFile.h"
#include "PathUtils.h"
#include "maxpath.h"
#include <memory>

////////////////////////////////////////////////////////////
// [JOJO] mlang.dll
#ifdef JAPANESE_KANJI_TEST_1
// Signatures of the two conversion entry points that are looked up by name
// in the library passed to LoadMlang().
typedef HRESULT (__stdcall *TConvertINetMultiByteToUnicode)(LPDWORD, DWORD, LPCSTR, LPINT, LPWSTR, LPINT);
typedef HRESULT (__stdcall *TConvertINetUnicodeToMultiByte)(LPDWORD, DWORD, LPCWSTR, LPINT, LPSTR, LPINT);
// Resolved by LoadMlang(); NULL while the library (or the export) is not available.
static TConvertINetMultiByteToUnicode ConvertINetMultiByteToUnicode;
static TConvertINetUnicodeToMultiByte ConvertINetUnicodeToMultiByte;

/**
 * Loads the given library and resolves the two conversion functions from it.
 *
 * \param mlang_dll  name or path of the library to load, e.g. "mlang.dll"
 * \return the module handle returned by LoadLibrary(), or NULL if the library
 *         could not be loaded. In the NULL case both function pointers are
 *         left NULL. A pointer is also NULL if its export was not found.
 */
HMODULE LoadMlang(WCHAR *mlang_dll)
{
	// never keep pointers from an earlier (possibly different) module around
	ConvertINetMultiByteToUnicode = NULL;
	ConvertINetUnicodeToMultiByte = NULL;

	HMODULE h = LoadLibrary(mlang_dll);	// "mlang.dll"
	if (h == NULL)
		return NULL;	// nothing to resolve the exports from

	ConvertINetMultiByteToUnicode = (TConvertINetMultiByteToUnicode)GetProcAddress(h, "ConvertINetMultiByteToUnicode");
	ConvertINetUnicodeToMultiByte = (TConvertINetUnicodeToMultiByte)GetProcAddress(h, "ConvertINetUnicodeToMultiByte");
	return h;
}
#endif
////////////////////////////////////////////////////////////

/// Creates an empty object: no file buffer, zero length, no BOM and an
/// encoding that still has to be detected.
CTextFile::CTextFile(void)
    : pFileBuf(nullptr)
    , filelen(0)
    , hasBOM(false)
    , encoding(AUTOTYPE)
{
}

/// Releases the raw file buffer owned by this object.
CTextFile::~CTextFile(void)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed
    delete [] pFileBuf;
}

/**
 * Writes the raw file buffer (pFileBuf, filelen bytes) to \c path,
 * replacing an existing file.
 *
 * \param path  file to create or overwrite
 * \return true if the file could be created and WriteFile() succeeded,
 *         false if there is no buffer, the file could not be created or
 *         the write failed.
 */
bool CTextFile::Save(LPCTSTR path)
{
    if (!pFileBuf)
        return false;

    const HANDLE hOut = CreateFile(path, GENERIC_WRITE, FILE_SHARE_READ,
        NULL, CREATE_ALWAYS, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
    if (hOut == INVALID_HANDLE_VALUE)
        return false;

    DWORD written;
    const BOOL writeOk = WriteFile(hOut, pFileBuf, filelen, &written, NULL);
    // the handle is closed no matter how the write went
    CloseHandle(hOut);
    return writeOk ? true : false;
}

/**
 * Loads a file into memory, determines its encoding and converts the
 * content to UTF-16 (textcontent).
 *
 * \param path   file to load; it is opened via a "\\?\" (or "\\?\UNC") prefixed
 *               copy of the path
 * \param type   receives the detected encoding; it is also set on some of the
 *               failure paths (when only the encoding check could be done)
 * \param bUTF8  if true, a file that is auto-detected as anything but BINARY
 *               is treated as UTF-8, and a BINARY file is converted as UTF-8
 * \return true if the file was loaded (for BINARY without line calculation),
 *         otherwise the result of CalculateLines() or false on any error.
 *
 * If the file is larger than 1% of the available physical memory, only the
 * beginning is read to detect the encoding. The rest is then only loaded if
 * the encoding is not BINARY, UTF8 or ANSI. Files of 4GB and more are never
 * loaded; only their encoding is reported through \c type.
 */
bool CTextFile::Load(LPCTSTR path, UnicodeType& type, bool bUTF8)
{
    encoding = AUTOTYPE;
    type = AUTOTYPE;
    // drop all state of a previously loaded file so that nothing stale
    // (BOM flag, converted text) survives a failed or different load
    hasBOM = false;
    textcontent.clear();
    filelen = 0;
    LARGE_INTEGER lint;
    delete [] pFileBuf;
    pFileBuf = NULL;
    auto pathbuf = std::make_unique<TCHAR[]>(MAX_PATH_NEW);
    HANDLE hFile = INVALID_HANDLE_VALUE;
    int retrycounter = 0;

    if ((_tcslen(path) > 2 ) && (path[0] == '\\') && (path[1] == '\\'))
    {
        // UNC path: "\\server\share" becomes "\\?\UNC\server\share"
        _tcscpy_s(pathbuf.get(), MAX_PATH_NEW, _T("\\\\?\\UNC"));
        _tcscat_s(pathbuf.get(), MAX_PATH_NEW, &path[1]);
    }
    else
    {
        // 'normal' path
        _tcscpy_s(pathbuf.get(), MAX_PATH_NEW, _T("\\\\?\\"));
        _tcscat_s(pathbuf.get(), MAX_PATH_NEW, path);
    }

    // try up to five times, waiting 20ms between the attempts
    do
    {
        if (retrycounter)
            Sleep(20);
        hFile = CreateFile(pathbuf.get(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
            NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
        retrycounter++;
    } while (hFile == INVALID_HANDLE_VALUE && retrycounter < 5);

    if (hFile == INVALID_HANDLE_VALUE)
        return false;
    filename = CPathUtils::GetFileName(path);
    if (!GetFileSizeEx(hFile, &lint))
    {
        CloseHandle(hFile);
        return false;
    }

    MEMORYSTATUSEX memex = {sizeof(MEMORYSTATUSEX)};
    GlobalMemoryStatusEx(&memex);

    DWORD bytesread = 0;
    // never use more than 1% of the available physical memory for the first
    // read. Compare in 64 bit so that a huge amount of free RAM is not
    // truncated to a small DWORD value.
    const ULONGLONG membudget = memex.ullAvailPhys/100;
    DWORD bytestoread = (membudget < lint.LowPart) ? DWORD(membudget) : lint.LowPart;
    if (lint.HighPart)
        bytestoread = 500000;   // read 500000 bytes if the file is too big: we only
                                // need to scan for the file type then.

    // if there isn't enough RAM available, only load a small part of the file
    // to do the encoding check. Then only load the full file in case
    // the encoding is UNICODE_LE since that's the only encoding we have
    // to convert first to do a proper search with.
    // Files of 4GB and more always take this path, even if the low 32 bits
    // of their size happen to be small.
    DWORD prefetched = 0;   // number of bytes already copied into pFileBuf
    if ((lint.HighPart != 0) || (bytestoread < lint.LowPart))
    {
        auto tempfilebuf = std::make_unique<BYTE[]>(bytestoread+1);
        if (!ReadFile(hFile, tempfilebuf.get(), bytestoread, &bytesread, NULL))
        {
            CloseHandle(hFile);
            return false;
        }
        encoding = CheckUnicodeType(tempfilebuf.get(), bytesread);
        type = encoding;
        if (lint.HighPart)
        {
            CloseHandle(hFile);
            return false;
        }

        switch(encoding)
        {
        case BINARY:
        case UTF8:
        case ANSI:
            CloseHandle(hFile);
            return false;
            break;
        default:
            pFileBuf = new (std::nothrow) BYTE[lint.LowPart];
            if (pFileBuf)
            {
                // keep what has been read already: the file pointer is now
                // behind these bytes, so they are not read a second time
                memcpy(pFileBuf, tempfilebuf.get(), bytesread);
                prefetched = bytesread;
            }
            break;
        }
    }
    else
    {
        pFileBuf = new (std::nothrow) BYTE[lint.LowPart];
    }
    // read the (remaining) file content behind the already copied part
    if ((pFileBuf == NULL) || (!ReadFile(hFile, pFileBuf + prefetched, lint.LowPart - prefetched, &bytesread, NULL)))
    {
        delete [] pFileBuf;
        pFileBuf = NULL;
        CloseHandle(hFile);
        return false;
    }
    CloseHandle(hFile);
    bytesread += prefetched;    // total number of valid bytes in pFileBuf
    filelen = lint.LowPart;

    // we have the file read into memory, now we have to find out what
    // kind of text file we have here.
    if (encoding == AUTOTYPE)
    {
        encoding = CheckUnicodeType(pFileBuf, bytesread);
        if ((bUTF8) && (encoding != BINARY))
            encoding = UTF8;
    }

    if (encoding == UNICODE_LE)
    {
        if ((bytesread > 1) && (*(unsigned char*)pFileBuf == 0xFF))
        {
            // remove the BOM
            textcontent = std::wstring(((wchar_t*)pFileBuf+1), (bytesread/sizeof(wchar_t))-1);
            hasBOM = true;
        }
        else
            textcontent = std::wstring((wchar_t*)pFileBuf, bytesread/sizeof(wchar_t));
    }
    else if ((encoding == UTF8) || ((encoding == BINARY) && (bUTF8)))
    {
        // first call: get the required size, second call: convert
        int ret = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, bytesread, NULL, 0);
        wchar_t * pWideBuf = new (std::nothrow) wchar_t[ret+1];
        if (pWideBuf == NULL)
            return false;
        int ret2 = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pFileBuf, bytesread, pWideBuf, ret+1);
        if (ret2 == ret)
        {
            if ((ret > 1) && (*pWideBuf == 0xFEFF))
            {
                // remove the BOM
                textcontent = std::wstring(pWideBuf+1, ret-1);
                hasBOM = true;
            }
            else
                textcontent = std::wstring(pWideBuf, ret);
        }
        delete [] pWideBuf;
    }
#ifdef JAPANESE_KANJI_TEST_1
    else if (encoding == JIS6226 || encoding == EUCJP)
    {
        int codePage;
        if (encoding == JIS6226)
            codePage = 50220;
        else // if (encoding == EUCJP)
            codePage = 51932;
        DWORD mode = 0;
        INT length = bytesread;
        int ret = 0;
        // first call: get the required size, second call: convert
        ConvertINetMultiByteToUnicode(&mode, codePage, (LPCSTR)pFileBuf, &length, NULL, &ret);
        wchar_t * pWideBuf = new (std::nothrow) WCHAR[ret + 1];
        if (pWideBuf == NULL)
            return false;
        int ret2 = ret + 1;
        ConvertINetMultiByteToUnicode(&mode, codePage, (LPCSTR)pFileBuf, &length, pWideBuf, &ret2);
        if (ret2 == ret)
            textcontent = std::wstring(pWideBuf, ret);
        delete [] pWideBuf;
    }
#endif
    else //if (encoding == ANSI)
    {
        int ret = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, bytesread, NULL, 0);
        wchar_t * pWideBuf = new (std::nothrow) wchar_t[ret+1];
        if (pWideBuf == NULL)
            return false;
        int ret2 = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, (LPCSTR)pFileBuf, bytesread, pWideBuf, ret+1);
        if (ret2 == ret)
            textcontent = std::wstring(pWideBuf, ret);
        delete [] pWideBuf;
    }
    type = encoding;
    if (type == BINARY)
        return true;
    return CalculateLines();
}

void CTextFile::SetFileContent(const std::wstring& content)
{
    if (pFileBuf)
        delete [] pFileBuf;
    pFileBuf = NULL;
    filelen = 0;

    if (encoding == UNICODE_LE)
    {
        if (hasBOM)
        {
            pFileBuf = new (std::nothrow) BYTE[(content.size()+2)*sizeof(wchar_t)];
            if (pFileBuf)
            {
                memcpy(pFileBuf, _T("\xFE\xFF"), 2*sizeof(wchar_t));
                memcpy(pFileBuf+4, content.c_str(), content.size()*sizeof(wchar_t));
                filelen = ((int)content.size()+2)*sizeof(wchar_t);
            }
        }
        else
        {
            pFileBuf = new (std::nothrow) BYTE[content.size()*sizeof(wchar_t)];
            if (pFileBuf)
            {
                memcpy(pFileBuf, content.c_str(), content.size()*sizeof(wchar_t));
                filelen = (int)content.size()*sizeof(wchar_t);
            }
        }
    }
    else if (encoding == UTF8)
    {
        if (hasBOM)
        {
            int ret = WideCharToMultiByte(CP_UTF8, 0, content.c_str(), -1, NULL, 0, NULL, NULL);
            pFileBuf = new (std::nothrow) BYTE[ret+3];
            if (pFileBuf)
            {
                memcpy(pFileBuf, "\xEF\xBB\xBF", 3);
                int ret2 = WideCharToMultiByte(CP_UTF8, 0, content.c_str(), -1, (LPSTR)pFileBuf+3, ret, NULL, NULL);
                filelen = ret2+2;
                if (ret2 != ret)
                {
                    delete [] pFileBuf;
                    pFileBuf = NULL;
                    filelen = 0;
                }
            }
        }
        else
        {
            int ret = WideCharToMultiByte(CP_UTF8, 0, content.c_str(), -1, NULL, 0, NULL, NULL);
            pFileBuf = new (std::nothrow) BYTE[ret];
            if (pFileBuf)
            {
                int ret2 = WideCharToMultiByte(CP_UTF8, 0, content.c_str(), -1, (LPSTR)pFileBuf, ret, NULL, NULL);
                filelen = ret2-1;
                if (ret2 != ret)
                {
                    delete [] pFileBuf;
                    pFileBuf = NULL;
                    filelen = 0;
                }
            }
        }
    }
    else if ((encoding == ANSI) || (encoding == BINARY))
    {
        int ret = WideCharToMultiByte(CP_ACP, 0, content.c_str(), (int)content.size()+1, NULL, 0, NULL, NULL);
        pFileBuf = new (std::nothrow) BYTE[ret];
        if (pFileBuf)
        {
            int ret2 = WideCharToMultiByte(CP_ACP, 0, content.c_str(), (int)content.size()+1, (LPSTR)pFileBuf, ret, NULL, NULL);
            filelen = ret2-1;
            if (ret2 != ret)
            {
                delete [] pFileBuf;
                pFileBuf = NULL;
                filelen = 0;
            }
        }
    }
    if (pFileBuf)
        textcontent = content;
    else
        textcontent = _T("");
}

/**
 * Hands a new raw buffer over to this object, replacing the current one.
 *
 * \param pBuf    the new buffer; it is released with delete[] later, so it
 *                has to be allocated with new[]
 * \param newLen  number of valid bytes in \c pBuf
 * \return always true
 *
 * Only the raw buffer and its length are exchanged here.
 */
bool CTextFile::ContentsModified(BYTE * pBuf, DWORD newLen)
{
    // if the caller passes the buffer we already own (modified in place),
    // freeing it here would leave us with a dangling pointer
    if (pFileBuf != pBuf)
        delete [] pFileBuf;
    pFileBuf = pBuf;
    filelen = newLen;
    return true;
}

////////////////////////////////////////////////////////////
// [JOJO]
/**
 * Guesses the encoding of the data in \c pBuffer.
 *
 * \param pBuffer  the raw bytes to examine
 * \param cb       number of valid bytes in \c pBuffer
 * \return the detected type.
 *
 * If JAPANESE_KANJI_TEST_1 is defined, the generic detection below lives in
 * _CheckUnicodeType() and this function additionally looks for ISO-2022
 * escape sequences and EUC-JP byte patterns in data detected as ANSI.
 * Note that this variant also stores the generic result in the
 * \c encoding member.
 */
CTextFile::UnicodeType CTextFile::CheckUnicodeType(const BYTE * pBuffer, const int cb)
#ifdef JAPANESE_KANJI_TEST_1
{
    if ((encoding = _CheckUnicodeType(pBuffer, cb)) == ANSI)
    {
        // look for one of the known three byte escape sequences
        for (int i = 0; i + 2 < cb; ++i)
        {
            const BYTE *b = pBuffer + i;
            if (b[0] == '\033')
            {
                // https://en.wikipedia.org/wiki/ISO/IEC_2022
                int b1 = b[1], b2 = b[2];
                // ESC ( B to switch to ASCII (1 byte per character)
                if      (b1 == '(' && b2 == 'B')
                    return JIS6226;
                // ESC ( J to switch to JIS X 0201-1976 (ISO/IEC 646:JP) Roman set (1 byte per character)
                else if (b1 == '(' && b2 == 'J')
                    return JIS6226;
                // ESC $ @ to switch to JIS X 0208-1978 (2 bytes per character)
                else if (b1 == '$' && b2 == '@')
                    return JIS6226;
                // ESC $ B to switch to JIS X 0208-1983 (2 bytes per character)
                else if (b1 == '$' && b2 == 'B')
                    return JIS6226;
                // ESC ( I to switch to JIS X 0201-1976 Kana set (1 byte per character)
                else if (b1 == '(' && b2 == 'I')
                    return JIS6226;
            }
        }
        // count the bytes that are part of a sequence matching the
        // Shift_JIS pattern (sjis) and the EUC-JP pattern (euc)
        int sjis = 0;
        int euc = 0;
        for (int i = 0; i + 1 < cb; )
        {
            int b1 = pBuffer[i    ];
            int b2 = pBuffer[i + 1];
            if ((0x81 <= b1 && b1 <= 0x9F || 0xE0 <= b1 && b1 <= 0xFC)
             && (0x40 <= b2 && b2 <= 0x7E || 0x80 <= b2 && b2 <= 0xFC))
            {
                sjis += 2;
                i += 2;
            }
            else
                ++i;
        }
        for (int i = 0; i + 1 < cb; )
        {
            int b1 = pBuffer[i    ];
            int b2 = pBuffer[i + 1];
            int b3;
            if (0xA1 <= b1 && b1 <= 0xFE
             && 0xA1 <= b2 && b2 <= 0xFE
             || b1 == 0x8E && 0xA1 <= b2 && b2 <= 0xDF)
            {
                euc += 2;
                i += 2;
            }
            else if (b1 == 0x8F
                  && 0xA1 <= b2 && b2 <= 0xFE
                  && i + 2 < cb && 0xA1 <= (b3 = pBuffer[i + 2]) && b3 <= 0xFE)
            {
                euc += 3;
                i += 3;
            }
            else
                ++i;
        }
        if (euc > sjis)
        {
            return EUCJP;
        }
        else if (sjis > euc)
        {
    //TODO: return SJIS;
            return ANSI;
        }
        // else return ANSI
    }
    return encoding;
}
////////////////////////////////////////////////////////////
CTextFile::UnicodeType CTextFile::_CheckUnicodeType(const BYTE * pBuffer, const int cb)
#endif
{
    if (cb < 2)
        return ANSI;
    // scan the whole buffer for a 0x0000 sequence
    // if found, we assume a binary file
    // The 16 bit values are checked at even offsets starting with 0, the
    // single null bytes are counted starting with offset 2. The buffer is
    // accessed bytewise so that nothing behind the last valid byte is read
    // if cb is odd.
    int nNull = 0;
    int nDblNull = 0;
    for (int i=0; i<(cb-2); i=i+2)
    {
        if ((0x00 == pBuffer[i]) && (0x00 == pBuffer[i+1]))
            ++nDblNull;
        if (0x00 == pBuffer[i+2])
            ++nNull;
        if ((i+3 < cb) && (0x00 == pBuffer[i+3]))
            ++nNull;
    }
    if (nDblNull > 2)   // arbitrary value: allow two double null chars to account for 'broken' text files
        return BINARY;
    if ((nNull > 3) && ((cb % 2) == 0)) // arbitrary value: allow three null chars to account for 'broken' text files
        return UNICODE_LE;
    // UTF-16 little endian BOM: FF FE
    if ((pBuffer[0] == 0xFF) && (pBuffer[1] == 0xFE))
        return UNICODE_LE;
    if (cb < 3)
        return ANSI;
    // UTF-8 BOM: EF BB BF
    if ((pBuffer[0] == 0xEF) && (pBuffer[1] == 0xBB))
    {
        if (pBuffer[2] == 0xBF)
            return UTF8;
    }
    // check for illegal UTF8 chars
    const BYTE * pVal8 = pBuffer;
    for (int i=0; i<cb; ++i)
    {
        if ((*pVal8 == 0xC0) || (*pVal8 == 0xC1) || (*pVal8 >= 0xF5))
            return ANSI;
        pVal8++;
    }
    // check that every lead byte of a multi byte sequence is followed by the
    // right number of continuation bytes (10xxxxxx). The last four bytes
    // are not used as the start of a sequence so that the reads below stay
    // inside the buffer.
    pVal8 = pBuffer;
    bool bUTF8 = false;
    for (int i=0; i<(cb-4); ++i)
    {
        if ((*pVal8 & 0xE0) == 0xC0)
        {
            // two byte sequence
            pVal8++;i++;
            if ((*pVal8 & 0xC0)!=0x80)
                return ANSI;
            bUTF8 = true;
        }
        if ((*pVal8 & 0xF0) == 0xE0)
        {
            // three byte sequence
            pVal8++;i++;
            if ((*pVal8 & 0xC0)!=0x80)
                return ANSI;
            pVal8++;i++;
            if ((*pVal8 & 0xC0)!=0x80)
                return ANSI;
            bUTF8 = true;
        }
        if ((*pVal8 & 0xF8) == 0xF0)
        {
            // four byte sequence
            pVal8++;i++;
            if ((*pVal8 & 0xC0)!=0x80)
                return ANSI;
            pVal8++;i++;
            if ((*pVal8 & 0xC0)!=0x80)
                return ANSI;
            pVal8++;i++;
            if ((*pVal8 & 0xC0)!=0x80)
                return ANSI;
            bUTF8 = true;
        }
        pVal8++;
    }
    // only report UTF8 if at least one valid multi byte sequence was found
    if (bUTF8)
        return UTF8;
    return ANSI;
}

////////////////////////////////////////////////////////////
// [JOJO] add
/**
 * Determines the line ending to use for the loaded text.
 *
 * \return "\r" if a CR that is not followed by LF is found first,
 *         "\n" if an LF that is not preceded by CR is found first,
 *         "\r\n" otherwise (only CRLF line endings or none at all).
 */
WCHAR* CTextFile::eol()
{
    const size_t len = textcontent.size();
    size_t idx = 0;
    while (idx < len)
    {
        const wchar_t ch = textcontent[idx];
        if (ch == '\n')
            return L"\n";
        if (ch == '\r')
        {
            // look at the char behind the CR: only a LF makes this a CRLF
            ++idx;
            if ((idx == len) || (textcontent[idx] != '\n'))
                return L"\r";
        }
        ++idx;
    }
    return L"\r\n";
}
////////////////////////////////////////////////////////////

/**
 * Fills linepositions with the start position of every line in textcontent,
 * followed by the length of the text as the last entry. CR, LF and CRLF are
 * all recognized as line endings.
 *
 * \return false if there is no file buffer (linepositions then only
 *         contains 0), true otherwise. For an empty text linepositions only
 *         contains 0 as well.
 */
bool CTextFile::CalculateLines()
{
    ////////////////////////////////////////////////////////////
    // [JOJO] fix
    linepositions.clear();
    linepositions.push_back(0);
    if (pFileBuf == NULL)
        return false;

    const size_t len = textcontent.size();
    if (len == 0)
        return true;

    size_t pos = 0;
    while (pos < len)
    {
        const wchar_t ch = textcontent[pos++];
        if (ch == '\r')
        {
            // a LF directly behind the CR belongs to the same line ending
            if ((pos < len) && (textcontent[pos] == '\n'))
                ++pos;
        }
        else if (ch != '\n')
            continue;   // not a line ending

        // a new line starts behind the line ending, unless the text ends
        // here: the end position is added once after the loop
        if (pos < len)
            linepositions.push_back(pos);
    }
    ////////////////////////////////////////////////////////////
    linepositions.push_back(len);
    return true;
}

/**
 * Returns the one-based number of the line that contains the char at
 * position \c pos, using the positions stored in linepositions.
 *
 * \param pos  zero-based position in textcontent
 * \return the line number; for positions at or behind the end of the text
 *         the index of the last entry in linepositions is returned.
 */
long CTextFile::LineFromPosition(size_t pos) const
{
    ////////////////////////////////////////////////////////////
    // [JOJO] fix
    const long lastIndex = (long)linepositions.size() - 1;
    if (pos >= textcontent.length())
        return lastIndex;

    // binary search over the line start positions
    long first = 0;
    long last = lastIndex;
    while (first <= last)
    {
        const long middle = (first + last) / 2;
        const size_t lineStart = linepositions[middle];
        if (lineStart == pos)
            return middle + 1;      // pos is the first char of that line
        if (lineStart < pos)
            first = middle + 1;
        else
            last = middle - 1;
    }
    // first is now the number of lines that start before pos
    return first;
    ////////////////////////////////////////////////////////////
}

/**
 * Returns the text of a line as delimited by two consecutive entries in
 * linepositions, i.e. including its line ending chars.
 *
 * \param lineNumber  one-based line number
 * \return the line, or an empty string if \c lineNumber is out of range.
 */
std::wstring CTextFile::GetLineString(long lineNumber) const
{
    ////////////////////////////////////////////////////////////
    // [JOJO] fix
    const long entries = (long)linepositions.size();
    if ((lineNumber >= 1) && (lineNumber < entries))
    {
        const size_t lineStart = linepositions[lineNumber - 1];
        const size_t nextStart = linepositions[lineNumber];
        return textcontent.substr(lineStart, nextStart - lineStart);
    }
    return std::wstring();
    ////////////////////////////////////////////////////////////
}

/// Returns what CPathUtils::GetFileNameWithoutExtension() yields for the
/// file name stored by Load().
std::wstring CTextFile::GetFileNameWithoutExtension()
{
    std::wstring baseName = CPathUtils::GetFileNameWithoutExtension(filename);
    return baseName;
}

/// Returns what CPathUtils::GetFileNameExtension() yields for the file name
/// stored by Load().
std::wstring CTextFile::GetFileNameExtension()
{
    ////////////////////////////////////////////////////////////
    // [JOJO]
    std::wstring extension = CPathUtils::GetFileNameExtension(filename);
    return extension;
    ////////////////////////////////////////////////////////////
}
