/* java_utf8.cpp
   Copyright (C) 2005 Free Software Foundation, Inc.

This file is part of Mysaifu JVM

Mysaifu JVM is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

Mysaifu JVM is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.
*/

#include "StdAfx.h"
#include "java_utf8.h"
#include "ClassFile.h"
#include "instruction.h"
#include "common_funcs.h"

/**
 * Element (node) of the hash table that stores interned UTF-8 strings.
 *
 * The string's bytes, followed by a terminating '\0', are stored in the same
 * allocation immediately after this header (see create_element and the
 * CHAR_VALUE macro, which both address the text as "elem + 1").
 */
struct hash_table_element {
	/**
	 * Length of the string in bytes (the terminating '\0' is not counted).
	 */
	int length;

	/**
	 * Reference to the corresponding String object. NULL if none exists yet.
	 */
	jstring string_reference;

	/**
	 * Link to the next element in the same hash bucket.
	 */
	hash_table_element* next;

    // The string body itself is stored directly after this header.
};

/**
 * Memory-management header for pools of hash_table_element storage.
 *
 * NOTE(review): the description below is a translation of the original
 * (mis-encoded) Japanese comment and describes the intended design. In the
 * part of the file visible here nothing allocates through this structure;
 * elements are obtained from g_permanent_heap instead - confirm whether this
 * type is still needed.
 *
 * Intended layout of a pool:
 *
 * hash_element_pool
 *
   +-------+
   | next  |--------> +----+
   +-------+          |next| --> ...
   | limit |-----+    +----+
   +-------+     |    |
   |current|-+   |
   +-------+ |   |
   |elem1  | |   |
   +-------+ |   |
   |elem2  | |   |
   +-------+ |   |
   | free  |<+   |
   |       |     |
   +-------+<----+

 * Normally a single header exists and its free area is filled in order; when
 * a request does not fit into the free area, a new header is created and
 * chained.
 * To keep the chain short, a request for a large hash_table_element is
 * allocated on its own and is not chained.
 * A hash_table_element_pool whose free area drops below a given threshold is
 * removed from management (no further memory is handed out from it).
 */
struct hash_table_element_pool {
	hash_table_element_pool* next;
	char* limit;
	char* current;
	// hash_table_element data is stored directly after this header
};

/**
 * Head of the hash_table_element_pool chain.
 */
static hash_table_element_pool* g_hash_table_element_pool;

/**
 * Default size of the hash table element pool.
 * Passed to the PermanentHeap constructor in init_utf8_settings().
 */
#define DEFAULT_HASH_TABLE_ELEMENT_POOL_SIZE (16*1024)

/**
 * Size of the hash table (number of buckets). A prime number is preferable.
 */
#define HASH_TABLE_SIZE	8191 

/**
 * The hash table: one singly linked chain of elements per bucket.
 */
static hash_table_element* g_utf8_table[HASH_TABLE_SIZE];

/**
 * Global (interned) UTF-8 strings. All of them are assigned in
 * init_utf8_settings().
 */
const java_utf8* CLASS_CLASS_NAME;
const java_utf8* STRING_CLASS_NAME;
const java_utf8* CLINIT_METHOD_NAME;
const java_utf8* INIT_METHOD_NAME;
const java_utf8* FINALIZE_METHOD_NAME;
const java_utf8* VOID_NOARG_METHOD_DESCRIPTOR;
const java_utf8* CLONE_METHOD_NAME;
const java_utf8* JAVA_LANG_OBJECT_CLASS_NAME;
const java_utf8* JAVA_LANG_CLONEABLE_CLASS_NAME;
const java_utf8* JAVA_IO_SERIALIZABLE_CLASS_NAME;
const java_utf8* BOOLEAN_ARRAY_CLASS_NAME;
const java_utf8* BYTE_ARRAY_CLASS_NAME;
const java_utf8* SHORT_ARRAY_CLASS_NAME;
const java_utf8* CHAR_ARRAY_CLASS_NAME;
const java_utf8* INT_ARRAY_CLASS_NAME;
const java_utf8* LONG_ARRAY_CLASS_NAME;
const java_utf8* FLOAT_ARRAY_CLASS_NAME;
const java_utf8* DOUBLE_ARRAY_CLASS_NAME;
// Only slots 4..11 are assigned by init_utf8_settings() (T_BOOLEAN .. 11).
const java_utf8* PRIMITIVE_ARRAY_CLASS_NAMES[12];

const java_utf8* JAVA_LANG_VMCLASS_CLASS_NAME;
const java_utf8* JAVA_LANG_CLASS_CONSTRUCTOR_DESCRIPTOR;
const java_utf8* STRING_ARG_CONSTRUCTOR_DESCRIPTOR;
const java_utf8* CHAR_ARRAY_ARG_CONSTRUCTOR_DESCRIPTOR;

/**
 * Global critical section.
 * Global variables must always be accessed under the protection of this
 * CRITICAL_SECTION.
 */
extern CRITICAL_SECTION g_global_critical_section;

static hash_table_element* intern_element(const java_utf8* utf8, int length);
// NOTE(review): no definition of this function is visible in this file.
static hash_table_element_pool* alloc_hash_table_element_pool(int pool_size);

// Address of the string bytes stored directly behind a hash_table_element.
#define CHAR_VALUE(elem) ((char*) (elem + 1))

/**
 * PermanentHeap from which hash table elements are allocated.
 */
static PermanentHeap* g_permanent_heap;

/**
 * UTF-8֘A
 */
void init_utf8_settings() {
	// nbVe[u
	for (int i = 0; i < HASH_TABLE_SIZE; ++i) {
		g_utf8_table[i] = NULL;
	}

	g_permanent_heap = new PermanentHeap(DEFAULT_HASH_TABLE_ELEMENT_POOL_SIZE);
	g_permanent_heap->setMinimumAllocationSize(sizeof(hash_table_element) + 1);

	// pɂɎgp镶
	CLASS_CLASS_NAME = intern_utf8("java/lang/Class");

	STRING_CLASS_NAME = intern_utf8("java/lang/String");

	// <clinit>
	CLINIT_METHOD_NAME = intern_utf8("<clinit>");

	// <init>
	INIT_METHOD_NAME = intern_utf8("<init>");

	// "finalize()"\bh
	FINALIZE_METHOD_NAME = intern_utf8("finalize");

	// Ԃl void ňȂ\bh̃fBXNv^
	VOID_NOARG_METHOD_DESCRIPTOR = intern_utf8("()V");

	// clone() \bh
	CLONE_METHOD_NAME = intern_utf8("clone");
	
	// java/lang/Object
	JAVA_LANG_OBJECT_CLASS_NAME = intern_utf8("java/lang/Object");

	JAVA_LANG_CLONEABLE_CLASS_NAME = intern_utf8("java/lang/Cloneable");
	JAVA_IO_SERIALIZABLE_CLASS_NAME = intern_utf8("java/io/Serializable");

	// zNX
	// booleanzNX
	BOOLEAN_ARRAY_CLASS_NAME = intern_utf8("[Z");

	// bytezNX
	BYTE_ARRAY_CLASS_NAME = intern_utf8("[B");

	// charzNX
	CHAR_ARRAY_CLASS_NAME = intern_utf8("[C");
	
	// shortzNX
	SHORT_ARRAY_CLASS_NAME = intern_utf8("[S");
	
	// intzNX
	INT_ARRAY_CLASS_NAME = intern_utf8("[I");
	
	// longzNX
	LONG_ARRAY_CLASS_NAME = intern_utf8("[J");
	
	// floatzNX
	FLOAT_ARRAY_CLASS_NAME = intern_utf8("[F");
	
	// doublezNX
	DOUBLE_ARRAY_CLASS_NAME = intern_utf8("[D");
	
	PRIMITIVE_ARRAY_CLASS_NAMES[4] = BOOLEAN_ARRAY_CLASS_NAME;	// T_BOOLEAN = 4
	PRIMITIVE_ARRAY_CLASS_NAMES[5] = CHAR_ARRAY_CLASS_NAME;		// T_CHAR = 5
	PRIMITIVE_ARRAY_CLASS_NAMES[6] = FLOAT_ARRAY_CLASS_NAME;	// T_FLOAT = 6
	PRIMITIVE_ARRAY_CLASS_NAMES[7] = DOUBLE_ARRAY_CLASS_NAME;	// T_DOUBLE = 7
	PRIMITIVE_ARRAY_CLASS_NAMES[8] = BYTE_ARRAY_CLASS_NAME;		// T_BYTE = 8
	PRIMITIVE_ARRAY_CLASS_NAMES[9] = SHORT_ARRAY_CLASS_NAME;	// T_SHORT = 9
	PRIMITIVE_ARRAY_CLASS_NAMES[10] = INT_ARRAY_CLASS_NAME;		// T_INT = 10
	PRIMITIVE_ARRAY_CLASS_NAMES[11] = LONG_ARRAY_CLASS_NAME;	// T_INT = 11

	JAVA_LANG_VMCLASS_CLASS_NAME = intern_utf8("java/lang/VMClass");
	JAVA_LANG_CLASS_CONSTRUCTOR_DESCRIPTOR = intern_utf8("(Ljava/lang/Object;Ljava/security/ProtectionDomain;)V");
	STRING_ARG_CONSTRUCTOR_DESCRIPTOR = intern_utf8("(Ljava/lang/String;)V");
	CHAR_ARRAY_ARG_CONSTRUCTOR_DESCRIPTOR = intern_utf8("([C)V");

}

/**
 * Computes the hash table bucket for a byte sequence.
 *
 * This is a polynomial hash with multiplier 137 over the first `length`
 * bytes, reduced modulo HASH_TABLE_SIZE. Each byte is converted from the
 * element type of java_utf8 to unsigned int, so for bytes >= 0x80 the
 * contribution depends on the signedness of that type.
 *
 * @param	utf8	bytes to hash
 * @param	length	number of bytes to hash
 * @return	bucket index in the range [0, HASH_TABLE_SIZE)
 */
static inline unsigned int get_hash_table_index(java_utf8* utf8, int length) {
	unsigned int accumulated = 0;
	for (int remaining = length; remaining != 0; --remaining) {
		accumulated = accumulated * 137 + (unsigned int) *utf8;
		++utf8;
	}
	return accumulated % HASH_TABLE_SIZE;
}

/**
 * Creates a new, unlinked hash table element holding a copy of a string.
 *
 * The element header and the string bytes (plus a terminating '\0') are
 * allocated from g_permanent_heap as one block. If the allocation fails,
 * fatal_error(FATAL_ERROR_NO_MEMORY) is called.
 *
 * @param	utf8	bytes to copy
 * @param	length	number of bytes to copy (terminator not included)
 * @return	the new element, with next and string_reference set to NULL
 */
static hash_table_element* create_element(const char* utf8, int length) {
	int total_size = sizeof(hash_table_element) + length + 1;
	hash_table_element* elem = (hash_table_element*) g_permanent_heap->allocate(total_size);

	if (! elem) {
		// NOTE(review): presumably fatal_error() does not return - confirm.
		fatal_error(FATAL_ERROR_NO_MEMORY);
	}

	// Initialize every header field explicitly. intern_element() tests
	// elem->next against NULL and intern_string() tests string_reference
	// against NULL, so these must not depend on the allocator returning
	// zero-filled memory.
	elem->length = length;
	elem->string_reference = NULL;
	elem->next = NULL;

	// The string is stored directly behind the header.
	char* value = (char*) (elem + 1);
	strncpy(value, utf8, length);
	value[length] = '\0';
	return elem;
}

/**
 * Performs the actual "intern" operation: returns the hash table element for
 * the given string, creating and inserting it if it is not present yet.
 *
 * Each bucket is a singly linked chain kept in ascending order of string
 * length. A new element is inserted in front of the first element that is
 * strictly longer, i.e. behind all existing elements of the same length.
 *
 * This function does no locking itself; the callers in this file invoke it
 * while holding g_global_critical_section.
 *
 * @param	utf8	bytes of the string to intern
 * @param	length	number of bytes in utf8
 * @return	the existing or newly created element (never NULL)
 */
static hash_table_element* intern_element(const java_utf8* utf8, int length) {
	unsigned table_pos = get_hash_table_index(utf8, length);

	// "link" always addresses the pointer that refers to the element being
	// examined (the bucket head first, then each element's next field), so
	// one insertion path covers empty bucket, head, middle and tail.
	hash_table_element** link = &g_utf8_table[table_pos];
	while (*link != NULL) {
		hash_table_element* current = *link;
		if (length == current->length) {
			if (strncmp(utf8, CHAR_VALUE(current), length) == 0) {
				// Already interned.
				return current;
			}
		} else if (length < current->length) {
			// Every remaining element is longer: insert here.
			break;
		}
		link = &current->next;
	}

	// Not found: splice a new element in at the position reached above.
	// The next pointer is always written here (NULL when appending), so the
	// chain is terminated regardless of how create_element() leaves it.
	hash_table_element* created = create_element(utf8, length);
	created->next = *link;
	*link = created;
	return created;
}

/**
 * Interns the first `length` bytes of a UTF-8 string.
 *
 * The hash table lookup/insert runs while g_global_critical_section is held.
 *
 * @param	utf8	bytes of the string to intern (need not be '\0'-terminated
 *					at `length`; only `length` bytes are used)
 * @param	length	number of bytes to intern
 * @return	the interned, '\0'-terminated copy stored in the hash table
 */
const java_utf8* intern_utf8(const char* utf8, int length) {
	hash_table_element* interned = NULL;

	EnterCriticalSection(&g_global_critical_section);
	interned = intern_element(utf8, length);
	LeaveCriticalSection(&g_global_critical_section);

	return CHAR_VALUE(interned);
}

/**
 * Interns a '\0'-terminated UTF-8 string.
 *
 * The hash table lookup/insert runs while g_global_critical_section is held.
 *
 * @param	utf8	'\0'-terminated string to intern
 * @return	the interned copy stored in the hash table
 */
const java_utf8* intern_utf8(const char* utf8) {
	const int length = (int) strlen(utf8);
	hash_table_element* interned = NULL;

	EnterCriticalSection(&g_global_critical_section);
	interned = intern_element(utf8, length);
	LeaveCriticalSection(&g_global_critical_section);

	return CHAR_VALUE(interned);
}

/**
 * Interns the given UTF-8 string and returns the String object associated
 * with the interned entry, creating that object on first use.
 *
 * @param	utf8	'\0'-terminated string to intern
 * @param	frame	passed to permanent_utf8_string() and POP_DISCARD()
 * @return	the String reference stored in the interned element
 */
jstring intern_string(const char* utf8, frame* frame) {
	// Only the hash table lookup/insert is done under the global lock.
	EnterCriticalSection(&g_global_critical_section);
	hash_table_element* elem = intern_element(utf8, strlen(utf8));
	LeaveCriticalSection(&g_global_critical_section);

	// The String object is created lazily and outside the lock.
	if (elem->string_reference == NULL) {
		jstring permstr = permanent_utf8_string(frame, utf8);
		// Publish permstr only if the slot is still NULL.
		InterlockedCompareExchangePointer(&elem->string_reference, permstr, NULL);
		// NOTE(review): presumably discards a value that
		// permanent_utf8_string() left on the frame's stack - confirm.
		POP_DISCARD(frame);
		if (elem->string_reference != permstr) {
			// NOTE(review): this branch handles losing the race to another
			// thread, yet the assert aborts in builds where assert is
			// enabled - confirm whether it is intentional.
			assert(false);
			// Another thread stored its reference first, so ours could not
			// be stored. It would otherwise become garbage, so delete the
			// global reference.
			delete_global_reference(permstr);
		}
	}

	return elem->string_reference;
}

/**
 * TCHARUTF-8Ƃ̔rsBQ̕\ꍇtrueԂB
 */
bool equals(const TCHAR* str, const char* utf8) {
	int stridx = 0;
	u4 utf8idx = 0;
	u4 length = strlen(utf8);

	TCHAR c;
	while ((c = str[stridx++]) != 0) {
		// PoCg 0xxxxxxx
		if (c != 0 && (c & 0xff80) == 0) {
			if (utf8idx >= length) {
				// UTF-8oCgIĂ܂
				return false;
			}
			if (utf8[utf8idx++] != (u1) c) {
				return false;
			}
		} else if ((c == 0) || ((c & 0xf800) == 0)) { // QoCg 00000yyy:yyxxxxxx -> 110yyyyy + 10xxxxxx
			if (utf8idx+1 >= length) {
				// UTF-8QoCgǂݍނƂłȂ
				return false;
			}
			if ((utf8[utf8idx++] != (0xc0 | c >> 6))
				|| (utf8[utf8idx++] != (0x80 | (c & 0x003f)))) {
				return false;
			}
		} else { // RoCg zzzzyyyy:yyxxxxxx -> 1110zzzz + 10yyyyyy + 10xxxxxx
			if (utf8idx+2 >= length) {
				// UTF-83oCgǂݍނƂłȂ
				return false;
			}
			if ( utf8[utf8idx++] != (0xe0 | (c >> 12))
					|| utf8[utf8idx++] != (0x80 | ((c & 0x0fc0) >> 6)
					|| utf8[utf8idx++] != (0x80 | (c & 0x003f)))) {
				return false;
			}
		}
	}
	return true;
}

/**
 * java_utf8̓eATCHARɕϊ
 *
 * @param	utf8	ϊ
 * @param	buff	TCHAR̃obt@
 * @param	bufflen TCHARobt@̃TCY
 * @return	Rs[
 */
int convert_to_TCHAR(const java_utf8* utf8, TCHAR* buff, unsigned int bufflen) {
	unsigned int buffpos = 0;
	u4 bytepos = 0;
	u4 length = strlen(utf8);
	while (bytepos < length) {
		// obt@TCY𒴂ꍇiI[܂ށj
		if (buffpos + 1 >= bufflen) {
			break;
		}
		u1 b = utf8[bytepos++];
		if ((b & 0x80) == 0) {
			// PoCgF0xxxxxxx
			buff[buffpos] = (TCHAR) b;
		} else if ((b & 0xe0) == 0xc0) {
			// QoCg 110xxxxx : 10yyyyyy -> 00000xxxxxyyyyyy
			u1 b2 = utf8[bytepos++];
			buff[buffpos] = (TCHAR) (((b & 0x1f) << 6) | (b2 & 0x3f));
		} else if ((b & 0xf0) == 0xe0) {
			// RoCg 1110xxxx:10yyyyyy:10zzzzzz -> xxxxyyyyyyzzzzzz
			u1 b2 = utf8[bytepos++];
			u1 b3 = utf8[bytepos++];
			buff[buffpos] = (TCHAR) (((b & 0x0f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f));
		}
		buffpos++;
	}

	// I[
	buff[buffpos] = _T('\0');

	// ̒Ԃ
	return buffpos;
}

/**
 * Converts a TCHAR string to UTF-8 as encoded by this function: U+0000 is
 * written as the two bytes C0 80, and each TCHAR is encoded on its own (1 to
 * 3 bytes). The output is always '\0'-terminated, provided utf8bufflen is at
 * least 1. Conversion stops early when the next character would not fit
 * together with the terminator.
 *
 * @param	src			source TCHAR buffer
 * @param	srclen		number of TCHARs to convert. If -1, conversion runs
 *						up to (not including) the first _T('\0'). Any other
 *						negative value converts nothing.
 * @param	utf8buff	destination buffer
 * @param	utf8bufflen	capacity of utf8buff in bytes, including the terminator
 * @return	number of bytes written, not counting the terminator
 *			(0 if utf8bufflen <= 0, in which case utf8buff is not touched)
 */
int convert_to_utf8(const TCHAR* src, int srclen, char* utf8buff, int utf8bufflen) {
	if (utf8bufflen <= 0) {
		// Not even room for the terminator.
		return 0;
	}

	const bool until_nul = (srclen == -1);
	int bytepos = 0;

	// The count is checked BEFORE src is read, so a counted buffer is never
	// accessed at index srclen.
	for (int srcpos = 0; until_nul || srcpos < srclen; ++srcpos) {
		const TCHAR c = src[srcpos];
		if (until_nul && c == _T('\0')) {
			break;
		}

		if (c >= 0x0001 && c <= 0x007f) {
			// 1 byte: 0xxxxxxx
			if (bytepos >= utf8bufflen - 1) {
				break;
			}
			utf8buff[bytepos++] = (char) (u1) c;
		} else if (c == 0x0000 || (c >= 0x0080 && c <= 0x07ff)) {
			// 2 bytes: 110xxxxx 10xxxxxx (also used for an embedded U+0000)
			if (bytepos >= utf8bufflen - 2) {
				break;
			}
			utf8buff[bytepos++] = (char) (0xc0 | ((c >> 6) & 0x1f));
			utf8buff[bytepos++] = (char) (0x80 | (c & 0x3f));
		} else {
			// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			if (bytepos >= utf8bufflen - 3) {
				break;
			}
			utf8buff[bytepos++] = (char) (0xe0 | ((c >> 12) & 0x0f));
			utf8buff[bytepos++] = (char) (0x80 | ((c >> 6) & 0x3f));
			utf8buff[bytepos++] = (char) (0x80 | (c & 0x3f));
		}
	}

	utf8buff[bytepos] = '\0';
	return bytepos;
}

/**
 * Returns the number of bytes the given TCHAR string occupies when converted
 * with convert_to_utf8() (terminator not counted).
 *
 * U+0001..U+007F count 1 byte, U+0000 and U+0080..U+07FF count 2 bytes, and
 * everything else counts 3 bytes.
 *
 * @param	src		string to measure
 * @param	srclen	number of TCHARs to measure. If -1, the string is measured
 *					up to (not including) the first '\0', matching
 *					convert_to_utf8(). Any other negative value yields 0.
 * @return	number of UTF-8 bytes required
 */
int get_utf8_length_of(const TCHAR* src, int srclen) {
	const bool until_nul = (srclen == -1);
	int count = 0;

	for (int i = 0; until_nul || i < srclen; ++i) {
		const TCHAR c = src[i];
		if (until_nul && c == 0) {
			break;
		}

		if (c >= 0x0001 && c <= 0x007f) {
			count += 1;
		} else if (c == 0x0000 || (c >= 0x0080 && c <= 0x07ff)) {
			count += 2;
		} else {
			count += 3;
		}
	}
	return count;
}


/**
 * w肳ꂽoCg݂ʒuԂB
 */
int last_index_of(const java_utf8* utf8, u1 data) {
	u4 length = strlen(utf8);
	for (u4 i = length; i != 0; --i) {
		if (utf8[i - 1] == data) {
			return i;
		}
	}
	return -1;
}
