/** 
 * @file  SFMT.c
 * @brief SIMD oriented Fast Mersenne Twister(SFMT)
 *
 * @author Mutsuo Saito (Hiroshima University)
 * @author Makoto Matsumoto (Hiroshima University)
 *
 * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
 * University. All rights reserved.
 *
 * The new BSD License is applied to this software, see LICENSE.txt
 */
//======================================================================
//-----------------------------------------------------------------------
/**
 * @file		RndSFMT.cpp
 * @brief		Mersenne Twister file
 * @note		Modified for use with the iris framework.
 *				For the original source, see math/random/common/SFMT.
 *
 * @author		t.sirayanagi
 * @version		1.0
 *
 * @par			copyright
 * Copyright (C) 2010-2011 Takazumi Shirayanagi\n
 * The new BSD License is applied to this software.
 * see iris_LICENSE.txt
*/
//-----------------------------------------------------------------------
//======================================================================
#define INCG_IRIS_RndSFMT_CPP_

//======================================================================
// include
#include "RndSFMT.h"
#include "../../iris_debug.h"

namespace iris {
namespace math {
namespace rnd {
namespace
{

/*----------------
  STATIC FUNCTIONS
  ----------------*/
// Forward declarations of the file-local helpers of this unnamed namespace.

// Maps a 32-bit word index (bit 0 is flipped when ONLY64 is defined).
inline static int idxof(int i);
// 128-bit shifts emulated with 64-bit arithmetic; `shift` is counted in
// bytes (the value is moved by shift * 8 bits).
inline static void rshift128(w128_t *out,  w128_t const *in, int shift);
inline static void lshift128(w128_t *out,  w128_t const *in, int shift);
// State regeneration. Their bodies are compiled in this file only when
// neither HAVE_ALTIVEC nor HAVE_SSE2 is defined; otherwise the SIMD header
// included below is presumably expected to provide them -- TODO confirm.
template<u32 MEXP>
inline void gen_rand_all(CSFMT<MEXP>* lpct);
template<u32 MEXP>
inline void gen_rand_array(CSFMT<MEXP>* lpct, w128_t *array, int size);
// Mixing functions used by CSFMT::SetArraySeed.
inline static uint32_t func1(uint32_t x);
inline static uint32_t func2(uint32_t x);
// Adjusts the freshly seeded state using the parity words of the context.
template<u32 MEXP>
void period_certification(CSFMT<MEXP>* lpct);
#if defined(BIG_ENDIAN64) && !defined(ONLY64)
// Exchanges the 32-bit words inside each 64-bit half of every element.
inline static void swap(w128_t *array, int size);
#endif

#if defined(HAVE_ALTIVEC)
  #include "../common/SFMT/SFMT-alti.h"
#elif defined(HAVE_SSE2)
  #include "../common/SFMT/SFMT-sse2.h"
#endif

/**
 * Translates a 32-bit word index into the index actually used to address
 * the 32-bit view of the state array.
 *
 * With ONLY64 defined the lowest bit of the index is flipped, so the two
 * 32-bit halves of every 64-bit word trade places (a little-endian 64-bit
 * index simulated on a big-endian machine). Without ONLY64 the index is
 * returned unchanged.
 *
 * @param i index of a 32-bit word
 * @return index to use for the access
 */
inline static int idxof(int i) {
#ifdef ONLY64
    return i ^ 1;
#else
    return i;
#endif
}
/**
 * Emulates a little-endian SIMD 128-bit logical right shift with 64-bit
 * integer arithmetic. The 128-bit value in `in` is shifted right by
 * (shift * 8) bits and stored to `out`.
 *
 * All words of `in` are read before `out` is written.
 *
 * A shift of 0 copies the value unchanged. The carry from the upper half is
 * skipped in that case because shifting a 64-bit integer by 64 bits is
 * undefined behaviour in C++.
 *
 * @param out   receives the shifted 128-bit value
 * @param in    the 128-bit value to shift
 * @param shift shift amount in bytes; must be in the range 0..7
 */
#ifdef ONLY64
inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
    const int bits = shift * 8;

    /* ONLY64 layout: inside each 64-bit half the high 32-bit word is stored first */
    const uint64_t th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
    const uint64_t tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);

    const uint64_t oh = th >> bits;
    uint64_t ol = tl >> bits;
    if (bits != 0) {
        /* bits leaving the upper half enter the top of the lower half */
        ol |= th << (64 - bits);
    }
    out->u[0] = (uint32_t)(ol >> 32);
    out->u[1] = (uint32_t)ol;
    out->u[2] = (uint32_t)(oh >> 32);
    out->u[3] = (uint32_t)oh;
}
#else
inline static void rshift128(w128_t *out, w128_t const *in, int shift) {
    const int bits = shift * 8;

    /* u[0] is the least significant 32-bit word, u[3] the most significant */
    const uint64_t th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
    const uint64_t tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);

    const uint64_t oh = th >> bits;
    uint64_t ol = tl >> bits;
    if (bits != 0) {
        /* bits leaving the upper half enter the top of the lower half */
        ol |= th << (64 - bits);
    }
    out->u[1] = (uint32_t)(ol >> 32);
    out->u[0] = (uint32_t)ol;
    out->u[3] = (uint32_t)(oh >> 32);
    out->u[2] = (uint32_t)oh;
}
#endif
/**
 * Emulates a little-endian SIMD 128-bit left shift with 64-bit integer
 * arithmetic. The 128-bit value in `in` is shifted left by (shift * 8) bits
 * and stored to `out`.
 *
 * All words of `in` are read before `out` is written.
 *
 * A shift of 0 copies the value unchanged. The carry from the lower half is
 * skipped in that case because shifting a 64-bit integer by 64 bits is
 * undefined behaviour in C++.
 *
 * @param out   receives the shifted 128-bit value
 * @param in    the 128-bit value to shift
 * @param shift shift amount in bytes; must be in the range 0..7
 */
#ifdef ONLY64
inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
    const int bits = shift * 8;

    /* ONLY64 layout: inside each 64-bit half the high 32-bit word is stored first */
    const uint64_t th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
    const uint64_t tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]);

    uint64_t oh = th << bits;
    const uint64_t ol = tl << bits;
    if (bits != 0) {
        /* bits leaving the lower half enter the bottom of the upper half */
        oh |= tl >> (64 - bits);
    }
    out->u[0] = (uint32_t)(ol >> 32);
    out->u[1] = (uint32_t)ol;
    out->u[2] = (uint32_t)(oh >> 32);
    out->u[3] = (uint32_t)oh;
}
#else
inline static void lshift128(w128_t *out, w128_t const *in, int shift) {
    const int bits = shift * 8;

    /* u[0] is the least significant 32-bit word, u[3] the most significant */
    const uint64_t th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
    const uint64_t tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]);

    uint64_t oh = th << bits;
    const uint64_t ol = tl << bits;
    if (bits != 0) {
        /* bits leaving the lower half enter the bottom of the upper half */
        oh |= tl >> (64 - bits);
    }
    out->u[1] = (uint32_t)(ol >> 32);
    out->u[0] = (uint32_t)ol;
    out->u[3] = (uint32_t)(oh >> 32);
    out->u[2] = (uint32_t)oh;
}
#endif

/**
 * This function represents the recursion formula.
 *
 * Every parameter of the formula (SL1, SL2, SR1, SR2 and MSK1..MSK4) is read
 * from the context, in both the ONLY64 and the default build. With ONLY64
 * the masks are applied pairwise swapped (MSK2, MSK1, MSK4, MSK3).
 *
 * `r` may be the same object as `a`: the 128-bit shifts are computed into
 * temporaries first and each output word depends only on the matching word
 * of `a`.
 *
 * @param lpct context holding the generator parameters
 * @param r output
 * @param a a 128-bit part of the internal state array
 * @param b a 128-bit part of the internal state array
 * @param c a 128-bit part of the internal state array
 * @param d a 128-bit part of the internal state array
 */
#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
inline static void do_recursion(LPSFMTCONTEXT lpct, w128_t *r, w128_t *a, w128_t *b, w128_t *c,
				w128_t *d) {
    w128_t x;
    w128_t y;
    const int sr1 = lpct->SR1;
    const int sl1 = lpct->SL1;
#ifdef ONLY64
    /* 32-bit words are swapped inside each 64-bit half, so the masks are too */
    const uint32_t mask0 = lpct->MSK2;
    const uint32_t mask1 = lpct->MSK1;
    const uint32_t mask2 = lpct->MSK4;
    const uint32_t mask3 = lpct->MSK3;
#else
    const uint32_t mask0 = lpct->MSK1;
    const uint32_t mask1 = lpct->MSK2;
    const uint32_t mask2 = lpct->MSK3;
    const uint32_t mask3 = lpct->MSK4;
#endif

    lshift128(&x, a, lpct->SL2);
    rshift128(&y, c, lpct->SR2);
    r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> sr1) & mask0) ^ y.u[0]
	^ (d->u[0] << sl1);
    r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> sr1) & mask1) ^ y.u[1]
	^ (d->u[1] << sl1);
    r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> sr1) & mask2) ^ y.u[2]
	^ (d->u[2] << sl1);
    r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> sr1) & mask3) ^ y.u[3]
	^ (d->u[3] << sl1);
}
#endif

#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
/**
 * Regenerates the whole internal state array in place.
 *
 * Each element i is recomputed from itself, the element POS1 positions
 * ahead (wrapping around the end of the array) and the two most recently
 * produced elements. Before the first step those two are the last two
 * elements of the state.
 *
 * @param lpct generator whose state array (m_psfmt) is refreshed
 */
template<u32 MEXP>
inline void gen_rand_all(CSFMT<MEXP>* lpct) {
	w128_t* const state = lpct->m_psfmt;
	const int pos1 = lpct->m_context.POS1;
	w128_t* older = &state[CSFMT<MEXP>::N - 2];
	w128_t* newer = &state[CSFMT<MEXP>::N - 1];
	int i = 0;

	/* part 1: the partner element i + POS1 is still inside the array */
	while (i < CSFMT<MEXP>::N - pos1) {
		rnd::do_recursion(&lpct->m_context, &state[i], &state[i], &state[i + pos1], older, newer);
		older = newer;
		newer = &state[i];
		++i;
	}
	/* part 2: the partner index wraps around to the start of the array */
	while (i < CSFMT<MEXP>::N) {
		rnd::do_recursion(&lpct->m_context, &state[i], &state[i], &state[i + pos1 - CSFMT<MEXP>::N], older, newer);
		older = newer;
		newer = &state[i];
		++i;
	}
}

/**
 * Fills a caller-supplied array with pseudorandom 128-bit values and leaves
 * the last N generated elements in the internal state array.
 *
 * POS1 and the recursion parameters are taken from lpct->m_context, the same
 * way gen_rand_all does.
 *
 * @param lpct  generator whose state seeds the sequence and is updated
 * @param array a 128-bit array to be filled with pseudorandom numbers
 * @param size  number of 128-bit pseudorandom numbers to generate
 *              (presumably at least CSFMT<MEXP>::N, as asserted by
 *              fill_array32 / fill_array64 -- confirm for other callers)
 */
template<u32 MEXP>
inline void gen_rand_array(CSFMT<MEXP>* lpct, w128_t *array, int size) {
    int i, j;
	int POS1 = lpct->m_context.POS1;
	w128_t* sfmt = lpct->m_psfmt;
    w128_t *r1, *r2;

	r1 = &sfmt[CSFMT<MEXP>::N - 2];
    r2 = &sfmt[CSFMT<MEXP>::N - 1];
    /* first N outputs: both inputs still come from the internal state ... */
    for (i = 0; i < CSFMT<MEXP>::N - POS1; i++) {
		rnd::do_recursion(&lpct->m_context, &array[i], &sfmt[i], &sfmt[i + POS1], r1, r2);
		r1 = r2;
		r2 = &array[i];
    }
    /* ... then the partner element comes from the already written output */
    for (; i < CSFMT<MEXP>::N; i++) {
		rnd::do_recursion(&lpct->m_context, &array[i], &sfmt[i], &array[i + POS1 - CSFMT<MEXP>::N], r1, r2);
		r1 = r2;
		r2 = &array[i];
    }
    /* middle part: everything is computed from the output array itself */
    for (; i < size - CSFMT<MEXP>::N; i++) {
		rnd::do_recursion(&lpct->m_context, &array[i], &array[i - CSFMT<MEXP>::N], &array[i + POS1 - CSFMT<MEXP>::N], r1, r2);
		r1 = r2;
		r2 = &array[i];
    }
    /* when size < 2N, part of the new state was already produced above */
    for (j = 0; j < 2 * CSFMT<MEXP>::N - size; j++) {
		sfmt[j] = array[j + size - CSFMT<MEXP>::N];
    }
    /* last part: generate the tail and mirror it into the internal state */
    for (; i < size; i++, j++) {
		rnd::do_recursion(&lpct->m_context, &array[i], &array[i - CSFMT<MEXP>::N], &array[i + POS1 - CSFMT<MEXP>::N], r1, r2);
		r1 = r2;
		r2 = &array[i];
		sfmt[j] = array[i];
    }
}
#endif

#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
/**
 * Exchanges the two 32-bit words inside each 64-bit half of every element:
 * u[0] with u[1] and u[2] with u[3].
 *
 * @param array elements to modify in place
 * @param size  number of 128-bit elements in array
 */
inline static void swap(w128_t *array, int size) {
    for (int k = 0; k < size; ++k) {
	w128_t &blk = array[k];

	const uint32_t low_first = blk.u[0];
	blk.u[0] = blk.u[1];
	blk.u[1] = low_first;

	const uint32_t high_first = blk.u[2];
	blk.u[2] = blk.u[3];
	blk.u[3] = high_first;
    }
}
#endif
/**
 * First mixing function of the array-based seeding (CSFMT::SetArraySeed).
 * XORs the value with itself shifted right by 27 bits and multiplies the
 * result by 1664525, modulo 2^32.
 * @param x 32-bit integer
 * @return 32-bit integer
 */
static uint32_t func1(uint32_t x) {
    const uint32_t folded = x ^ (x >> 27);
    return folded * (uint32_t)1664525UL;
}

/**
 * Second mixing function of the array-based seeding (CSFMT::SetArraySeed).
 * XORs the value with itself shifted right by 27 bits and multiplies the
 * result by 1566083941, modulo 2^32.
 * @param x 32-bit integer
 * @return 32-bit integer
 */
static uint32_t func2(uint32_t x) {
    const uint32_t folded = x ^ (x >> 27);
    return folded * (uint32_t)1566083941UL;
}

/**
 * Certifies the period of 2^{MEXP}.
 *
 * The first four 32-bit state words are ANDed with the parity words of the
 * context and the parity (XOR of all bits) of the result is computed. When
 * that parity is 1 nothing is changed. Otherwise the state bit matching the
 * lowest set bit of the first non-zero parity word is flipped.
 *
 * @param lpct generator whose state is checked and, if needed, modified
 */
template<u32 MEXP>
void period_certification(CSFMT<MEXP>* lpct) {
    u32 folded = 0;

    for (int w = 0; w < 4; ++w) {
	folded ^= lpct->m_psfmt32[idxof(w)] & lpct->m_context.parity[w];
    }
    /* XOR-fold the 32 bits down into bit 0 */
    folded ^= folded >> 16;
    folded ^= folded >> 8;
    folded ^= folded >> 4;
    folded ^= folded >> 2;
    folded ^= folded >> 1;

    /* check OK */
    if ((folded & 1) == 1) {
	return;
    }

    /* check NG, and modification */
    for (int w = 0; w < 4; ++w) {
	/* walk one bit from position 0 to 31; it becomes 0 after bit 31 */
	for (uint32_t bit = 1; bit != 0; bit = bit << 1) {
	    if ((bit & lpct->m_context.parity[w]) != 0) {
		lpct->m_psfmt32[idxof(w)] ^= bit;
		return;
	    }
	}
    }
}

/*----------------
  PUBLIC FUNCTIONS
  ----------------*/
/**
 * This function returns the identification string.
 * The string shows the word size, the Mersenne exponent,
 * and all parameters of this generator.
 */
//const char *get_idstring(void) {
//    return IDSTR;
//}

#ifndef ONLY64
/**
 * Generates pseudorandom 32-bit integers into the specified array[] with a
 * single call. This bulk generation is much faster than drawing the values
 * one by one.
 *
 * The generator must have been seeded before the first call, and the
 * function cannot be used once single values have been drawn from the
 * generator unless it is seeded again. Both conditions are asserted:
 * m_initialized must be set and m_idx must equal CSFMT<MEXP>::N32.
 *
 * @param lpct generator to draw from
 *
 * @param array an array that receives the pseudorandom 32-bit integers.
 * In the SIMD version the pointer must be \b "aligned" (namely, must be a
 * multiple of 16), since it is used as the address of a 128-bit integer.
 * In the standard C version, the pointer is arbitrary.
 *
 * @param size the number of 32-bit pseudorandom integers to be generated.
 * size must be a multiple of 4, and greater than or equal to
 * CSFMT<MEXP>::N32.
 *
 * @note \b memalign or \b posix_memalign is available to get aligned
 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
 * returns the pointer to the aligned memory block.
 */
template<u32 MEXP>
void fill_array32(CSFMT<MEXP>* lpct, uint32_t *array, int size) {
	IRIS_ASSERT(lpct->m_initialized);
	IRIS_ASSERT(lpct->m_idx == CSFMT<MEXP>::N32);
	IRIS_ASSERT(size % 4 == 0);
	IRIS_ASSERT(size >= CSFMT<MEXP>::N32);

	/* four 32-bit integers make up one 128-bit element */
	w128_t* const blocks = (w128_t *)array;
	const int block_count = size / 4;

	gen_rand_array(lpct, blocks, block_count);
	/* mark the internal buffer as used up */
	lpct->m_idx = CSFMT<MEXP>::N32;
}
#endif

/**
 * Generates pseudorandom 64-bit integers into the specified array[] with a
 * single call. This bulk generation is much faster than drawing the values
 * one by one.
 *
 * The generator must have been seeded before the first call, and the
 * function cannot be used once single values have been drawn from the
 * generator unless it is seeded again. Both conditions are asserted:
 * m_initialized must be set and m_idx must equal CSFMT<MEXP>::N32.
 *
 * @param lpct generator to draw from (passed by pointer, like fill_array32,
 * so that its state is updated)
 *
 * @param array an array that receives the pseudorandom 64-bit integers.
 * In the SIMD version the pointer must be "aligned" (namely, must be a
 * multiple of 16), since it is used as the address of a 128-bit integer.
 * In the standard C version, the pointer is arbitrary.
 *
 * @param size the number of 64-bit pseudorandom integers to be generated.
 * size must be a multiple of 2, and greater than or equal to
 * CSFMT<MEXP>::N64.
 *
 * @note \b memalign or \b posix_memalign is available to get aligned
 * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
 * returns the pointer to the aligned memory block.
 */
template<u32 MEXP>
void fill_array64(CSFMT<MEXP>* lpct, uint64_t *array, int size) {
    IRIS_ASSERT(lpct->m_initialized);
	IRIS_ASSERT(lpct->m_idx == CSFMT<MEXP>::N32);
    IRIS_ASSERT(size % 2 == 0);
	IRIS_ASSERT(size >= CSFMT<MEXP>::N64);

    /* two 64-bit integers make up one 128-bit element */
    gen_rand_array(lpct, (w128_t *)array, size / 2);
    /* mark the internal buffer as used up */
	lpct->m_idx = CSFMT<MEXP>::N32;

#if defined(BIG_ENDIAN64) && !defined(ONLY64)
    /* exchange the 32-bit words inside every 64-bit value */
    swap((w128_t *)array, size /2);
#endif
}

}	// end of namespace 

//======================================================================
// class
/**********************************************************************//**
 *
 * Constructor (generic version).
 *
 * Clears the parameter context, points the 32-bit (and, where available,
 * 64-bit) views at the start of the state array and seeds the generator
 * with 19650218.
 *
*//***********************************************************************/
template<u32 MEXP>
CSFMT<MEXP>::CSFMT(void)
: m_psfmt32(nullptr)
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
, m_psfmt64(nullptr) 
#endif
, m_idx(0)
, m_initialized(false)
{
	// Word views onto the 128-bit state array.
	m_psfmt32 = &m_psfmt[0].u[0];
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
	m_psfmt64 = (uint64_t*)m_psfmt32;
#endif

	// No parameter set is known for an arbitrary MEXP: start from all zero.
	ZeroMemory(&m_context, sizeof(m_context));

	SetSeed(19650218UL);
}

// Constructor specialization for MEXP = 607: installs the parameter set for
// this exponent, sets up the word views onto the state array and seeds the
// generator with 19650218.
template<>
CSFMT<607>::CSFMT(void)
: m_psfmt32(nullptr)
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
, m_psfmt64(nullptr) 
#endif
, m_idx(0)
, m_initialized(false)
{
	// Word views onto the 128-bit state array.
	m_psfmt32 = &m_psfmt[0].u[0];
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
	m_psfmt64 = (uint64_t*)m_psfmt32;
#endif

	// Pick-up position and shift amounts of the recursion.
	m_context.POS1 = 2;
	m_context.SL1 = 15;
	m_context.SL2 = 3;
	m_context.SR1 = 13;
	m_context.SR2 = 3;

	// Bit masks of the recursion.
	m_context.MSK1 = 0xfdff37ffU;
	m_context.MSK2 = 0xef7f3f7dU;
	m_context.MSK3 = 0xff777b7dU;
	m_context.MSK4 = 0x7ff7fb2fU;

	// Parity words used by the period certification.
	const u32 parity[4] = { 0x00000001U, 0x00000000U, 0x00000000U, 0x5986f054U };
	for (int w = 0; w < 4; ++w) {
		m_context.parity[w] = parity[w];
	}

	SetSeed(19650218UL);
}

// Constructor specialization for MEXP = 1279: installs the parameter set for
// this exponent, sets up the word views onto the state array and seeds the
// generator with 19650218.
template<>
CSFMT<1279>::CSFMT(void)
: m_psfmt32(nullptr)
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
, m_psfmt64(nullptr) 
#endif
, m_idx(0)
, m_initialized(false)
{
	// Word views onto the 128-bit state array.
	m_psfmt32 = &m_psfmt[0].u[0];
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
	m_psfmt64 = (uint64_t*)m_psfmt32;
#endif

	// Pick-up position and shift amounts of the recursion.
	m_context.POS1 = 7;
	m_context.SL1 = 14;
	m_context.SL2 = 3;
	m_context.SR1 = 5;
	m_context.SR2 = 1;

	// Bit masks of the recursion.
	m_context.MSK1 = 0xf7fefffdU;
	m_context.MSK2 = 0x7fefcfffU;
	m_context.MSK3 = 0xaff3ef3fU;
	m_context.MSK4 = 0xb5ffff7fU;

	// Parity words used by the period certification.
	const u32 parity[4] = { 0x00000001U, 0x00000000U, 0x00000000U, 0x20000000U };
	for (int w = 0; w < 4; ++w) {
		m_context.parity[w] = parity[w];
	}

	SetSeed(19650218UL);
}

// Constructor specialization for MEXP = 19937: installs the parameter set
// for this exponent, sets up the word views onto the state array and seeds
// the generator with 19650218.
template<>
CSFMT<19937>::CSFMT(void)
: m_psfmt32(nullptr)
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
, m_psfmt64(nullptr) 
#endif
, m_idx(0)
, m_initialized(false)
{
	// Word views onto the 128-bit state array.
	m_psfmt32 = &m_psfmt[0].u[0];
#if !defined(BIG_ENDIAN64) || defined(ONLY64)
	m_psfmt64 = (uint64_t*)m_psfmt32;
#endif

	// Pick-up position and shift amounts of the recursion.
	m_context.POS1 = 122;
	m_context.SL1 = 18;
	m_context.SL2 = 1;
	m_context.SR1 = 11;
	m_context.SR2 = 1;

	// Bit masks of the recursion.
	m_context.MSK1 = 0xdfffffefU;
	m_context.MSK2 = 0xddfecb7fU;
	m_context.MSK3 = 0xbffaffffU;
	m_context.MSK4 = 0xbffffff6U;

	// Parity words used by the period certification.
	const u32 parity[4] = { 0x00000001U, 0x00000000U, 0x00000000U, 0x13c9e684U };
	for (int w = 0; w < 4; ++w) {
		m_context.parity[w] = parity[w];
	}

	SetSeed(19650218UL);
}


/**********************************************************************//**
 *
 * Seeds the generator from a single 32-bit value.
 *
 * The seed is forwarded to IRand::SetSeed, then every 32-bit word of the
 * state is derived from the previous one. Afterwards the period is
 * certified and the generator is marked as initialized with an exhausted
 * buffer (m_idx == N32), so the next draw regenerates the state.
 *
 -----------------------------------------------------------------------
 * @param [in]	seed	= seed value
*//***********************************************************************/
template<u32 MEXP>
void CSFMT<MEXP>::SetSeed(u32 seed)
{
	IRand::SetSeed(seed);

	// The first word is the seed itself; each following word is computed
	// from its predecessor.
	uint32_t prev = seed;
	m_psfmt32[rnd::idxof(0)] = prev;
	for (int i = 1; i < N32; ++i) {
		prev = 1812433253UL * (prev ^ (prev >> 30)) + i;
		m_psfmt32[rnd::idxof(i)] = prev;
	}

	m_idx = N32;
	period_certification<MEXP>(this);
	m_initialized = 1;
}

/**********************************************************************//**
 *
 * Seeds the generator from an array of 32-bit values.
 *
 * The state is filled with the byte 0x8b and then stirred in three passes:
 * one consuming the key words, one padding up to the state size, and a
 * final pass over every state word. Afterwards the period is certified and
 * the generator is marked as initialized with an exhausted buffer.
 *
 -----------------------------------------------------------------------
 * @param [in]	init_key	= array of 32-bit seed values
 * @param [in]	key_length	= number of elements of init_key to use
*//***********************************************************************/
template<u32 MEXP>
void CSFMT<MEXP>::SetArraySeed(u32 *init_key, s32 key_length)
{
	const int size = N * 4;
	uint32_t* const words = m_psfmt32;
	uint32_t r;

	// Lag between the two mixed-in positions, chosen by state size.
	int lag = 3;
	if (size >= 623) {
		lag = 11;
	} else if (size >= 68) {
		lag = 7;
	} else if (size >= 39) {
		lag = 5;
	}
	const int mid = (size - lag) / 2;

	memset(m_psfmt, 0x8b, sizeof(m_psfmt));

	// Number of mixing steps: at least one per state word, and enough to
	// consume the whole key.
	int count;
	if (key_length + 1 > N32) {
		count = key_length + 1;
	} else {
		count = N32;
	}

	// Step 0 mixes in the key length.
	r = rnd::func1(words[rnd::idxof(0)] ^ words[rnd::idxof(mid)]
		^ words[rnd::idxof(N32 - 1)]);
	words[rnd::idxof(mid)] += r;
	r += key_length;
	words[rnd::idxof(mid + lag)] += r;
	words[rnd::idxof(0)] = r;
	count--;

	int pos = 1;	// current state word, wraps around at N32
	int step = 0;

	// Pass 1: mix in the key words.
	while ((step < count) && (step < key_length)) {
		const int here   = rnd::idxof(pos);
		const int ahead  = rnd::idxof((pos + mid) % N32);
		const int behind = rnd::idxof((pos + N32 - 1) % N32);
		const int lagged = rnd::idxof((pos + mid + lag) % N32);

		r = rnd::func1(words[here] ^ words[ahead] ^ words[behind]);
		words[ahead] += r;
		r += init_key[step] + pos;
		words[lagged] += r;
		words[here] = r;
		pos = (pos + 1) % N32;
		++step;
	}

	// Pass 2: remaining steps once the key is used up.
	while (step < count) {
		const int here   = rnd::idxof(pos);
		const int ahead  = rnd::idxof((pos + mid) % N32);
		const int behind = rnd::idxof((pos + N32 - 1) % N32);
		const int lagged = rnd::idxof((pos + mid + lag) % N32);

		r = rnd::func1(words[here] ^ words[ahead] ^ words[behind]);
		words[ahead] += r;
		r += pos;
		words[lagged] += r;
		words[here] = r;
		pos = (pos + 1) % N32;
		++step;
	}

	// Pass 3: one more round over every state word using func2.
	for (step = 0; step < N32; ++step) {
		const int here   = rnd::idxof(pos);
		const int ahead  = rnd::idxof((pos + mid) % N32);
		const int behind = rnd::idxof((pos + N32 - 1) % N32);
		const int lagged = rnd::idxof((pos + mid + lag) % N32);

		r = rnd::func2(words[here] + words[ahead] + words[behind]);
		words[ahead] ^= r;
		r -= pos;
		words[lagged] ^= r;
		words[here] = r;
		pos = (pos + 1) % N32;
	}

	m_idx = N32;
	period_certification<MEXP>(this);
	m_initialized = 1;
}

/**********************************************************************//**
 *
 * Generates one 32-bit pseudorandom number.
 *
 * The generator must have been seeded (asserted). When every word of the
 * state has been handed out, the state is regenerated first.
 *
 -----------------------------------------------------------------------
 * @return	next 32-bit word of the state
*//***********************************************************************/
template<u32 MEXP>
u32 CSFMT<MEXP>::GenRand32(void)
{
	IRIS_ASSERT(m_initialized);

	// Buffer exhausted: refill it and restart from the first word.
	if (m_idx >= N32) {
		gen_rand_all<MEXP>(this);
		m_idx = 0;
	}

	const uint32_t value = m_psfmt32[m_idx];
	++m_idx;
	return value;
}

/**********************************************************************//**
 *
 * Generates one 64-bit pseudorandom number.
 *
 * The generator must have been seeded and the read position, which counts
 * 32-bit words, must be even (both asserted). When every word of the state
 * has been handed out, the state is regenerated first. Each call consumes
 * two 32-bit words.
 *
 -----------------------------------------------------------------------
 * @return	next 64-bit value of the state
*//***********************************************************************/
template<u32 MEXP>
u64 CSFMT<MEXP>::GenRand64(void)
{
#if defined(BIG_ENDIAN64) && !defined(ONLY64)
    uint32_t r1, r2;
#else
    uint64_t r;
#endif
    IRIS_ASSERT(m_initialized);
    IRIS_ASSERT(m_idx % 2 == 0);

    if (m_idx >= N32) {
	gen_rand_all<MEXP>(this);
	m_idx = 0;
    }
#if defined(BIG_ENDIAN64) && !defined(ONLY64)
    // No 64-bit view in this configuration: assemble the value from two
    // consecutive 32-bit words, the second one being the high half.
    r1 = m_psfmt32[m_idx];
    r2 = m_psfmt32[m_idx + 1];
    m_idx += 2;
    return ((uint64_t)r2 << 32) | r1;
#else
    // m_idx counts 32-bit words, the 64-bit view is indexed by half of it.
    r = m_psfmt64[m_idx / 2];
    m_idx += 2;
    return r;
#endif
}

// Explicit template instantiations
template class CSFMT<607>;
template class CSFMT<1279>;
template class CSFMT<19937>;

}	// end of namespace rnd
}	// end of namespace math
}	// end of namespace iris

