//======================================================================
//-----------------------------------------------------------------------
/**
 * @file		MathMatrix44.inl
 * @brief		4x4 matrix inline functions
 *
 * @author		t.sirayanagi
 * @version		1.0
 *
 * @par			copyright
 * Copyright (C) 2009-2011 Takazumi Shirayanagi\n
 * The new BSD License is applied to this software.
 * see iris_LICENSE.txt
*/
//-----------------------------------------------------------------------
//======================================================================
#ifndef INCG_IRIS_MathMatrix44_inl_
#define INCG_IRIS_MathMatrix44_inl_

namespace iris {
namespace math
{

//======================================================================
// function
/**
 * @brief	Sets a matrix to the identity (unit) matrix.
 * @param [out]	pm0	= output matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44* FpuMtx44Unit(IrisFMtx44* pm0)
{
	MATH_FPU_NULLASSERT( pm0 );

	// x vector = (1, 0, 0, 0)
	pm0->x.x = 1.0f;
	pm0->x.y = 0.0f;
	pm0->x.z = 0.0f;
	pm0->x.w = 0.0f;

	// y vector = (0, 1, 0, 0)
	pm0->y.x = 0.0f;
	pm0->y.y = 1.0f;
	pm0->y.z = 0.0f;
	pm0->y.w = 0.0f;

	// z vector = (0, 0, 1, 0)
	pm0->z.x = 0.0f;
	pm0->z.y = 0.0f;
	pm0->z.z = 1.0f;
	pm0->z.w = 0.0f;

	// w vector = (0, 0, 0, 1)
	pm0->w.x = 0.0f;
	pm0->w.y = 0.0f;
	pm0->w.z = 0.0f;
	pm0->w.w = 1.0f;

	return pm0;
}

/**
 * @brief	Sets a matrix to the zero matrix.
 * @param [out]	pm0	= output matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Zero(IrisFMtx44* pm0)
{
	MATH_FPU_NULLASSERT( pm0 );

	// Clear the four vectors in x, y, z, w order.
	IrisFVec4* const vecs[4] = { &pm0->x, &pm0->y, &pm0->z, &pm0->w };
	for( int i = 0; i < 4; ++i )
	{
		FpuVec4Zero(vecs[i]);
	}
	return pm0;
}

/**
 * @brief	Copies a matrix element by element.
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Copy(IrisFMtx44* pm0, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	IrisFMtx44&       dst = *pm0;
	const IrisFMtx44& src = *pm1;

	// All sixteen scalar elements are copied through the _RC members.
	dst._00 = src._00;	dst._01 = src._01;	dst._02 = src._02;	dst._03 = src._03;
	dst._10 = src._10;	dst._11 = src._11;	dst._12 = src._12;	dst._13 = src._13;
	dst._20 = src._20;	dst._21 = src._21;	dst._22 = src._22;	dst._23 = src._23;
	dst._30 = src._30;	dst._31 = src._31;	dst._32 = src._32;	dst._33 = src._33;

	return pm0;
}

/**
 * @brief	Sets the translation part (w vector) of a matrix from a vector.
 *
 * All four components of pv0, including w, are stored into pm0->w.
 * The x, y and z vectors of the matrix are left untouched.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pv0	= input vector
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44SetTransfer(IrisFMtx44* pm0, const IrisFVec4* pv0)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pv0 );

	IrisFVec4& translation = pm0->w;
	translation.x = pv0->x;
	translation.y = pv0->y;
	translation.z = pv0->z;
	translation.w = pv0->w;

	return pm0;
}

/**
 * @brief	Reads the translation part (w vector) of a matrix into a vector.
 *
 * All four components of pm0->w, including w, are copied to pv0.
 *
 * @param [out]	pv0	= output vector
 * @param [in]	pm0	= input matrix
 * @return	output vector (pv0)
*/
IRIS_FPU_INLINE IrisFVec4*		FpuMtx44GetTransfer(IrisFVec4* pv0, const IrisFMtx44* pm0)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pv0 );

	const IrisFVec4& translation = pm0->w;
	pv0->x = translation.x;
	pv0->y = translation.y;
	pv0->z = translation.z;
	pv0->w = translation.w;

	return pv0;
}

/**
 * @brief	Translates a matrix by a vector.
 *
 * The x, y and z vectors of pm1 are copied unchanged. The x/y/z components
 * of the w vector are offset by pv0, and the w component of the w vector is
 * taken directly from pv0->w.
 *
 * NOTE(review): pm0->w.w is overwritten with pv0->w rather than carried over
 * from pm1->w.w -- confirm this is intended.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix
 * @param [in]	pv0	= translation vector
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Transfer(IrisFMtx44* pm0, const IrisFMtx44* pm1, const IrisFVec4* pv0)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );
	MATH_FPU_NULLASSERT( pv0 );

	IrisFMtx44&       dst = *pm0;
	const IrisFMtx44& src = *pm1;

	// The x, y and z vectors pass through unchanged.
	dst.x.x = src.x.x;	dst.x.y = src.x.y;	dst.x.z = src.x.z;	dst.x.w = src.x.w;
	dst.y.x = src.y.x;	dst.y.y = src.y.y;	dst.y.z = src.y.z;	dst.y.w = src.y.w;
	dst.z.x = src.z.x;	dst.z.y = src.z.y;	dst.z.z = src.z.z;	dst.z.w = src.z.w;

	// Offset the translation part.
	dst.w.x = src.w.x + pv0->x;
	dst.w.y = src.w.y + pv0->y;
	dst.w.z = src.w.z + pv0->z;
	dst.w.w = pv0->w;

	return pm0;
}

/**
 * @brief	Multiplies a vector by a matrix (all four components).
 *
 * Computes pv0 = x*pm0->x + y*pm0->y + z*pm0->z + w*pm0->w, where
 * (x, y, z, w) are the components of pv1. The input components are read
 * before any output is written, so pv0 may be the same object as pv1.
 *
 * @param [out]	pv0	= output vector
 * @param [in]	pm0	= input matrix
 * @param [in]	pv1	= input vector
 * @return	output vector (pv0)
*/
IRIS_FPU_INLINE IrisFVec4*		FpuMtx44Transform(IrisFVec4* pv0, const IrisFMtx44* pm0, const IrisFVec4* pv1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pv0 );
	MATH_FPU_NULLASSERT( pv1 );

	// Snapshot the input first so that in-place use (pv0 == pv1) works.
	const f32 inX = pv1->x;
	const f32 inY = pv1->y;
	const f32 inZ = pv1->z;
	const f32 inW = pv1->w;

	pv0->x = F32_Mul(pm0->x.x, inX) + F32_Mul(pm0->y.x, inY) + F32_Mul(pm0->z.x, inZ) + F32_Mul(pm0->w.x, inW);
	pv0->y = F32_Mul(pm0->x.y, inX) + F32_Mul(pm0->y.y, inY) + F32_Mul(pm0->z.y, inZ) + F32_Mul(pm0->w.y, inW);
	pv0->z = F32_Mul(pm0->x.z, inX) + F32_Mul(pm0->y.z, inY) + F32_Mul(pm0->z.z, inZ) + F32_Mul(pm0->w.z, inW);
	pv0->w = F32_Mul(pm0->x.w, inX) + F32_Mul(pm0->y.w, inY) + F32_Mul(pm0->z.w, inZ) + F32_Mul(pm0->w.w, inW);

	return pv0;
}

/**
 * @brief	Multiplies the XYZ part of a vector by a matrix.
 *
 * Computes pv0 = x*pm0->x + y*pm0->y + z*pm0->z. The w component of pv1 and
 * the w vector of the matrix do not take part, so no translation is applied.
 * pv0->w is still written, as x*pm0->x.w + y*pm0->y.w + z*pm0->z.w.
 * The input components are read before any output is written, so pv0 may be
 * the same object as pv1.
 *
 * @param [out]	pv0	= output vector
 * @param [in]	pm0	= input matrix
 * @param [in]	pv1	= input vector
 * @return	output vector (pv0)
*/
IRIS_FPU_INLINE IrisFVec4*		FpuMtx44TransformXYZ(IrisFVec4* pv0, const IrisFMtx44* pm0, const IrisFVec4* pv1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pv0 );
	MATH_FPU_NULLASSERT( pv1 );

	// Snapshot the input first so that in-place use (pv0 == pv1) works.
	const f32 inX = pv1->x;
	const f32 inY = pv1->y;
	const f32 inZ = pv1->z;

	pv0->x = F32_Mul(pm0->x.x, inX) + F32_Mul(pm0->y.x, inY) + F32_Mul(pm0->z.x, inZ);
	pv0->y = F32_Mul(pm0->x.y, inX) + F32_Mul(pm0->y.y, inY) + F32_Mul(pm0->z.y, inZ);
	pv0->z = F32_Mul(pm0->x.z, inX) + F32_Mul(pm0->y.z, inY) + F32_Mul(pm0->z.z, inZ);
	pv0->w = F32_Mul(pm0->x.w, inX) + F32_Mul(pm0->y.w, inY) + F32_Mul(pm0->z.w, inZ);

	return pv0;
}

/**
 * @brief	Multiplies a vector by a matrix, treating the vector as a point (w = 1).
 *
 * Computes pv0 = x*pm0->x + y*pm0->y + z*pm0->z + pm0->w, where (x, y, z)
 * come from pv1. The w component of pv1 is ignored and taken to be 1, so the
 * matrix's w vector (its translation) is added once.
 *
 * The input components are read before any output is written, so pv0 may be
 * the same object as pv1.
 *
 * @param [out]	pv0	= output vector
 * @param [in]	pm0	= input matrix
 * @param [in]	pv1	= input vector
 * @return	output vector (pv0)
*/
IRIS_FPU_INLINE IrisFVec4*		FpuMtx44HomogeneousTransform(IrisFVec4* pv0, const IrisFMtx44* pm0, const IrisFVec4* pv1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pv0 );
	MATH_FPU_NULLASSERT( pv1 );

	// Snapshot the input first so that in-place use (pv0 == pv1) works.
	const f32 x = pv1->x;
	const f32 y = pv1->y;
	const f32 z = pv1->z;

	// The last term is the matrix's w vector multiplied by an implicit w of 1.
	pv0->x = F32_Mul(pm0->x.x, x) + F32_Mul(pm0->y.x, y) + F32_Mul(pm0->z.x, z) + pm0->w.x;
	pv0->y = F32_Mul(pm0->x.y, x) + F32_Mul(pm0->y.y, y) + F32_Mul(pm0->z.y, z) + pm0->w.y;
	pv0->z = F32_Mul(pm0->x.z, x) + F32_Mul(pm0->y.z, y) + F32_Mul(pm0->z.z, z) + pm0->w.z;
	pv0->w = F32_Mul(pm0->x.w, x) + F32_Mul(pm0->y.w, y) + F32_Mul(pm0->z.w, z) + pm0->w.w;

	return pv0;
}

/**
 * @brief	Computes the product of two matrices.
 *
 * Each vector of the result is pm1 applied to the matching vector of pm2:
 *   pm0->x = pm1 * pm2->x,  pm0->y = pm1 * pm2->y,
 *   pm0->z = pm1 * pm2->z,  pm0->w = pm1 * pm2->w
 * using the same arithmetic as FpuMtx44Transform. All four vectors of the
 * result are written, including w.
 *
 * The product is built in a temporary and copied out at the end, so pm0 may
 * be the same object as pm1 and/or pm2.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (left operand)
 * @param [in]	pm2	= input matrix (right operand)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Mul(IrisFMtx44* pm0, const IrisFMtx44* pm1, const IrisFMtx44* pm2)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );
	MATH_FPU_NULLASSERT( pm2 );

	// Build the result off to the side so that no input is overwritten while
	// it is still being read (callers may pass pm0 as an input).
	IrisFMtx44 product;
	FpuMtx44Transform(&product.x, pm1, &pm2->x);
	FpuMtx44Transform(&product.y, pm1, &pm2->y);
	FpuMtx44Transform(&product.z, pm1, &pm2->z);
	FpuMtx44Transform(&product.w, pm1, &pm2->w);

	FpuMtx44Copy(pm0, &product);
	return pm0;
}

/**
 * @brief	Scales a matrix by a scalar.
 *
 * Applies FpuVec4Scale with the factor s to each of the four vectors of pm1.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix
 * @param [in]	s	= scalar value
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Scale(IrisFMtx44* pm0, const IrisFMtx44* pm1, f32 s)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	IrisFVec4* const       dst[4] = { &pm0->x, &pm0->y, &pm0->z, &pm0->w };
	const IrisFVec4* const src[4] = { &pm1->x, &pm1->y, &pm1->z, &pm1->w };
	for( int i = 0; i < 4; ++i )
	{
		FpuVec4Scale(dst[i], src[i], s);
	}
	return pm0;
}

/**
 * @brief	Computes the transpose of a matrix.
 *
 * Every input element is read into a local before any output is written,
 * so pm0 may be the same object as pm1.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Transpose(IrisFMtx44* pm0, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	// Snapshot the whole input (named <vector><component>).
	const f32 xx = pm1->x.x, xy = pm1->x.y, xz = pm1->x.z, xw = pm1->x.w;
	const f32 yx = pm1->y.x, yy = pm1->y.y, yz = pm1->y.z, yw = pm1->y.w;
	const f32 zx = pm1->z.x, zy = pm1->z.y, zz = pm1->z.z, zw = pm1->z.w;
	const f32 wx = pm1->w.x, wy = pm1->w.y, wz = pm1->w.z, ww = pm1->w.w;

	// Write it back with vector and component indices swapped.
	pm0->x.x = xx;	pm0->x.y = yx;	pm0->x.z = zx;	pm0->x.w = wx;
	pm0->y.x = xy;	pm0->y.y = yy;	pm0->y.z = zy;	pm0->y.w = wy;
	pm0->z.x = xz;	pm0->z.y = yz;	pm0->z.z = zz;	pm0->z.w = wz;
	pm0->w.x = xw;	pm0->w.y = yw;	pm0->w.z = zw;	pm0->w.w = ww;

	return pm0;
}

/**
 * @brief	Applies a rotation about the Z axis to a matrix.
 *
 * Builds the rotation matrix R with x vector (c, s, 0, 0), y vector
 * (-s, c, 0, 0) and identity z/w vectors, where c = F32_Cos(rz) and
 * s = F32_Sin(rz). If pm1 is null, pm0 receives R itself; otherwise pm0
 * receives FpuMtx44Mul(pm0, &R, pm1).
 *
 * NOTE(review): whether pm1 may be the same object as pm0 depends on
 * FpuMtx44Mul -- confirm before using this in place.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (may be null)
 * @param [in]	rz	= Z rotation amount (radians)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotZ(IrisFMtx44* pm0, const IrisFMtx44* pm1, f32 rz)
{
	MATH_FPU_NULLASSERT( pm0 );
	const f32 cosZ = F32_Cos(rz);
	const f32 sinZ = F32_Sin(rz);

	// Start from identity and overwrite the four entries the rotation touches.
	IrisFMtx44 rot;
	FpuMtx44Unit(&rot);
	rot.x.x = +cosZ;
	rot.x.y = +sinZ;
	rot.y.x = -sinZ;
	rot.y.y = +cosZ;

	if( pm1 != nullptr )
	{
		FpuMtx44Mul(pm0, &rot, pm1);
	}
	else
	{
		FpuMtx44Copy(pm0, &rot);
	}
	return pm0;
}

/**
 * @brief	Applies a rotation about the Z axis to a matrix (angle given as an index).
 *
 * Same as FpuMtx44RotZ except that the cosine and sine come from
 * F32_CosIdx(idz) and F32_SinIdx(idz). The rotation matrix R has x vector
 * (c, s, 0, 0), y vector (-s, c, 0, 0) and identity z/w vectors. If pm1 is
 * null, pm0 receives R itself; otherwise pm0 receives FpuMtx44Mul(pm0, &R, pm1).
 *
 * NOTE(review): whether pm1 may be the same object as pm0 depends on
 * FpuMtx44Mul -- confirm before using this in place.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (may be null)
 * @param [in]	idz	= Z rotation amount (index; the index-to-angle mapping is defined by F32_CosIdx/F32_SinIdx)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotIdxZ(IrisFMtx44* pm0, const IrisFMtx44* pm1, u16 idz)
{
	MATH_FPU_NULLASSERT( pm0 );
	const f32 cosZ = F32_CosIdx(idz);
	const f32 sinZ = F32_SinIdx(idz);

	// Start from identity and overwrite the four entries the rotation touches.
	IrisFMtx44 rot;
	FpuMtx44Unit(&rot);
	rot.x.x = +cosZ;
	rot.x.y = +sinZ;
	rot.y.x = -sinZ;
	rot.y.y = +cosZ;

	if( pm1 != nullptr )
	{
		FpuMtx44Mul(pm0, &rot, pm1);
	}
	else
	{
		FpuMtx44Copy(pm0, &rot);
	}
	return pm0;
}

/**
 * @brief	Applies a rotation about the Y axis to a matrix.
 *
 * Builds the rotation matrix R with x vector (c, 0, s, 0), z vector
 * (-s, 0, c, 0) and identity y/w vectors, where c = F32_Cos(ry) and
 * s = F32_Sin(ry). If pm1 is null, pm0 receives R itself; otherwise pm0
 * receives FpuMtx44Mul(pm0, &R, pm1).
 *
 * NOTE(review): compared with the X and Z rotations in this file, the sine
 * signs here turn in the opposite sense (x maps towards +z instead of z
 * towards +x) -- confirm this is the intended convention.
 * NOTE(review): whether pm1 may be the same object as pm0 depends on
 * FpuMtx44Mul -- confirm before using this in place.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (may be null)
 * @param [in]	ry	= Y rotation amount (radians)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotY(IrisFMtx44* pm0, const IrisFMtx44* pm1, f32 ry)
{
	MATH_FPU_NULLASSERT( pm0 );
	const f32 cosY = F32_Cos(ry);
	const f32 sinY = F32_Sin(ry);

	// Start from identity and overwrite the four entries the rotation touches.
	IrisFMtx44 rot;
	FpuMtx44Unit(&rot);
	rot.x.x = +cosY;
	rot.x.z = +sinY;
	rot.z.x = -sinY;
	rot.z.z = +cosY;

	if( pm1 != nullptr )
	{
		FpuMtx44Mul(pm0, &rot, pm1);
	}
	else
	{
		FpuMtx44Copy(pm0, &rot);
	}
	return pm0;
}

/**
 * @brief	Applies a rotation about the Y axis to a matrix (angle given as an index).
 *
 * Same as FpuMtx44RotY except that the cosine and sine come from
 * F32_CosIdx(idy) and F32_SinIdx(idy). The rotation matrix R has x vector
 * (c, 0, s, 0), z vector (-s, 0, c, 0) and identity y/w vectors. If pm1 is
 * null, pm0 receives R itself; otherwise pm0 receives FpuMtx44Mul(pm0, &R, pm1).
 *
 * NOTE(review): compared with the X and Z rotations in this file, the sine
 * signs here turn in the opposite sense (x maps towards +z instead of z
 * towards +x) -- confirm this is the intended convention.
 * NOTE(review): whether pm1 may be the same object as pm0 depends on
 * FpuMtx44Mul -- confirm before using this in place.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (may be null)
 * @param [in]	idy	= Y rotation amount (index; the index-to-angle mapping is defined by F32_CosIdx/F32_SinIdx)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotIdxY(IrisFMtx44* pm0, const IrisFMtx44* pm1, u16 idy)
{
	MATH_FPU_NULLASSERT( pm0 );
	const f32 cosY = F32_CosIdx(idy);
	const f32 sinY = F32_SinIdx(idy);

	// Start from identity and overwrite the four entries the rotation touches.
	IrisFMtx44 rot;
	FpuMtx44Unit(&rot);
	rot.x.x = +cosY;
	rot.x.z = +sinY;
	rot.z.x = -sinY;
	rot.z.z = +cosY;

	if( pm1 != nullptr )
	{
		FpuMtx44Mul(pm0, &rot, pm1);
	}
	else
	{
		FpuMtx44Copy(pm0, &rot);
	}
	return pm0;
}

/**
 * @brief	Applies a rotation about the X axis to a matrix.
 *
 * Builds the rotation matrix R with y vector (0, c, s, 0), z vector
 * (0, -s, c, 0) and identity x/w vectors, where c = F32_Cos(rx) and
 * s = F32_Sin(rx). If pm1 is null, pm0 receives R itself; otherwise pm0
 * receives FpuMtx44Mul(pm0, &R, pm1).
 *
 * NOTE(review): whether pm1 may be the same object as pm0 depends on
 * FpuMtx44Mul -- confirm before using this in place.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (may be null)
 * @param [in]	rx	= X rotation amount (radians)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotX(IrisFMtx44* pm0, const IrisFMtx44* pm1, f32 rx)
{
	MATH_FPU_NULLASSERT( pm0 );
	const f32 cosX = F32_Cos(rx);
	const f32 sinX = F32_Sin(rx);

	// Start from identity and overwrite the four entries the rotation touches.
	IrisFMtx44 rot;
	FpuMtx44Unit(&rot);
	rot.y.y = +cosX;
	rot.y.z = +sinX;
	rot.z.y = -sinX;
	rot.z.z = +cosX;

	if( pm1 != nullptr )
	{
		FpuMtx44Mul(pm0, &rot, pm1);
	}
	else
	{
		FpuMtx44Copy(pm0, &rot);
	}
	return pm0;
}

/**
 * @brief	Applies a rotation about the X axis to a matrix (angle given as an index).
 *
 * Same as FpuMtx44RotX except that the cosine and sine come from
 * F32_CosIdx(idx) and F32_SinIdx(idx). The rotation matrix R has y vector
 * (0, c, s, 0), z vector (0, -s, c, 0) and identity x/w vectors. If pm1 is
 * null, pm0 receives R itself; otherwise pm0 receives FpuMtx44Mul(pm0, &R, pm1).
 *
 * NOTE(review): whether pm1 may be the same object as pm0 depends on
 * FpuMtx44Mul -- confirm before using this in place.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (may be null)
 * @param [in]	idx	= X rotation amount (index; the index-to-angle mapping is defined by F32_CosIdx/F32_SinIdx)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotIdxX(IrisFMtx44* pm0, const IrisFMtx44* pm1, u16 idx)
{
	MATH_FPU_NULLASSERT( pm0 );
	const f32 cosX = F32_CosIdx(idx);
	const f32 sinX = F32_SinIdx(idx);

	// Start from identity and overwrite the four entries the rotation touches.
	IrisFMtx44 rot;
	FpuMtx44Unit(&rot);
	rot.y.y = +cosX;
	rot.y.z = +sinX;
	rot.z.y = -sinX;
	rot.z.z = +cosX;

	if( pm1 != nullptr )
	{
		FpuMtx44Mul(pm0, &rot, pm1);
	}
	else
	{
		FpuMtx44Copy(pm0, &rot);
	}
	return pm0;
}

/**
 * @brief	Applies a rotation about all three axes to a matrix.
 *
 * Calls FpuMtx44RotZ, then FpuMtx44RotY, then FpuMtx44RotX, using the z, y
 * and x components of pv0 respectively. The Z step reads pm1; the Y and X
 * steps read the intermediate result back from pm0.
 *
 * NOTE(review): the Y and X steps pass pm0 as both output and input --
 * confirm the per-axis functions (and FpuMtx44Mul) support that.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (forwarded to FpuMtx44RotZ)
 * @param [in]	pv0	= rotation vector (radians)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Rot(IrisFMtx44* pm0, const IrisFMtx44* pm1, const IrisFVec4* pv0)
{
	MATH_FPU_NULLASSERT( pv0 );

	// Each per-axis call returns its output pointer (pm0), which feeds the next step.
	IrisFMtx44* result = FpuMtx44RotZ(pm0, pm1, pv0->z);
	result = FpuMtx44RotY(result, result, pv0->y);
	return FpuMtx44RotX(result, result, pv0->x);
}

/**
 * @brief	Applies a rotation about all three axes to a matrix (angles given as indices).
 *
 * Calls FpuMtx44RotIdxZ, then FpuMtx44RotIdxY, then FpuMtx44RotIdxX, using
 * the z, y and x components of pv0 (each converted to u16) respectively.
 * The Z step reads pm1; the Y and X steps read the intermediate result back
 * from pm0.
 *
 * NOTE(review): the Y and X steps pass pm0 as both output and input --
 * confirm the per-axis functions (and FpuMtx44Mul) support that.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix (forwarded to FpuMtx44RotIdxZ)
 * @param [in]	pv0	= rotation vector (indices)
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44RotIdx(IrisFMtx44* pm0, const IrisFMtx44* pm1, const IrisSVec4* pv0)
{
	MATH_FPU_NULLASSERT( pv0 );

	// Each per-axis call returns its output pointer (pm0), which feeds the next step.
	IrisFMtx44* result = FpuMtx44RotIdxZ(pm0, pm1, static_cast<u16>(pv0->z));
	result = FpuMtx44RotIdxY(result, result, static_cast<u16>(pv0->y));
	return FpuMtx44RotIdxX(result, result, static_cast<u16>(pv0->x));
}

/**
 * @brief	Tests whether a matrix is the identity (unit) matrix.
 *
 * The check is done on the `im` view of the matrix reinterpreted as
 * IrisMtx44, not with floating-point comparisons: each diagonal element must
 * equal 0x3F800000 (the IEEE-754 single-precision bit pattern of 1.0f) and
 * all off-diagonal elements OR-ed together must be 0.
 *
 * NOTE(review): presumably `im` is an integer view of the same storage; if
 * so, an off-diagonal -0.0f (non-zero bits) makes this return false.
 *
 * @param [in]	pm0	= input matrix
 * @return	IRIS_TRUE if the matrix is the identity, otherwise IRIS_FALSE
*/
IRIS_FPU_INLINE IrisBool		FpuMtx44IsUnit(const IrisFMtx44* pm0)
{
	MATH_FPU_NULLASSERT( pm0 );
	const IrisMtx44* bits = (const IrisMtx44*)(pm0);

	// Diagonal must be exactly 1.0f; everything else must have no bits set.
	if( bits->im.x.x == 0x3F800000 && bits->im.y.y == 0x3F800000
	 && bits->im.z.z == 0x3F800000 && bits->im.w.w == 0x3F800000
	 && (                bits->im.x.y | bits->im.x.z | bits->im.x.w
		| bits->im.y.x                | bits->im.y.z | bits->im.y.w
		| bits->im.z.x | bits->im.z.y                | bits->im.z.w
		| bits->im.w.x | bits->im.w.y | bits->im.w.z                ) == 0 )
	{
		return IRIS_TRUE;
	}
	return IRIS_FALSE;
}

/**
 * @brief	Returns the trace of a matrix (sum of the diagonal elements).
 * @param [in]	pm0	= input matrix
 * @return	trace of the matrix
*/
IRIS_FPU_INLINE f32			FpuMtx44Trace(const IrisFMtx44* pm0)
{
	MATH_FPU_NULLASSERT( pm0 );

	// Accumulate in x, y, z, w order.
	f32 trace = pm0->x.x;
	trace += pm0->y.y;
	trace += pm0->z.z;
	trace += pm0->w.w;
	return trace;
}

/**
 * @brief	Returns the determinant of a matrix.
 *
 * Uses cofactor expansion along the x components of the four vectors:
 *   det = x.x*M1 - y.x*M2 + z.x*M3 - w.x*M4
 * where each Mk is the 3x3 determinant formed from the y/z/w components of
 * the three other vectors.
 *
 * @param [in]	pm0	= input matrix
 * @return	determinant
*/
IRIS_FPU_INLINE f32			FpuMtx44Determinant(const IrisFMtx44* pm0)
{
	MATH_FPU_NULLASSERT( pm0 );

	// mRC: R = component index (2 = y, 3 = z, 4 = w), C = vector index
	// (1 = x vector, 2 = y vector, 3 = z vector, 4 = w vector).
	const f32 m21 = pm0->x.y, m31 = pm0->x.z, m41 = pm0->x.w;
	const f32 m22 = pm0->y.y, m32 = pm0->y.z, m42 = pm0->y.w;
	const f32 m23 = pm0->z.y, m33 = pm0->z.z, m43 = pm0->z.w;
	const f32 m24 = pm0->w.y, m34 = pm0->w.z, m44 = pm0->w.w;

	// Minor with the x vector removed (uses vectors y, z, w).
	const f32 minor1 =
		  F32_Mul(F32_Mul(m22, m33), m44)
		+ F32_Mul(F32_Mul(m23, m34), m42)
		+ F32_Mul(F32_Mul(m24, m32), m43)
		- F32_Mul(F32_Mul(m24, m33), m42)
		- F32_Mul(F32_Mul(m23, m32), m44)
		- F32_Mul(F32_Mul(m22, m34), m43);

	// Minor with the y vector removed (uses vectors x, z, w).
	const f32 minor2 =
		  F32_Mul(F32_Mul(m21, m33), m44)
		+ F32_Mul(F32_Mul(m23, m34), m41)
		+ F32_Mul(F32_Mul(m24, m31), m43)
		- F32_Mul(F32_Mul(m24, m33), m41)
		- F32_Mul(F32_Mul(m23, m31), m44)
		- F32_Mul(F32_Mul(m21, m34), m43);

	// Minor with the z vector removed (uses vectors x, y, w).
	const f32 minor3 =
		  F32_Mul(F32_Mul(m21, m32), m44)
		+ F32_Mul(F32_Mul(m22, m34), m41)
		+ F32_Mul(F32_Mul(m24, m31), m42)
		- F32_Mul(F32_Mul(m24, m32), m41)
		- F32_Mul(F32_Mul(m22, m31), m44)
		- F32_Mul(F32_Mul(m21, m34), m42);

	// Minor with the w vector removed (uses vectors x, y, z).
	const f32 minor4 =
		  F32_Mul(F32_Mul(m21, m32), m43)
		+ F32_Mul(F32_Mul(m22, m33), m41)
		+ F32_Mul(F32_Mul(m23, m31), m42)
		- F32_Mul(F32_Mul(m23, m32), m41)
		- F32_Mul(F32_Mul(m22, m31), m43)
		- F32_Mul(F32_Mul(m21, m33), m42);

	const f32 term1 = F32_Mul(pm0->x.x, minor1);
	const f32 term2 = F32_Mul(pm0->y.x, minor2);
	const f32 term3 = F32_Mul(pm0->z.x, minor3);
	const f32 term4 = F32_Mul(pm0->w.x, minor4);

	// Alternating signs of the cofactor expansion.
	return (term1 - term2 + term3 - term4);
}

/**
 * @brief	Computes the adjoint (adjugate) matrix.
 *
 * For every pair (vector index i, component index j) the 3x3 minor obtained
 * by removing vector i and component j is built, its determinant is taken
 * with FpuMtx33Determinant, the sign (-1)^(i+j) is applied, and the value is
 * stored at m[j][i] of the result (i.e. transposed).
 *
 * The result is built in a temporary and copied out at the end, so pm0 may
 * be the same object as pm1.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Adjoint(IrisFMtx44* pm0, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	IrisFMtx44 result;
	for( int skipVec = 0; skipVec < 4; ++skipVec )
	{
		for( int skipElem = 0; skipElem < 4; ++skipElem )
		{
			// Gather the three vectors that remain once vector `skipVec` is removed.
			// They are re-read on every pass because they are edited in place below.
			IrisFVec4 kept[3];
			switch( skipVec )
			{
			case 0:
				kept[0] = pm1->y;	kept[1] = pm1->z;	kept[2] = pm1->w;
				break;
			case 1:
				kept[0] = pm1->x;	kept[1] = pm1->z;	kept[2] = pm1->w;
				break;
			case 2:
				kept[0] = pm1->x;	kept[1] = pm1->y;	kept[2] = pm1->w;
				break;
			default:
				kept[0] = pm1->x;	kept[1] = pm1->y;	kept[2] = pm1->z;
				break;
			}

			// Remove component `skipElem` by shifting the later components down
			// so that the surviving three end up in x, y, z.
			for( int k = 0; k < 3; ++k )
			{
				IrisFVec4& vec = kept[k];
				if( skipElem <= 0 ) vec.x = vec.y;
				if( skipElem <= 1 ) vec.y = vec.z;
				if( skipElem <= 2 ) vec.z = vec.w;
			}

			IrisFMtx33 minor;
			minor.x.x = kept[0].x;	minor.x.y = kept[0].y;	minor.x.z = kept[0].z;
			minor.y.x = kept[1].x;	minor.y.y = kept[1].y;	minor.y.z = kept[1].z;
			minor.z.x = kept[2].x;	minor.z.y = kept[2].y;	minor.z.z = kept[2].z;

			// (i ^ j) & 1 is set exactly when i + j is odd: negative cofactor sign.
			const bool negate = ((skipVec ^ skipElem) & 1) != 0;
			if( negate )
			{
				result.m[skipElem][skipVec] = -FpuMtx33Determinant(&minor);
			}
			else
			{
				result.m[skipElem][skipVec] =  FpuMtx33Determinant(&minor);
			}
		}
	}
	FpuMtx44Copy(pm0, &result);
	return pm0;
}

/**
 * @brief	Computes the inverse of a matrix, assuming it is a rotation plus a translation.
 *
 * This is not a general inverse. The matrix is transposed with the x/y/z
 * components of its w vector zeroed, and the new translation is the negated
 * product of that transposed matrix (via FpuMtx44TransformXYZ) with the
 * original translation. pm0->w.w ends up equal to pm1->w.w; the w
 * components of pm1's x/y/z vectors do not reach the result.
 *
 * The input is copied to a local first, so pm0 may be the same object as pm1.
 * For a general matrix use FpuMtx44Inverse2.
 *
 * @param [out]	pm0				= output matrix
 * @param [in]	pm1				= input matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Inverse(IrisFMtx44* pm0, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	IrisFMtx44 work;
	FpuMtx44Copy(&work, pm1);

	// Split off the translation (only x, y, z are used; trans.w is never read).
	IrisFVec4 trans;
	trans.x = work.w.x;
	trans.y = work.w.y;
	trans.z = work.w.z;
	work.w.x = 0.0f;
	work.w.y = 0.0f;
	work.w.z = 0.0f;

	// Transposing the rotation part inverts it (under the rotation assumption).
	FpuMtx44Transpose(pm0, &work);

	// New translation = -(transposed rotation * old translation).
	FpuMtx44TransformXYZ(&trans, pm0, &trans);
	pm0->w.x = -trans.x;
	pm0->w.y = -trans.y;
	pm0->w.z = -trans.z;

	return pm0;
}

/**
 * @brief	Computes the inverse of a matrix as adjoint / determinant.
 *
 * The determinant is always reported through pDeterminant when that pointer
 * is non-null. If the determinant is exactly 0.0f, pm0 is left unmodified
 * and null is returned.
 *
 * @param [out]	pm0				= output matrix
 * @param [out]	pDeterminant	= receives the determinant (may be null)
 * @param [in]	pm1				= input matrix
 * @return	output matrix (pm0), or null if the determinant is zero
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44Inverse2(IrisFMtx44* pm0, f32* pDeterminant, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	const f32 det = FpuMtx44Determinant(pm1);
	if( pDeterminant != nullptr )
	{
		*pDeterminant = det;
	}

	// A zero determinant means there is no inverse.
	if( det == 0.0f )
	{
		return nullptr;
	}

	FpuMtx44Adjoint(pm0, pm1);
	const f32 invDet = F32_Div(1.0f, det);
	FpuMtx44Scale(pm0, pm0, invDet);
	return pm0;
}

/**
 * @brief	Builds a matrix that projects onto a plane (drop shadow) from a vector and a plane.
 *
 * The plane is normalized with FpuPlaneNormalize to (a, b, c, d). With
 * dot = FpuVec4InnerProductXYZ(normalized plane, pv0), the result is:
 *   pm0->x = -a * pv0,  pm0->y = -b * pv0,
 *   pm0->z = -c * pv0,  pm0->w = -d * pv0
 * with `dot` then added to each diagonal element.
 *
 * NOTE(review): pv0 is presumably the light direction/position -- confirm.
 * NOTE(review): the dot product covers XYZ only, so the plane's d and
 * pv0->w do not contribute to the diagonal term -- confirm this is intended
 * for inputs with non-zero w.
 *
 * @param [out]	pm0				= output matrix
 * @param [in]	pv0				= input vector
 * @param [in]	pl0				= plane
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44DropShadow(IrisFMtx44* pm0, const IrisFVec4* pv0, const IrisFPlane* pl0)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pv0 );
	MATH_FPU_NULLASSERT( pl0 );

	// The union lets the normalized plane be read both as a plane (fp) and as a vector (v.fv).
	IrisUnion128 plane;
	FpuPlaneNormalize(&plane.fp, pl0);
	const f32 dot = FpuVec4InnerProductXYZ(&plane.v.fv, pv0);

	// Each output vector is pv0 scaled by the negated matching plane coefficient.
	FpuVec4Scale(&pm0->x, pv0, -plane.fp.a);
	FpuVec4Scale(&pm0->y, pv0, -plane.fp.b);
	FpuVec4Scale(&pm0->z, pv0, -plane.fp.c);
	FpuVec4Scale(&pm0->w, pv0, -plane.fp.d);

	// Add the dot product along the diagonal.
	pm0->x.x += dot;
	pm0->y.y += dot;
	pm0->z.z += dot;
	pm0->w.w += dot;

	return pm0;
}

/**
 * @brief	Normalizes the x, y and z vectors of a matrix.
 *
 * Z is rebuilt as the cross product of the input X and Y, X is rebuilt as
 * the cross product of the input Y and that new Z, then X, Y and Z are each
 * passed through FpuVec4Normalize. The w vector is copied unchanged.
 * The order of the calls below is significant and must be kept.
 *
 * @param [out]	pm0				= output matrix
 * @param [in]	pm1				= input matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44NormalizeXYZ(IrisFMtx44* pm0, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	IrisFVec4* const       outX = &pm0->x;
	IrisFVec4* const       outY = &pm0->y;
	IrisFVec4* const       outZ = &pm0->z;
	const IrisFVec4* const inX  = &pm1->x;
	const IrisFVec4* const inY  = &pm1->y;

	// Obtain Z from the cross product of X and Y.
	FpuVec4OuterProductXYZ(outZ, inX, inY);
	// Obtain X from the cross product of Y and the Z just computed.
	FpuVec4OuterProductXYZ(outX, inY, outZ);

	FpuVec4Normalize(outX, outX);
	FpuVec4Normalize(outY, inY);
	FpuVec4Normalize(outZ, outZ);

	pm0->w = pm1->w;
	return pm0;
}

/**
 * @brief	Truncates the precision of every element of a matrix.
 *
 * Applies FpuVec4TruncatePrecision24 to each of the four vectors of pm1.
 * NOTE(review): presumably this drops the low 8 bits of each float, leaving
 * 24 bits -- confirm against FpuVec4TruncatePrecision24.
 *
 * @param [out]	pm0	= output matrix
 * @param [in]	pm1	= input matrix
 * @return	output matrix (pm0)
*/
IRIS_FPU_INLINE IrisFMtx44*	FpuMtx44TruncatePrecision24(IrisFMtx44* pm0, const IrisFMtx44* pm1)
{
	MATH_FPU_NULLASSERT( pm0 );
	MATH_FPU_NULLASSERT( pm1 );

	IrisFVec4* const       dst[4] = { &pm0->x, &pm0->y, &pm0->z, &pm0->w };
	const IrisFVec4* const src[4] = { &pm1->x, &pm1->y, &pm1->z, &pm1->w };
	for( int i = 0; i < 4; ++i )
	{
		FpuVec4TruncatePrecision24(dst[i], src[i]);
	}
	return pm0;
}

}	// end of namespace math
}	// end of namespace iris

#endif
