#ifndef		__T_TRANS_ROTATE_H_INCLUDE_
#define		__T_TRANS_ROTATE_H_INCLUDE_

#include <math.h>
#include "../type/t_type_define.h"
#include "../contena/t_image_base.h"
#include "t_trans_template_algo.h"

namespace t_image_engine{

class t_trans_rotate{
public:
	// rotate 
	// srcpxdeg, Sʒucx, cyŉ]dstɏóBdst̃TCY͕ύXȂ
	inline static bool rotate_n(const t_image_interface& src, t_image_interface* dst, 
						float deg, int cx, int cy, stretch_type type = nearest_neighbor)
	{
		if(src.tag() != dst->tag())
			return false;
		switch(src.tag()){
		case rgb_24bit:		return t_trans_rotate_f<t_image_rgb>::func(src, dst, deg, cx, cy, type);
		case rgba_32bit:	return t_trans_rotate_f<t_image_rgba>::func(src, dst, deg, cx, cy, type);
		case gray_8bit:		return t_trans_rotate_f<t_image_gray>::func(src, dst, deg, cx, cy, type);
		}
		return false;
	}

protected:
	// resize template
	template <class _CLASS> class t_trans_rotate_f
	{
	public:
		// rotate
		inline static bool func(	const t_image_interface& src, t_image_interface* dst, 
							float deg, int sx, int sy, stretch_type type)
		{
			return _func(	reinterpret_cast<const _CLASS*>(&src), 
							reinterpret_cast<_CLASS*>(dst), deg, sx,sy, get_rotatefunc(type));
		}				

		// 摜^Cv
		typename typedef _CLASS::imagetype_ _TYPE;

		// rotate֐|C^
		typedef bool(*rotatefunc)(const _CLASS* src, _CLASS* dst,
			float deg, int cx, int cy, float sind, float cosd);

		// rotates
		inline static bool _func(const _CLASS* src, _CLASS* dst, float deg, int cx, int cy, rotatefunc rfunc)
		{
			if(!rfunc)
				return false;
			const float rad = deg * pi_f / 180.0f;
			const float sind = sinf(rad);
			const float cosd = cosf(rad);
			return (*rfunc)(src, dst, deg, cx, cy, sind, cosd);
		}

		// ]ASY̑I
		inline static rotatefunc get_rotatefunc( stretch_type type)
		{
			switch(type){
			case nearest_neighbor:	return f_nearest_neighbor;
			case bi_linear:			return f_bi_linear;
			case bi_cubic:			return f_bi_cubic;
			}
			return NULL;
		}

		// nearest_neighbor
		static bool f_nearest_neighbor(const _CLASS* src, _CLASS* dst, 
			float deg, int cx, int cy, float sind, float cosd)
		{	

#ifdef USING_SSE2 // 128bitꊇ
			_TYPE* dpt = dst->pointer();
			const _TYPE* spt = src->pointer_safe();
			const __m128 xaddm = _mm_set_ps1(cosd * 4);
			const __m128 yaddm = _mm_set_ps1(sind * 4);
			const __m128 xdefm = _mm_set_ps(cosd*3,cosd*2,cosd,0);
			const __m128 ydefm = _mm_set_ps(sind*3,sind*2,sind,0);
			const int mod = dst->width() & 0x3;
			const int cnt = dst->width() / 4;
			for(int y = 0; y < dst->height(); y++){
				float fx = -cx * cosd - (y - cy) * sind + cx;
				float fy = -cx * sind + (y - cy) * cosd + cy;
				__m128 fxm = _mm_add_ps( _mm_set_ps1(fx), xdefm);
				__m128 fym = _mm_add_ps( _mm_set_ps1(fy), ydefm);
				for(int x = 0; x < cnt+1; x++){
					int loop = (x==cnt)?mod:4;
					fxm = _mm_add_ps(fxm, xaddm);
					fym = _mm_add_ps(fym, yaddm);
					__m128i xposmi = _mm_cvttps_epi32(fxm);
					__m128i yposmi = _mm_cvttps_epi32(fym);
					for(int i = 0; i < loop; i++){
						if(!(	xposmi.m128i_i32[i] < 0 || xposmi.m128i_i32[i] >= dst->width() ||
								yposmi.m128i_i32[i] < 0 || yposmi.m128i_i32[i] >= dst->height())){
							*dpt = *(spt + yposmi.m128i_i32[i] * src->width() + xposmi.m128i_i32[i]);
						}
						dpt++;
					}
				}
			}
#else
			_TYPE* dpt = dst->pointer();
			const _TYPE* spt = src->pointer_safe();
			for(int y = 0; y < dst->height(); y++){
				float fx = -cx * cosd - (y - cy) * sind + cx;
				float fy = -cx * sind + (y - cy) * cosd + cy;
				for(int x = 0; x < dst->width(); x++, fx += cosd, fy += sind){
					const int xpos = static_cast<int>(fx); 
					const int ypos = static_cast<int>(fy); 
					if(!(xpos < 0 || xpos >= dst->width() || ypos < 0 || ypos >= dst->height())){
						*dpt = *(spt + ypos * src->width() + xpos); 
					}
					dpt++;
				}
			}
#endif
			return true;
		}

		// bi_linear
		static bool f_bi_linear(const _CLASS* src, _CLASS* dst, 
			float deg, int cx, int cy, float sind, float cosd)
		{
			_TYPE* dpt = dst->pointer();
			const _TYPE* spt = src->pointer_safe();
			const int w = src->width();
			for(int y = 0; y < dst->height(); y++){
				float fx = -cx * cosd - (y - cy) * sind + cx;
				float fy = -cx * sind + (y - cy) * cosd + cy;
				for(int x = 0; x < dst->width(); x++, fx += sind, fy += cosd){
					const int xpos = static_cast<int>(fx);;
					const int ypos = static_cast<int>(fy);;
					if(!(xpos < 0 || xpos > dst->width() || ypos < 0 || ypos > dst->height())){
						const _TYPE* sptb = spt + ypos * w + xpos;
						t_trans_stretch<_TYPE>::bi_linear_calc(
							dpt, sptb, sptb+w, sptb+1, sptb+w+1, xpos, ypos, fx, fx);
					}
					dpt++;
				}
			}
			return true;
		}

		// bi_cubic
		static bool f_bi_cubic(const _CLASS* src, _CLASS* dst, 
			float deg, int cx, int cy, float sind, float cosd)
		{
			typedef t_trans_stretch<_TYPE> algo;
			_TYPE* dpt = dst->pointer();
			const _TYPE* spt = src->pointer_safe();
			const int w = src->width();
			const int h = src->height();
			float wtable[16];
			const _TYPE* ptable[16];
			for(int y = 0; y < dst->height(); y++){
				float fx = -cx * cosd - (y - cy) * sind + cx;
				float fy = -cx * sind + (y - cy) * cosd + cy;
				for(int x = 0; x < dst->width(); x++, fx += sind, fy += cosd){
					const int xpos = static_cast<int>(fx);;
					const int ypos = static_cast<int>(fy);;
					if(!(xpos < 0 || xpos > dst->width() || ypos < 0 || ypos > dst->height())){
						const _TYPE* sptb = spt + ypos * w + xpos;
						for(int yy = -1; yy < 3; yy++){
							int p = (yy+1)*4+1;
							for(int xx = -1; xx < 3; xx++){
								wtable[p+xx] =	algo::bi_cubic_coef(fabs(fx-static_cast<float>(xpos+xx)))*
												algo::bi_cubic_coef(fabs(fy-static_cast<float>(ypos+yy)));
								ptable[p+xx] = (xpos+xx<0 || xpos+xx>w || ypos+yy<0 || ypos+yy>h)?
												sptb : sptb+w*yy+xx;
							}
						}
						algo::bi_cubic_calc(dpt, ptable, wtable);
					}
					dpt++;
				}
	
			}
			return false;
		}
	};
};

}


#endif