#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <signal.h>
#include <linux/soundcard.h>
#ifdef BUILD_ALSA5
#define BUILD_ALSA
#include <sys/asoundlib.h>
#endif
#ifdef BUILD_ALSA9
#define BUILD_ALSA
#include <alsa/asoundlib.h>
#endif


#include "gfx.h"

/* Set to non-zero to enable the verbose diagnostics guarded by if(DEBUG). */
#define DEBUG 0

/* Capacity of the per-iteration measurement arrays; main() calls the exit
 * handler once this many samples have been recorded. */
#define MAX_TIME_SAMPLES 250000

/* Fraction of one fragment period that the calibrated busy loop is meant to
 * consume on every iteration (0.80 = 80%). */
static double cpu_load = 0.80;

#ifdef USE_PENTIUM_TIMER
/*
 * Read the CPU time-stamp counter.
 *
 * RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX.  The two
 * halves are captured separately and combined by hand: the "=A" constraint
 * names the EDX:EAX pair only for 64-bit values on 32-bit x86; on x86-64 it
 * selects a single register, so the upper half of the counter would be lost.
 *
 * Returns the full 64-bit tick count.
 */
static inline unsigned long long int rdtsc(void)
{
	unsigned int lo, hi;

	__asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long int)hi << 32) | lo;
}
#endif

#ifdef USE_PENTIUM_TIMER
  /* Current time in seconds: TSC ticks divided by the clock rate that
   * calibrate_loop() stores in cpu_hz. */
  #define mygettime() (rdtsc() / cpu_hz)
#endif

#ifdef USE_GENERIC_TIMER
/*
 * Return the current time of day in seconds, including the microsecond
 * fraction, as reported by gettimeofday().
 *
 * The timeval is an automatic variable so that the call made from the exit
 * handler cannot trample a call that is in progress in the main loop.
 */
static double mygettime(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	return now.tv_sec + now.tv_usec / 1000000.0;
}
#endif

/* Forward declarations of the file-local helpers that main() uses; their
 * definitions follow main(). */
static int calibrate_loop(void);
static int init_alsa_audio(int freq, int frags, int frag_size);
static int init_oss_audio(int freq, int frags, int frag_size);
static int set_realtime_priority(void);
static void *my_exithandler(void);
static void mydelay(int loops);
static void run_background_sync(int sync_usecs);
static void tone_sample_buffer(void);
static void load_sample_buffer(void);

/* common */
/* Result of fork() in run_background_sync(): 0 inside the sync child, the
 * child's pid in the parent; -1 until fork() has been called. */
static pid_t pid  = -1;

/* Values for output_type. */
#define OUTPUT_ALSA	0
#define OUTPUT_OSS	1
/* Selected audio backend: ALSA by default when built with ALSA support,
 * otherwise OSS; main() overrides it with -t. */
#ifdef BUILD_ALSA
static int output_type = OUTPUT_ALSA;
#else
static int output_type = OUTPUT_OSS;
#endif
/* Device path (OSS) or pcm name (ALSA); NULL until set by -d or defaulted by
 * the init functions. */
static char *dsp_device = NULL;

/* for oss */
/* File descriptor of the opened OSS device, -1 while closed. */
static int audio_fd = -1;

/* for alsa */
#ifdef BUILD_ALSA
static snd_pcm_t *pcm_handle = NULL;
#endif

/* buffer info */
/* Total device buffer size and size of one fragment, both in bytes. */
static int aubuf_size, aufrag_size;
/* Output buffer of aubuf_size bytes; main() writes its first aufrag_size
 * bytes to the device on every iteration. */
static short *sample_buffer;
/* Number of bytes that main() read from the sample file into input_buffer. */
static int sample_len;
static short *input_buffer;

/* CPU clock in Hz as parsed from /proc/cpuinfo by calibrate_loop(). */
static double cpu_hz=0.0;
/* Playback time of one fragment in seconds, plus the +/-1 ms and +/-2 ms
 * bounds used to classify each measured iteration. */
static double fragment_latency=0.0;
static double fragment_latency_minus_1ms,fragment_latency_plus_1ms;
static double fragment_latency_minus_2ms,fragment_latency_plus_2ms;

/* Expected busy-loop time per iteration (fragment_latency * cpu_load) in
 * seconds, plus the +/-0.2 ms and +/-0.1 ms classification bounds. */
static double cpu_latency=0.0;
static double cpu_latency_minus_02ms,cpu_latency_plus_02ms;
static double cpu_latency_minus_01ms,cpu_latency_plus_01ms;

/* Playback time of the whole device buffer in seconds. */
static double buffer_latency;

/* Per-iteration records: start time relative to mytime_start, duration of
 * busy loop plus write, and duration of the busy loop alone. */
static float time_arr[MAX_TIME_SAMPLES+1];
static float latency_arr[MAX_TIME_SAMPLES+1];
static float latency2_arr[MAX_TIME_SAMPLES+1];

/* Iterations that took at least buffer_latency or where the ALSA write
 * returned -EPIPE. */
static int overruns=0;
/* Largest observed iteration time and largest observed busy-loop time. */
static double max_timediff=0.0;
static double max_timediff2=0.0;

/* Number of recorded iterations and how many of them fell within the
 * fragment-latency windows. */
static  int num_time_samples=0;
static  int num_times_within_1ms=0;
static  int num_times_within_2ms=0;

/* How many busy-loop times fell within the cpu-latency windows. */
static  int cpu_num_times_within_02ms=0;
static  int cpu_num_times_within_01ms=0;

/* Time stamps taken when the measurement loop starts and in the exit
 * handler; mytime_x is scratch storage for the relative start time. */
static  double mytime_start;
static  double mytime_end;
static  double mytime_x;


/*
 * Print the command-line synopsis and the list of options to stderr.
 * Every option accepted by the getopt() string in main() is listed here.
 */
static void usage(void)
{
	fprintf(stderr, "usage: latencytest [options] [sample-data [fragments [fragsize [sync-time]]]]\n");
	fprintf(stderr, "  sample-data = none  -- generate tone\n");
	fprintf(stderr, "  sample-data = quiet -- run quietly\n");
	fprintf(stderr, "\noptions:\n");
	fprintf(stderr, "  -t type     specify driver type (oss or alsa)\n");
	fprintf(stderr, "  -d device   specify device or pcm name\n");
	fprintf(stderr, "  -r rate     set sample rate in Hz\n");
	fprintf(stderr, "  -f size     set fragment size in bytes\n");
	fprintf(stderr, "  -n num      set number of fragments\n");
	fprintf(stderr, "  -q          run quietly\n");
	fprintf(stderr, "  -s size     set the maximum sample data size\n");
	fprintf(stderr, "  -p          run WITHOUT mlock and sched_fifo\n");
}


/* Values of tone_type in main(): what is placed in sample_buffer before
 * each write. */
#define TONE_QUIET	0	/* nothing; the buffer keeps its initial zeroes */
#define TONE_GENERATED	1	/* tone_sample_buffer() */
#define TONE_SAMPLE	2	/* load_sample_buffer() from the sample file */

int main(int argc,char **argv)
{
	int frags;
	int frag_size;
	int frequency;

	int sync_usecs = 0;
	int max_sample_bufsize = 1000000;

	double mytimediff;
	double mytimediff2;
	double mytime1;
	double mytime2;
	double mytime3;

	int loops_per_run;
	double loops_per_sec;
	int loops_per_grain;
	double grain_len;

	int res, err;
	int tone_type = TONE_GENERATED;
	int got_overrun = 0;
	int do_sched_fifo = 1;

	int infd;
	int c;

	frequency = 44100;
	frags = 3;
	frag_size = 2 << 12;

	signal(SIGTERM, my_exithandler);
	signal(SIGINT, my_exithandler);

	while ((c = getopt(argc, argv, "d:r:f:n:s:t:qp")) != -1) {
		switch (c) {
		case 'd':
			dsp_device = optarg;
			break;
		case 'r':
			frequency = atoi(optarg);
			break;
		case 'f':
			frag_size = atoi(optarg);
			break;
		case 'n':
			frags = atoi(optarg);
			break;
		case 's':
			max_sample_bufsize = atoi(optarg);
			break;
		case 'q':
			tone_type = TONE_QUIET;
			break;
		case 't':
			if (*optarg == 'o' || *optarg == 'O')
				output_type = OUTPUT_OSS;
			else
				output_type = OUTPUT_ALSA;
			break;
		case 'p':
			do_sched_fifo = 0;
			break;
		default:
			usage();
			exit(1);
		}
	}

	if (output_type == OUTPUT_OSS)
		printf("output type = OSS\n");
	else
		printf("output type = ALSA\n");

	if (argc > optind + 1)
		frags = atoi(argv[optind + 1]);
	if (frags <2 || frags >16) {
		fprintf(stderr,"num fragments out of range: valid values 2-16\n");
		exit(1);
	}

	if (argc > optind + 2)
		frag_size = atoi(argv[optind + 2]);
	if (frag_size < 128 || frag_size >32768) {
		fprintf(stderr,"fragment size out of range: valid values between and 32768\n");
		exit(1);
	}

	if (argc > optind + 3)
		sync_usecs = atoi(argv[optind + 3]) * 1000;

#if 1
	if(DEBUG)  fprintf(stderr,"calling mlockall() to prevent pagefaults ....\n");
	if (do_sched_fifo) {
		if(mlockall(MCL_CURRENT|MCL_FUTURE)) {
			perror("mlockall() failed, exiting. mlock");
			exit(1);
		}

		res = set_realtime_priority();
		if(res == -1) {
			fprintf(stderr,"can't get realtime priority, run the program as root.\n");
			exit(1);
		}
		if(DEBUG) fprintf(stderr,"got realtime scheduling\n");
	}
#endif

	if (output_type == OUTPUT_OSS)
		res = init_oss_audio(frequency, frags, frag_size);
	else
		res = init_alsa_audio(frequency, frags, frag_size);
	if (res < 0)
		exit(1);
	fprintf(stderr, "audio setup: fragment size=%d  total buffer size=%d\n",
		aufrag_size, aubuf_size);

	loops_per_sec = calibrate_loop();
	grain_len = aufrag_size / ((double)frequency * 4);
	loops_per_grain = loops_per_sec * grain_len;
	loops_per_run = loops_per_grain * cpu_load;
	if(DEBUG) printf("cpu_load=%f  loops per run = %d\n",cpu_load, loops_per_run);

	sample_buffer = malloc(aubuf_size);
	if (! sample_buffer) {
		perror("malloc buffer");
		exit(1);
	}
	memset(sample_buffer, 0, aubuf_size);

	if (argc > optind) {
		if (! strcmp(argv[optind], "none"))
			tone_type = TONE_GENERATED;
		else if (! strcmp(argv[optind], "quiet"))
			tone_type = TONE_QUIET;
		else {
			tone_type = TONE_SAMPLE;
			input_buffer = malloc(max_sample_bufsize);
			if (! input_buffer) {
				perror("malloc sample buffer");
				exit(1);
			}
			infd = open(argv[optind], O_RDONLY);
			if (infd<0) {
				perror("error in opening file. open");
				exit(0);
			}
			sample_len = read(infd, input_buffer, max_sample_bufsize);
			close(infd);
		}
	}

	/* calculate parameters */
	fragment_latency = aufrag_size / ((double)frequency * 4);
	fragment_latency_minus_1ms = fragment_latency - 0.001;
	fragment_latency_plus_1ms = fragment_latency + 0.001;

	fragment_latency_minus_2ms = fragment_latency - 0.002;
	fragment_latency_plus_2ms = fragment_latency + 0.002;

	cpu_latency = fragment_latency * cpu_load;
	cpu_latency_minus_02ms = cpu_latency - 0.0002;
	cpu_latency_plus_02ms = cpu_latency + 0.0002;

	cpu_latency_minus_01ms = cpu_latency - 0.0001;
	cpu_latency_plus_01ms = cpu_latency + 0.0001;

	buffer_latency = aubuf_size / ((double)frequency * 4);

	printf("fragment latency = %f ms\n",fragment_latency*1000.0);
	printf("cpu latency = %f ms\n",cpu_latency*1000.0);

	if(DEBUG) printf("buffer latency=%f\n",buffer_latency);

	if (sync_usecs > 0)
		run_background_sync(sync_usecs);

	mytime_start = mygettime();

	for(;;) {
		if (tone_type == TONE_GENERATED)
			tone_sample_buffer();
		else if (tone_type == TONE_SAMPLE)
			load_sample_buffer();

		mytime1 = mygettime();
		mydelay(loops_per_run);
		mytime2 = mygettime();
		if (output_type == OUTPUT_OSS) {
			err = write(audio_fd, sample_buffer, aufrag_size);
			if (err <= 0) {
				fprintf(stderr, "write error\n");
			}
		} else {
#ifdef BUILD_ALSA9
			err = snd_pcm_writei(pcm_handle, sample_buffer, aufrag_size >> 2);
			if (err <= 0) {
				if (err == -EPIPE) {
					snd_pcm_prepare(pcm_handle);
					fprintf(stderr, "overrun!\n");
					got_overrun = 1;
				} else {
					fprintf(stderr, "pcm write error %d\n", -err);
					exit(1);
				}
			}
#endif
#ifdef BUILD_ALSA5
			err = snd_pcm_write(pcm_handle, sample_buffer, aufrag_size);
			if (err <= 0) {
				if (err == -EPIPE) {
					snd_pcm_playback_prepare(pcm_handle);
					fprintf(stderr, "overrun!\n");
					got_overrun = 1;
				} else {
					fprintf(stderr, "pcm write error %d\n", -err);
					exit(1);
				}
			}
#endif
		}
		mytime3 = mygettime();

		mytimediff = mytime3 - mytime1;
		mytimediff2 = mytime2 - mytime1;

		mytime_x=mytime1-mytime_start;
		time_arr[num_time_samples]=mytime_x;
		latency_arr[num_time_samples]=mytimediff;
		latency2_arr[num_time_samples]=mytimediff2;

		num_time_samples++;
		if(num_time_samples >= MAX_TIME_SAMPLES) my_exithandler();

		if(mytimediff >= (fragment_latency_minus_1ms) && mytimediff <= (fragment_latency_plus_1ms)) num_times_within_1ms++;

		if(mytimediff >= (fragment_latency_minus_2ms) && mytimediff <= (fragment_latency_plus_2ms)) num_times_within_2ms++;

		if(mytimediff2 >= (cpu_latency_minus_02ms) && mytimediff2 <= (cpu_latency_plus_02ms)) cpu_num_times_within_02ms++;

		if(mytimediff2 >= (cpu_latency_minus_01ms) && mytimediff2 <= (cpu_latency_plus_01ms)) cpu_num_times_within_01ms++;

		if (mytimediff2 > max_timediff2) max_timediff2=mytimediff2;

		if (mytimediff > max_timediff) max_timediff=mytimediff;

		if (mytimediff >= buffer_latency || got_overrun) {
			overruns++;
			got_overrun = 0;

			if(DEBUG) {
				printf("OVERRUN nr=%d :",overruns);
				printf("time diff=%f buffer_latency=%f factor=%.1f %%\n",mytimediff,buffer_latency,(mytimediff/buffer_latency)*100.0);
			}

		}
	}

	return(0);
}


/*
 * Fill the next fragment of sample_buffer with a generated tone.
 *
 * Each sample is the running accumulator truncated to 16 bits; the
 * accumulator advances by mystep per sample, and mystep itself moves by one
 * per call, turning around once it leaves the range 80..300.
 *
 * Only aufrag_size BYTES are written to the device per iteration, so exactly
 * that many bytes' worth of 16-bit samples are generated.  The accumulator is
 * unsigned so that its wrap-around is well defined.
 */
static void tone_sample_buffer(void)
{
	int nsamples = aufrag_size / (int)sizeof(sample_buffer[0]);
	int j;
	static unsigned int myvalue = 0;
	static int mystep = 80;
	static int mydirection = 1;

	for (j = 0; j < nsamples; j++) {
		sample_buffer[j] = (short)myvalue;
		myvalue += (unsigned int)mystep;
	}
	mystep += mydirection;
	if (mystep<80 || mystep>300) mydirection = -mydirection;
}


/*
 * Copy the next aufrag_size bytes of the loaded sample into sample_buffer,
 * wrapping around to the start of the sample when its end is reached.
 *
 * sample_len, aufrag_size and the read position are all byte counts, so the
 * copy is done through byte pointers; the destination advances with every
 * chunk so that a wrap-around appends instead of overwriting.  Nothing is
 * copied when no sample data is available.
 */
static void load_sample_buffer(void)
{
	static int sample_count;	/* read position in input_buffer, in bytes */
	const char *src = (const char *)input_buffer;
	char *dst = (char *)sample_buffer;
	int count = aufrag_size;	/* bytes still to fill */
	int len;

	if (sample_len <= 0)
		return;

	while (count > 0) {
		len = sample_len - sample_count;
		if (len > count)
			len = count;
		memcpy(dst, src + sample_count, len);
		dst += len;
		count -= len;
		sample_count += len;
		sample_count %= sample_len;
	}
}

/*
 * Fork a child process that calls sync() every sync_usecs microseconds for
 * as long as it lives.
 *
 * The fork() result is stored in the global pid, which is how the exit
 * handler tells the child from the parent.  If fork() fails the problem is
 * reported and the measurement simply runs without the background process.
 */
static void run_background_sync(int sync_usecs)
{
	if(DEBUG)
		fprintf(stderr, "starting background sync() process with %d ms frequency ...\n", sync_usecs/1000);

	pid = fork();
	if (pid < 0) {
		perror("can't start background sync() process. fork");
		return;
	}
	if (pid == 0) {
		if(DEBUG)    fprintf(stderr,"background sync() process started\n");
		nice(-20);	/* best effort, the result is not checked */
		for(;;) {
			usleep(sync_usecs);
			sync();
		}
	}
}


/*
 * Open the OSS device and configure it for 16-bit little-endian stereo
 * playback.
 *
 * frequency  sample rate in Hz
 * frags      number of fragments to request
 * frag_size  requested fragment size in bytes; rounded to the nearest power
 *            of two on a log scale
 *
 * The device is dsp_device, else $DSP_DEVICE, else /dev/dsp.  On success
 * audio_fd is open and aufrag_size / aubuf_size hold the sizes reported by
 * the driver.
 *
 * Returns 0 on success and -1 on any failure; on failure the device is
 * closed again and audio_fd is reset to -1.
 */
static int init_oss_audio(int frequency, int frags, int frag_size)
{
	int res;
	int apar = 0;
	struct audio_buf_info abinfo;
	int myfragmentsize;

	/* SNDCTL_DSP_SETFRAGMENT takes log2 of the fragment size */
	myfragmentsize = -1;
	if (frag_size > 0)
		myfragmentsize = (int)floor((log(frag_size)/log(2.0))+0.5);
	if (myfragmentsize < 4 || myfragmentsize > 15) {
		fprintf(stderr,"fragment size out of range (%d): valid values 16, 32, 64,  ... , 32768\n",frag_size);
		return(-1);
	}

	if (dsp_device == NULL) {
		if ((dsp_device = getenv("DSP_DEVICE")) == NULL || ! *dsp_device)
			dsp_device = "/dev/dsp";
	}

	audio_fd=open(dsp_device,O_WRONLY,0);
	if(audio_fd<0) {
		perror("ERROR: open");
		fprintf(stderr, "device is %s\n", dsp_device);
		return(-1);
	}

	/* best effort: the result of the reset is not checked */
	(void)ioctl(audio_fd, SNDCTL_DSP_RESET, &apar);

	apar = AFMT_S16_LE;
	res = ioctl(audio_fd, SNDCTL_DSP_SETFMT, &apar);
	if(res == -1) {
		perror("ERROR: ioctl: SNDCTL_DSP_SETFMT");
		goto fail;
	}
	if (apar != AFMT_S16_LE) {
		fprintf(stderr,"ERROR: 16 bit format not supported\n");
		goto fail;
	}

	apar = 1;
	res = ioctl(audio_fd, SNDCTL_DSP_STEREO, &apar);
	if (res == -1) {
		perror("ERROR: ioctl: SNDCTL_DSP_STEREO");
		goto fail;
	}
	if (apar != 1) {
		fprintf(stderr,"ERROR: stereo not supported\n");
		goto fail;
	}
	apar = frequency;
	res = ioctl(audio_fd, SNDCTL_DSP_SPEED, &apar);
	if (res == -1) {
		perror("ERROR: ioctl: SNDCTL_DSP_SPEED");
		goto fail;
	}
	if (DEBUG) printf("SAMPLINGRATE=%d\n", apar);

	/* fragment count in the upper 16 bits, log2(size) in the lower 16 */
	apar = (frags << 16) | myfragmentsize;
	if(DEBUG) printf("fragment parameter = %x\n",apar);
	res = ioctl(audio_fd, SNDCTL_DSP_SETFRAGMENT, &apar);
	if(res == -1) {
		perror("ERROR: ioctl: SNDCTL_DSP_SETFRAGMENT");
		goto fail;
	}

	/* ask the driver what it actually configured */
	res=ioctl(audio_fd,SNDCTL_DSP_GETOSPACE,&abinfo);
	if(res == -1) {
		perror("ERROR: ioctl: SNDCTL_DSP_GETOSPACE");
		goto fail;
	}

	aufrag_size = abinfo.fragsize;
	aubuf_size = abinfo.fragstotal * abinfo.fragsize;

	return(0);

fail:
	close(audio_fd);
	audio_fd = -1;
	return(-1);
}


/*
 * Switch the calling process to the SCHED_FIFO policy at the highest
 * priority that policy offers.
 *
 * Returns 0 on success; on failure the error is reported with perror() and
 * -1 is returned.
 */
static int set_realtime_priority(void)
{
	struct sched_param rt_param;
	int rc;

	memset(&rt_param, 0, sizeof(rt_param));
	rt_param.sched_priority = sched_get_priority_max(SCHED_FIFO);

	rc = sched_setscheduler(0, SCHED_FIFO, &rt_param);
	if (rc == 0)
		return 0;

	perror("sched_setscheduler");
	return -1;
}

#ifdef BUILD_ALSA9
/*
 * Open the ALSA playback pcm and request interleaved 16-bit little-endian
 * stereo at (approximately) the given rate.
 *
 * frequency  sample rate in Hz
 * frags      number of periods the buffer should hold
 * frag_size  requested period size in bytes (4 bytes per frame)
 *
 * The pcm name is dsp_device, else $DSP_DEVICE, else "hw:0,0".  aufrag_size
 * and aubuf_size are set, in bytes, from the values handed back by the
 * *_near calls.
 * NOTE(review): the *_near calls are used in their value-returning form;
 * confirm this matches the alsa-lib version the build targets.
 *
 * Returns 0 on success, -1 if the pcm cannot be opened or the hardware
 * parameters are rejected.
 */
static int init_alsa_audio(int frequency, int frags, int frag_size)
{
	snd_pcm_hw_params_t *hwparams;
	int frames_per_period;
	int open_rc;

	if (dsp_device == NULL) {
		dsp_device = getenv("DSP_DEVICE");
		if (dsp_device == NULL || ! *dsp_device)
			dsp_device = "hw:0,0";
	}

	open_rc = snd_pcm_open(&pcm_handle, dsp_device, SND_PCM_STREAM_PLAYBACK, 0);
	if (open_rc < 0) {
		fprintf(stderr, "device is %s\n", dsp_device);
		perror("snd_pcm_open");
		return -1;
	}

	snd_pcm_hw_params_alloca(&hwparams);
	snd_pcm_hw_params_any(pcm_handle, hwparams);
	snd_pcm_hw_params_set_access(pcm_handle, hwparams, SND_PCM_ACCESS_RW_INTERLEAVED);
	snd_pcm_hw_params_set_format(pcm_handle, hwparams, SND_PCM_FORMAT_S16_LE);
	snd_pcm_hw_params_set_channels(pcm_handle, hwparams, 2);
	snd_pcm_hw_params_set_rate_near(pcm_handle, hwparams, frequency, 0);

	/* sizes are negotiated in frames and kept in bytes */
	frames_per_period = snd_pcm_hw_params_set_period_size_near(pcm_handle, hwparams, frag_size / 4, 0);
	aufrag_size = frames_per_period * 4;
	aubuf_size = snd_pcm_hw_params_set_buffer_size_near(pcm_handle, hwparams, frames_per_period * frags);
	aubuf_size *= 4;

	if (snd_pcm_hw_params(pcm_handle, hwparams) < 0) {
		perror("pcm hw params");
		return -1;
	}

	return 0;
}
#endif

#ifdef BUILD_ALSA5
/*
 * Open the ALSA playback channel through the card/device based pcm API and
 * configure it for interleaved 16-bit little-endian stereo in block mode.
 *
 * frequency  sample rate in Hz
 * frags      maximum number of fragments to request
 * frag_size  requested fragment size
 *
 * The pcm is named by dsp_device, else $DSP_DEVICE, else "hw:0,0"; the name
 * must have the form "hw:CARD[,DEVICE]".  aufrag_size is taken from the
 * channel setup and aubuf_size is frags times that.
 *
 * Returns 0 on success, -1 on a malformed name or any ALSA failure.
 */
static int init_alsa_audio(int frequency, int frags, int frag_size)
{
	snd_pcm_channel_params_t chan_params;
	snd_pcm_channel_setup_t chan_setup;
	int card;
	int device = 0;
	int parsed;

	if (dsp_device == NULL) {
		dsp_device = getenv("DSP_DEVICE");
		if (dsp_device == NULL || ! *dsp_device)
			dsp_device = "hw:0,0";
	}

	/* the device number is optional and stays 0 when absent */
	parsed = sscanf(dsp_device, "hw:%d,%d", &card, &device);
	if (parsed < 1) {
		fprintf(stderr, "invalid pcm device %s\n", dsp_device);
		return -1;
	}

	if (snd_pcm_open(&pcm_handle, card, device, SND_PCM_OPEN_PLAYBACK) < 0) {
		perror("snd_pcm_open");
		return -1;
	}

	memset(&chan_params, 0, sizeof(chan_params));
	chan_params.channel = SND_PCM_CHANNEL_PLAYBACK;
	chan_params.mode = SND_PCM_MODE_BLOCK;
	chan_params.format.voices = 2;
	chan_params.format.format = SND_PCM_SFMT_S16_LE;
	chan_params.format.interleave = 1;
	chan_params.format.rate = frequency;
	chan_params.buf.block.frag_size = frag_size;
	chan_params.buf.block.frags_min = 1;
	chan_params.buf.block.frags_max = frags;
	if (snd_pcm_channel_params(pcm_handle, &chan_params) < 0) {
		perror("snd_pcm_channel_params");
		return -1;
	}

	/* read back what was actually set up */
	memset(&chan_setup, 0, sizeof(chan_setup));
	chan_setup.channel = SND_PCM_CHANNEL_PLAYBACK;
	if (snd_pcm_channel_setup(pcm_handle, &chan_setup) < 0) {
		perror("snd_pcm_channel_setup");
		return -1;
	}

	aufrag_size = chan_setup.buf.block.frag_size;
	aubuf_size = frags * aufrag_size;

	return 0;
}
#endif

#ifndef BUILD_ALSA
/*
 * Placeholder used when the program is built without ALSA support: reports
 * that ALSA output is unavailable and fails.  All arguments are ignored.
 *
 * Always returns -1.
 */
static int init_alsa_audio(int frequency, int frags, int frag_size)
{
	(void)frequency;
	(void)frags;
	(void)frag_size;

	fputs("Sorry, ALSA is not supported on this program.\n", stderr);
	return -1;
}
#endif

/*
 * Finish the run: stop the background sync() child, close the audio device,
 * print the summary, draw the chart, dump the raw samples to output.dat and
 * terminate the process.
 *
 * Called from main() once MAX_TIME_SAMPLES iterations are recorded and on
 * SIGTERM/SIGINT.  In the sync() child (pid == 0) it only exits.
 *
 * Never returns; every path ends in exit(0).
 */
static void *my_exithandler(void)
{
	FILE *f;
	int overruns2=overruns;
	double sample_div;

	if (pid == 0) {
		exit(0); // child (sync() process) exits quietly.
	}

	mytime_end=mygettime();

	/* the sync() child loops forever and would otherwise outlive us */
	if (pid > 0)
		kill(pid, SIGTERM);

	if (output_type == OUTPUT_OSS) {
		if (audio_fd >= 0)
			close(audio_fd);
	} else {
#ifdef BUILD_ALSA
		if (pcm_handle)
			snd_pcm_close(pcm_handle);
#endif
	}

	/* the summary line has room for three digits */
	if (overruns2>999) overruns2=999;

	if(DEBUG)  printf("\nNUMBER of OVERRUNS = %d  , max latency=%.1f ms  factor=%.1f %% of buffer\n",overruns2,max_timediff*1000.0,(max_timediff/buffer_latency)*100.0);

	printf("%5.1fms (%3d)|", max_timediff*1000.0, overruns2);

	/* avoid 0/0 when the run is interrupted before the first sample */
	sample_div = num_time_samples > 0 ? (double)num_time_samples : 1.0;

	printf("\n1MS num_time_samples=%d num_times_within_1ms=%d factor=%f\n",
	       num_time_samples, num_times_within_1ms,
	       num_times_within_1ms*100.0/sample_div);

	printf("2MS num_time_samples=%d num_times_within_2ms=%d factor=%f\n",
	       num_time_samples, num_times_within_2ms,
	       num_times_within_2ms*100.0/sample_div);
	draw_chart(time_arr, latency_arr, latency2_arr, num_time_samples,
		   buffer_latency, fragment_latency, cpu_latency,
		   overruns, max_timediff, max_timediff2,
		   num_times_within_1ms, num_times_within_2ms,
		   cpu_num_times_within_02ms, cpu_num_times_within_01ms);

	/* one line per iteration: start time, total time, busy-loop time */
	if ((f = fopen("output.dat", "w")) != NULL) {
		int i;
		for (i = 0; i < num_time_samples; i++)
			fprintf(f, "%f %f %f\n", time_arr[i], latency_arr[i], latency2_arr[i]);
		fclose(f);
	} else {
		perror("can't write output.dat. fopen");
	}

	if(DEBUG) fprintf(stderr,"\nexiting.\n");


	exit(0);
}




/*
 * Determine the CPU clock and the speed of the mydelay() busy loop.
 *
 * The clock is taken from the first "cpu MHz" line of /proc/cpuinfo and
 * stored, in Hz, in cpu_hz.  The loop speed is measured by timing
 * CALIB_LOOPS iterations of mydelay() with mygettime().
 *
 * Returns the number of mydelay() iterations per second, clamped to INT_MAX.
 * Exits the process if /proc/cpuinfo cannot be opened, if the clock is
 * unknown while the TSC based timer (which divides by cpu_hz) is in use, or
 * if the calibration run takes no measurable time.
 */
static int calibrate_loop(void)
{
	FILE *f;
	char s1[100];
	char *colon;
	double elapsed;
	double tmp_loops_per_sec;
	double mytime1, mytime2;

	f = fopen("/proc/cpuinfo", "r");
	if (f==NULL) {
		perror("can't open /proc/cpuinfo, exiting. open");
		exit(1);
	}

	while (fgets(s1, sizeof(s1), f) != NULL) {
		/* strncmp stops at the terminator of short lines */
		if (strncmp(s1, "cpu MHz", 7) != 0)
			continue;
		/* the value follows the colon */
		colon = strchr(s1, ':');
		if (colon != NULL)
			cpu_hz = atof(colon + 1) * 1000000.0;
		break;
	}
	fclose(f);
	if(cpu_hz < 1.0) {
#ifdef USE_PENTIUM_TIMER
		fprintf(stderr,"can't determine CPU clock frequency, exiting.\n");
		exit(1);
#else
		/* only the TSC based timer needs cpu_hz */
		fprintf(stderr,"can't determine CPU clock frequency.\n");
#endif
	}


	if(DEBUG) printf("calibrating loop ....\n");

#define CALIB_LOOPS 200000000

	mytime1=mygettime();
	mydelay(CALIB_LOOPS);
	mytime2=mygettime();

	elapsed = mytime2 - mytime1;
	if(DEBUG) printf("time diff= %f \n",elapsed);

	/* also catches NaN */
	if (!(elapsed > 0.0)) {
		fprintf(stderr,"calibration loop took no measurable time, exiting.\n");
		exit(1);
	}

	tmp_loops_per_sec=CALIB_LOOPS/elapsed;
	if(DEBUG) printf("loops/sec = %f\n",tmp_loops_per_sec);

	/* converting a double beyond the range of int is undefined */
	if (tmp_loops_per_sec > (double)INT_MAX) {
		fprintf(stderr,"warning: loop calibration result clamped to %d loops/sec\n", INT_MAX);
		tmp_loops_per_sec = (double)INT_MAX;
	}

	return((int)tmp_loops_per_sec);
}

/*
 * Burn CPU time by counting up to `loops`.
 *
 * The counter is volatile so that every iteration performs a real load and
 * store; without it an optimizing compiler is free to delete the loop, which
 * would make the calibration meaningless.  A non-positive `loops` returns
 * immediately.
 */
static void mydelay(int loops)
{
	volatile int k = 0;
	int u;

	for (u = 0; u < loops; u++)
		k = k + 1;
}

