/*
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

/* blockscan is a bpm calculation program based on the autocorrelation method
** implementing algorithms by Werner Van Belle
 
 gcc -O3 -I/opt/local/include -L/opt/local/lib libav-blockscan.c -lavcodec -lavformat -o blockscan
 
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <fcntl.h>

#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>

#include <ffmpeg/avcodec.h>
#include <ffmpeg/avformat.h>

#define BUFFER_LEN 4096
#define PRECISION 65536

#if (defined (WIN32) || defined (_WIN32))
#define	snprintf	_snprintf
#endif


static void
print_version (void)
{	char buffer [256] ;

	// sf_command (NULL, SFC_GET_LIB_VERSION, buffer, sizeof (buffer)) ;
	printf ("\nVersion : %s\n\n", buffer) ;
} /* print_usage */


static void
print_usage (char *progname)
{	printf ("\nUsage : %s <lower_bpm> <upper_bpm> <file1> ... <fileN>\n", progname) ;
	printf ("\nPrints out information about one or more sound files.\n\n") ;
#if (defined (_WIN32) || defined (WIN32))
		printf ("This is a Unix style command line application which\n"
				"should be run in a MSDOS box or Command Shell window.\n\n") ;
		printf ("Sleeping for 5 seconds before exiting.\n\n") ;

		/* This is the officially blessed by microsoft way but I can't get
		** it to link.
		**     Sleep (15) ;
		** Instead, use this:
		*/
		_sleep (5 * 1000) ;
#endif
} /* print_usage */



inline
unsigned long long correlate2 (unsigned int *rd, unsigned int rdl, unsigned int delay) 
{

	unsigned int sample=0,samples;
	unsigned long long result=0;

	
	if (delay == rdl) {
		return 1<<30;
	}
	samples = rdl - delay;
	
	while (delay < rdl)
	{
		result += abs (rd[sample] - rd[delay]);
		delay++; sample++;
	}
	// Need to compensate for the number of samples.
	// fprintf(stderr,"%d - %d\n", rdl , delay);
	result = result * PRECISION / samples;
	
	return result ;
}

/* Returns the sum difference of the rd data set for this delay value 
**  Early exit if the result exceeds the min value
*/

inline
unsigned long long correlate (unsigned char *da, unsigned int dl,unsigned int delay, unsigned int min) 
{
	unsigned int sample = 0;
	unsigned int dld = dl - delay;
	unsigned long long result = 0;

	//while ( (sample < dld) && (result < min) )
	while (delay < dl)
	{
		/* vector engine psuedo code */

		/* load 16 bytes into vector1 from da[sample] */
		/* load 16 bytes into vector2 from da[delay] */
		/* vector1 = vector1 - vector2 */
		/* result += sumabs(vector1) */

		result += abs(da[sample] - da[delay]);
		delay ++; sample++;
		
		//printf("%u %llu\n",sample,result);
		
	}	
	// Need to compensate for the number of samples.
	result = result * PRECISION / dld;
	
	return result ;

}

/* reduce the sample data by blocksize times
** sums the samples into an unsigned int to reduce aliasing effects
*/

inline
void resample (unsigned int *rd, unsigned int *rdl, unsigned char *da, int dl, int blocksize) 
{

	unsigned int i=0;
	unsigned int k=0;
	
	*rdl = dl / blocksize +1;

	for (; i < dl ; i+=blocksize) 
	{
		rd[k] = 0;

		register int j = i;
		unsigned int iblk = i + blocksize ;
		
		for  (;j < iblk;j++)
		{
			rd[k] += da[j];
		
	//	printf("%u %u %u\n",j,rd[k],da[j]);
		}
		k++;
	}
//	printf("%u %u\n",k,*rdl);

}

// read the entire file into memory...

void * load_file (AVFormatContext *pfc, int s, unsigned int *total_data_size) {


	AVCodecContext  *pCodecCtx;
    AVCodec         *pCodec;
    AVPacket        packet;
    uint8_t         *buffer;
    static short *samples= NULL;
    uint8_t *ptr, samp;
	int data_size;
	int count;


	*total_data_size=0;

	samples = (short *) malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
	data_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;

	//printf ("size of: %d\n", data_size);
	
	pCodecCtx=pfc->streams[s]->codec;

	pCodec=avcodec_find_decoder(pCodecCtx->codec_id);
    if(pCodec==NULL) {
		fprintf(stderr,"Codec not found.\n");
        return NULL; // Codec not found
	}

	// Open codec
    if(avcodec_open(pCodecCtx, pCodec)<0) {
		fprintf(stderr,"Could not open codec\n");
        return NULL; // Could not open codec
	}

	buffer = (uint8_t *) malloc(64);

	while(av_read_frame(pfc, &packet)>=0)
    {
	/*
		printf("total dur: %d ",pfc->streams[s]->duration);
		printf("size: %d ",packet.size);
		printf("dur: %d ",packet.duration);
	*/	
		
		avcodec_decode_audio2(pCodecCtx, samples, &data_size, packet.data, packet.size);
		
		//printf ("samples decoded: %d\n",data_size);
	
		//printf ("realloc %d\n",(*total_data_size) + data_size);
	
		buffer =(uint8_t *) realloc(buffer,(*total_data_size) + data_size/2);
		
		data_size /= 2;
		for (count=0;count<data_size;count+=2) 
		{

			// merge left and right channel into one.
			samp = (samples[count] + samples[count+1]) / 512 + 128;
		
			//	printf ("%d: (%hd + %hd) / 512 = %d\n", *total_data_size, samples[count], samples[count+1], samp);
			//  printf ("%d %d\n", *total_data_size, samples[count] + samples[count+1]));

			// This debug was to determine the file reading and conversion is correct.
			//	putchar(samp);
		
			buffer[(*total_data_size) + (count/2)] = samp;
		}

		*total_data_size += data_size/2;
		data_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
	}

	//printf ("total size: %d\n",*total_data_size);
	
	return buffer;

}

// open the audio file using an external library

void * open_file(char *fn, unsigned int *dl, unsigned int *sr) 
{
	unsigned char * da = NULL;
	AVFormatContext *pFormatCtx;
    int             audioStream,i;


	av_register_all();

	    if(av_open_input_file(&pFormatCtx, fn, NULL, 0, NULL)!=0) {
			printf("Error: opening file %d.\n",fn);
			return NULL; 
		}
		
		if(av_find_stream_info(pFormatCtx)<0) {
			printf("Error: couldn't find stream information in file %d.\n",fn);
			return NULL; // Couldn't find stream information
		}
		
		audioStream = -1;

		for(i=0; i<pFormatCtx->nb_streams; i++) {
			//fprintf (stderr,"stream: %d = %d (%s)\n",i,pFormatCtx->streams[i]->codec->codec_type ,pFormatCtx->streams[i]->codec->codec_name);
			if(pFormatCtx->streams[i]->codec->codec_type==CODEC_TYPE_AUDIO)
			{
				audioStream=i;
				*sr = pFormatCtx->streams[i]->codec->sample_rate;
				*dl = pFormatCtx->streams[i]->nb_frames;
			
				//printf ("Duration: %d.\n",pFormatCtx->streams[i]->duration);
				//printf ("Frames: %u.\n",*dl);
				fprintf (stderr,"SampleRate: %u.\n",*sr);

				/* 
				da = (unsigned char *)malloc(*dl);
				if (da) {
					load_file(infile,da);
				}
				*/
				// we can't seem to get duration until we read the first frame...
				fprintf (stderr,"Reading file.\n");
				da = load_file(pFormatCtx,audioStream,dl);
				return da;
			}
		}

		return NULL;

}



// 2 pass scan
unsigned int block_scan_t (unsigned int lower,unsigned int upper, unsigned char *da, unsigned int dl)
{
	unsigned int j,i,w;
	unsigned int min = 1<<30;
	unsigned int bpm,sbpm;
	unsigned long long temp,otemp=1<<30;;
	
	unsigned int blocksize = 16;
	unsigned int *resample_data;

	unsigned int resample_data_length;

	w = upper - lower;

	resample_data = (unsigned int *)malloc(dl*sizeof(unsigned int));
	resample (resample_data, &resample_data_length, da, dl, blocksize);


	for (j=0; j <w/blocksize; j++)
	{

		temp = correlate2(resample_data,resample_data_length,lower/blocksize+j);
		
//		printf ("%f %u\n",(44100.0*4.0*60.0)/ (lower+j*blocksize), temp);
		printf ("%u %llu\n", (lower+j*blocksize),temp);

		//	printf ("pass 3 temp: %u %u %u %u\n",(1<<i),j*(1<<i),match[j*(1<<i)],mean);
		if (temp < min) {
			min = temp;
			sbpm = (lower + j * blocksize);
		}
	}

	fprintf (stderr,"lowpass: %d\n",sbpm);

	min = 1<<30;
	for (j=sbpm-blocksize*4; j<sbpm+blocksize*4; j++) {

		temp = correlate(da,dl,j,1<<30);
	//	printf ("%u %llu\n", j,temp);
		if (temp < min) {
			min = temp;
			bpm = j;
		}
	}
	fprintf (stderr,"pass: %d\n",bpm);

	return bpm;

}

// for small files, single pass.  Actually for very very very small files.
// or very very small BPM scan ranges.
unsigned int slow_block_scan (unsigned int lower,unsigned int upper, unsigned char *da, unsigned int dl)
{

	unsigned int j;
	unsigned long long temp;
	unsigned long long min;
	unsigned int bpm;
	
	min = 1<<30;
	for (j=lower; j <upper; j++)
	{
		temp = correlate(da,dl,j,1<<30);
		// temp = correlate(da,dl,j,min);
		if (temp < min) {
			min = temp;
			bpm =j;
		}
		 printf ("%u %llu\n", j,temp);
	}
	
	return bpm;
	
}
// all times are in the sample domain.
unsigned int block_scan (unsigned int lower,unsigned int upper, unsigned char *da, unsigned int dl)
{

	unsigned int max_size = 6;
	unsigned int j,i,w;
	unsigned int *match;
	unsigned int temp,c,points,mean;
	unsigned int min;
	unsigned int bpm;
	double total;

	unsigned int *resample_data;
	unsigned int resample_data_length;


	//printf ("upper: %d lower: %d\n",upper,lower);
	if (upper > dl) 
		return 0;

	w = upper - lower;

	//printf ("W: %d  l: %d  u: %d \n",w,lower,upper);

	resample_data = (unsigned int *)malloc(dl*sizeof(unsigned int));
	match = (unsigned int*) malloc (sizeof(unsigned int) * w);
	for (c=0; c< w; c++) 
		match[c] = 1<<30;

	resample (resample_data, &resample_data_length, da, dl, 1<<max_size);
	total = 0; points = 0;

	fprintf (stderr,"(%u) ",(1<<max_size)); fflush(stderr);

	for (j=0; j < w / (1<<max_size); j++) 
	{
		temp = correlate2(resample_data,resample_data_length,lower/(1<<max_size)+j);
		total += temp; points ++;
		// printf ("%u %u\n",j*(1<<max_size),temp);
		match[j*(1<<max_size)] = temp;
		match[j * (1<<max_size) + (1<<max_size)/2] = temp;
		//printf (" %d < c < %d \n",j * (1<<max_size), (j+1) * (1<<max_size));
	}

	mean = total / points; total = 0 ; points = 0;
	for (i=max_size-1;i>0;i--)
	{
		resample(resample_data,&resample_data_length,da,dl,1<<i);

		fprintf (stderr,"(%u) ",(1<<i)); fflush(stderr);

		for (j=0; j<w/(1<<i); j++)
		{
		//	printf ("pass 2 j: %d match: %d (%d)\n",j,match[j*(1<<i)],mean);
			if (match[j*(1<<i)] < mean)
			{
				temp = correlate2(resample_data,resample_data_length,lower/(1<<i)+j);
			//	printf ("%u %u\n",j*(1<<i),temp);
				total += temp; points ++;
				match[j*(1<<i)] = temp;
				match[j * (1<<i) + (1<<i)/2] = temp;
			} else {
				match[j*(1<<i)] = 1<<30;
			}
		}
		mean = total / points; total = 0 ; points = 0;
		// printf ("pass 2  %d mean : %d\n",i,mean);
	}

	fprintf (stderr,"(1)"); fflush(stderr);
	
	min = 1<<30;
	for (j=0; j <w; j++)
	{
		//printf ("pass 3 j: %d match: %d (%d)\n",j,match[j],mean);
		if (match[j] < mean)
		{
			temp = correlate(da,dl,lower+j,min);
		//	printf ("pass 3 temp: %u %u %u %u\n",(1<<i),j*(1<<i),match[j*(1<<i)],mean);
			if (temp < min) {
				min = temp;
				bpm = (lower + j);
			}
		}
	}

	fprintf (stderr,"\r                                           \r");
	free (match);
	free(resample_data);
	return bpm;

}



int
main (int argc, char *argv[])
{	static	char	strbuffer [BUFFER_LEN] ;
	char 		*progname;
	int			k ;
	double glower,gupper;

	progname = strrchr (argv [0], '/') ;
	progname = progname ? progname + 1 : argv [0] ;

//	print_version () ;

	if (argc < 4)
	{	print_usage (progname) ;
		return  1 ;
	} ;

	glower = atof(argv[1]);
	gupper = atof(argv[2]);

	for (k = 3 ; k < argc ; k++)
	{
		unsigned char *data = 0;
		char * infilename;
		unsigned int data_length;
		unsigned int sample_rate;
		
		infilename = argv [k] ;

		if (strcmp (infilename, "--help") == 0 || strcmp (infilename, "-h") == 0)
		{	print_usage (progname) ;
			continue ;
		} ;


		data = open_file (infilename,&data_length,&sample_rate); 

		if (data) {

			unsigned int l,u,b;

			fprintf (stderr, "Frames: %d\n",data_length);
			fprintf (stderr, "Min BPM: %lf\n", sample_rate * 4.0 * 60.0 / (data_length / 2));
			l = 4 * 60 * sample_rate / glower;
			u = 4 * 60 * sample_rate / gupper;
			// need some conditional logic to pick the scan method
			
			b = block_scan_t(u,l,data,data_length);
			// b = block_scan(u,l,data,data_length);
			// b = slow_block_scan(u,l,data,data_length);
			
			if (b > 0) {
				double lower,upper,bpm;
				bpm = sample_rate * 4.0 * 60.0 / b;
				lower  = sample_rate * 4.0 * 60.0/((b-1)*1.0);
				upper  = sample_rate * 4.0 * 60.0/((b+1)*1.0);
				fprintf (stderr,"%s BPM: %lf +/-%lf (delay %u)\n",infilename,bpm,((lower-bpm)+(bpm-upper))/4,b);
				free (data);
			} else {
				printf ("%s too short to calculate bpm\n",infilename);
			}
		}

	} ;

	return 0 ;
} /* main */


