#ifndef _UTILITY_H_
#define _UTILITY_H_

//#include <iostream>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <ctype.h>

// CUDA runtime library
#include <cuda_runtime.h>

using namespace std;

// Execution back-end selectors.
// NOTE(review): presumably these choose between the BOOST_CPU and BOOST_GPU
// entry points declared later in this header -- confirm against the caller.
#define CPU_MODE 0
#define GPU_MODE 1

// Test-mode selectors.
// NOTE(review): presumably the values accepted by the `testMode` parameter of
// BOOST_CPU / BOOST_GPU -- confirm against their definitions.
#define EXACT_GUESS 0
#define MODEL_SELECTION 1

// Screening-mode selectors.
// NOTE(review): presumably the values accepted by the `screenMode` parameters
// declared in this header -- confirm against their definitions.
#define SCREENING_KSA 0
#define SCREENING_CHISQUARE 1

#define CHI_SQUARE_THRESHOLD_DF1 10.828f // chi-square table value for 1 degree of freedom at upper-tail probability 0.001

// NOTE(review): presumably the value used for the pre-filtering threshold when
// the user supplies none -- confirm where it is read.
#define DEFAULT_PREFILTERING_VALUE 99999.0f
// Small single-precision constant (1e-6).
// NOTE(review): its use is not visible in this header; it looks like an
// epsilon guarding divisions or logarithms -- confirm.
#define DEL 0.000001f

// General utility functions for BOOST
// Type definitions for BOOST: 64-bit signed/unsigned integer aliases and the
// matching printf-style conversion specifiers.
typedef long long   int64;
typedef unsigned long long uint64;
#define FMT_INT64   "%lld"
#define FMT_UINT64   "%llu"
#define FMT_HEX64   "%llx"

// NOTE(review): presumably the per-SNP extents (2 x 3) of the
// pMarginalDistrSNP_Y arrays passed to the functions declared below --
// confirm against the code that indexes them.
#define MarginalDistrSNP_Y_DimensionX 2
#define MarginalDistrSNP_Y_DimensionY 3

// Lookup table for the precomputed bit count of every 16-bit value; popcount()
// indexes it with each 16-bit chunk of a 64-bit word.
// No initializer is given here, so the table starts zero-filled and must be
// populated before popcount() returns meaningful results.
// Because it is `static` in a header, every translation unit that includes
// this file gets its own private copy of the table.
static unsigned char wordbits[65536];// { bitcounts of ints between 0 and 65535 };

// Count the 1 bits in a 64-bit word.
// The word is split into four 16-bit chunks and the wordbits[] entry for each
// chunk is summed; the result is only meaningful once wordbits[] holds the
// bit count of every 16-bit value.
static int popcount( uint64 i )
{
	const int bits00to15 = wordbits[i & 0xFFFF];
	const int bits16to31 = wordbits[(i >> 16) & 0xFFFF];
	const int bits32to47 = wordbits[(i >> 32) & 0xFFFF];
	// After shifting an unsigned 64-bit value by 48 only 16 bits remain,
	// so no mask is needed for the top chunk.
	const int bits48to63 = wordbits[i >> 48];

	return bits00to15 + bits16to31 + bits32to47 + bits48to63;
}

// Compute the number of 1 bits in a 64-bit word (alternative implementation
// to popcount(); defined outside this header).
int bitCount(uint64 i);

// Compute the absolute value of a double.
double Abs(double a);

// Convert a string to upper case.
// NOTE(review): presumably converts the first `strLen` characters of
// `inputString` in place (the buffer is non-const) -- confirm in the definition.
void toUpperCaseString(char* inputString, int strLen);

// Get the data size of an input list file of the BOOST program.
// NOTE(review): the meaning of the return value and the ownership of the
// buffer stored through `DataSize` are not visible here -- check the definition
// before relying on either.
int GetDataSize(char *filename, int **DataSize);

// Calculate the marginal entropy.
// Takes the case and control genotype bit arrays and writes into
// MarginalEntropySNP / MarginalEntropySNP_Y.
// NOTE(review): presumably nsnp = number of SNPs, n = number of samples and
// nlongintcase / nlongintctrl = number of 64-bit words per genotype row for
// cases / controls -- confirm; required array sizes are not visible here.
void CalculateMarginalEntropy(uint64* genocase, uint64* genoctrl, int nsnp, int n, int nlongintcase, int nlongintctrl, double *MarginalEntropySNP, double *MarginalEntropySNP_Y);

// Calculate the marginal distribution.
// Same inputs as CalculateMarginalEntropy, but writes integer counts into
// pMarginalDistrSNP / pMarginalDistrSNP_Y.
// NOTE(review): output array layouts are not visible here -- presumably
// related to MarginalDistrSNP_Y_DimensionX/Y; confirm.
void CalculateMarginalDistr(uint64* genocase, uint64* genoctrl, int nsnp, int n, int nlongintcase, int nlongintctrl, int* pMarginalDistrSNP, int* pMarginalDistrSNP_Y);

// Calculate the genotype joint distribution.
// NOTE(review): presumably j1 and j2 are the indices of the SNP pair being
// evaluated and GenoDistr receives the joint counts -- confirm; the size of
// GenoDistr is not visible here.
void CalculateGenoJointDistr(uint64* genocase, uint64* genoctrl, int nsnp, int nLongIntcase, int nLongIntctrl, int *GenoDistr, int j1, int j2, int* pMarginalDistrSNP_Y);

// Calculate the chi-square value of a model.
// The parameter is written as `int* input[4]`, which the language adjusts to
// `int** input`; the compiler does not enforce the 4, so the caller must
// supply at least four valid pointers.
float CalculateChiSquareOfModel(int* input[4]);

// CUDA function headers
#include <list>

// NOTE(review): presumably the threads-per-block and block-count values used
// for kernel launches in the .cu sources -- confirm where they are read.
#define THREAD_NUM 256
#define BLOCK_NUM 10000

// Detect whether a CUDA-capable GPU is available.
// NOTE(review): the return-value convention (count, boolean, error code) is
// not visible here -- check the definition.
int meetCUDARequirement();

// Initialize the CUDA driver.
// NOTE(review): the return-value convention is not visible here -- check the
// definition.
int initCUDA();

// Holder for the properties of the CUDA devices on this machine.
// All member functions are defined outside this header.
// NOTE(review): the class has a user-declared destructor and a raw pointer
// member but declares no copy constructor or copy assignment. If the
// destructor frees devPropArray, copying an instance would lead to a double
// free -- confirm in the definition and avoid copying until verified.
class DeviceProperties {
public:
	DeviceProperties();
	~DeviceProperties();
	// Returns the device count (presumably the stored devCount -- confirm).
	int getDeviceCount();
	// Returns the properties of device `i` by value.
	// NOTE(review): whether `i` is range-checked is not visible here.
	cudaDeviceProp getDeviceProp(int i);
	// Prints the properties of device `i`.
	void printDevProp(int i);
private:
	// NOTE(review): presumably an array with one cudaDeviceProp per device,
	// allocated by the constructor -- confirm.
	cudaDeviceProp* devPropArray;
	// NOTE(review): presumably the number of entries in devPropArray -- confirm.
	int devCount;
};

// Entry points implemented outside this header and declared with C linkage.
// Two of them take C++ standard-library types, so despite the C linkage they
// can only be called from C++ code.

// Hands a bit-count table of `count` entries to the CUDA side.
// NOTE(review): presumably this is the host wordbits[] table being copied to
// the device -- confirm in the definition.
extern "C" void cuda_SetWordBits(const unsigned char* wordBits, int count);
// Searches for interacting SNP pairs.
// `indexVector` is passed by value, so the vector is copied on every call.
// offsetListJ1 / offsetListJ2 are non-const references and therefore can be
// modified by the callee.
// NOTE(review): presumably they receive the indices of the reported pairs and
// `screenMode` takes SCREENING_KSA or SCREENING_CHISQUARE -- confirm.
extern "C" 
	void cuda_GetInteractionPairs(std::vector<int> indexVector, 
		unsigned long long* genocase, unsigned long long* genoctrl, 
			int p, int n, int nLongIntcase, int nLongIntctrl, int ncase, int nctrl, float thresholdRecord,
			int* pMarginalDistrSNP, int* pMarginalDistrSNP_Y,  
			const unsigned char* wordBits, int wordBitCount, 
			std::list<int> &offsetListJ1, std::list<int> &offsetListJ2, int screenMode);
// Variant of cuda_GetInteractionPairs that takes the interaction input/pair
// offset arrays and the score array as explicit pointer arguments.
// NOTE(review): whether those pointers refer to host or device memory, and
// their required sizes, are not visible here -- check the definition.
extern "C" void cuda_GetInteractionPairsDirect(unsigned long long* genocase, unsigned long long* genoctrl, int p, int nLongIntcase, int nLongIntctrl, int* pMarginalDistrSNP, int* pMarginalDistrSNP_Y, int n, float thresholdRecord, int ncase, int nctrl, int* interactionInputOffsetJ1, int* interactionInputOffsetJ2, int* interactionPairOffsetJ1, int* interactionPairOffsetJ2, float* interactionMeasureScore, const unsigned char* wordBits, int wordBitCount, std::list<int> &offsetListJ1, std::list<int> &offsetListJ2);

// C++ entry points for GBOOST: one CPU and one GPU variant with identical
// parameter lists.
// NOTE(review): presumably `screenMode` takes SCREENING_KSA or
// SCREENING_CHISQUARE and `testMode` takes EXACT_GUESS or MODEL_SELECTION;
// the return-value convention is not visible here -- confirm in the definitions.
int BOOST_CPU(char* inputFilename, char* outputFilePrefix, float preFilteringThreshold, int screenMode, float screenThreshold, int testMode, float testThreshold);
int BOOST_GPU(char* inputFilename, char* outputFilePrefix, float preFilteringThreshold, int screenMode, float screenThreshold, int testMode, float testThreshold);

// Flushes stdout; intended to be called regularly so progress output appears
// promptly. Expands to a plain fflush(stdout) call.
#define FLUSH_STDOUT() fflush(stdout)

#endif
