#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sched.h>
#include <stdlib.h>
#include <strings.h>

/*
 * Number of unsigned int elements in the benchmark array A.
 * Parenthesized so the macro expands safely inside any expression
 * (e.g. x / ARRAY_SIZE or x % ARRAY_SIZE).
 */
#define ARRAY_SIZE (1024*2)

/*
 * Pin the calling thread to a single CPU core.
 *
 * cpu_no: zero-based index of the CPU the thread should run on.
 *
 * Builds an affinity mask that contains only cpu_no and applies it with
 * sched_setaffinity(). If the call fails, a diagnostic is written to stderr
 * and the function returns normally, so existing callers are unaffected
 * (the thread then simply keeps whatever affinity it had before).
 */
void set_cpu(int cpu_no)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu_no, &mask);

	/* A pid of 0 tells sched_setaffinity() to act on the calling thread. */
	if (sched_setaffinity(0, sizeof(mask), &mask) != 0)
		perror("sched_setaffinity");
}

// Benchmark array: allocated and initialised in main(), read by
// test_prefetch_queue(). Declared volatile, so every access in the C code
// is a real memory access.
volatile unsigned int *A;
// Raw timestamp halves stored by test_prefetch_queue():
// time_l1/time_h1 = low/high 32 bits of the start timestamp,
// time_l2/time_h2 = low/high 32 bits of the end timestamp.
unsigned int time_l1,time_h1,time_l2,time_h2;
// Not referenced anywhere in the visible code.
long long sum;
// Not referenced anywhere in the visible code.
unsigned int temp;
// End timestamp minus start timestamp of the most recent
// test_prefetch_queue() call; printed by main().
long long elapsed;

/*
 * Time 128 loads from the global array A and store the result in the
 * global `elapsed`.
 *
 * Steps:
 *   1. Call _mm_clevict() on every 16th element of A, once with second
 *      argument 1 and once with 2.
 *   2. Execute cpuid followed by rdtsc and keep the start timestamp in
 *      esi (low half) and edi (high half).
 *   3. Issue 128 `movl` loads, in 16 groups of 8, into r8d..r15d. The
 *      addresses are A[16*k]; k takes every value 0..127 exactly once, in a
 *      scrambled order.
 *   4. Execute rdtsc again (no cpuid before it), copy both timestamps to
 *      time_l1/time_h1/time_l2/time_h2 and compute
 *      elapsed = end - start as 64-bit values.
 *
 * Takes no arguments and returns nothing; the only outputs are the globals
 * time_l1, time_h1, time_l2, time_h2 and elapsed.
 *
 * NOTE(review): _mm_clevict is not declared by any header included in this
 * file; presumably it is the Intel compiler's cache-line eviction intrinsic
 * and 1 / 2 select the cache level -- confirm.
 * NOTE(review): the `__asm <insn>` statements are not GNU syntax while the
 * `asm("..." : : ...)` statements are; presumably this is built with a
 * compiler that accepts both (e.g. Intel icc) -- confirm.
 * NOTE(review): the asm statements write r8d..r15d (and the __asm lines use
 * esi/edi/eax/edx and cpuid) without any clobber list, and the start
 * timestamp is assumed to survive in esi/edi across all the loads. Verify
 * the generated code before trusting the numbers.
 */
void test_prefetch_queue()
{
	int i;
	// Evict step: one _mm_clevict pair per 16 elements of A
	// (16 unsigned ints = 64 bytes, assuming a 4-byte unsigned int).
    	for(i = 0; i < ARRAY_SIZE; i=i+16){
      	  _mm_clevict((void *)&A[i], 1);
     	  _mm_clevict((void *)&A[i], 2);
    	}

	// Read the begin timestamp: cpuid first, then rdtsc; the result
	// (edx:eax) is parked in edi:esi until the end of the function.
	//__asm mov rbx,0
	__asm cpuid
	__asm rdtsc
	__asm mov esi,eax
	__asm mov edi,edx

	// Timed region: 128 independent loads from A[16*k], destinations
	// cycling through r8d..r15d. The loaded values are never used.
	asm("movl %0, %%r8d" : : "m"(A[16*91]));
	asm("movl %0, %%r9d" : : "m"(A[16*111]));
	asm("movl %0, %%r10d" : : "m"(A[16*59]));
	asm("movl %0, %%r11d" : : "m"(A[16*120]));
	asm("movl %0, %%r12d" : : "m"(A[16*104]));
	asm("movl %0, %%r13d" : : "m"(A[16*16]));
	asm("movl %0, %%r14d" : : "m"(A[16*34]));
	asm("movl %0, %%r15d" : : "m"(A[16*100]));

	asm("movl %0, %%r8d" : : "m"(A[16*65]));
	asm("movl %0, %%r9d" : : "m"(A[16*93]));
	asm("movl %0, %%r10d" : : "m"(A[16*71]));
	asm("movl %0, %%r11d" : : "m"(A[16*32]));
	asm("movl %0, %%r12d" : : "m"(A[16*74]));
	asm("movl %0, %%r13d" : : "m"(A[16*30]));
	asm("movl %0, %%r14d" : : "m"(A[16*24]));
	asm("movl %0, %%r15d" : : "m"(A[16*96]));

	asm("movl %0, %%r8d" : : "m"(A[16*38]));
	asm("movl %0, %%r9d" : : "m"(A[16*41]));
	asm("movl %0, %%r10d" : : "m"(A[16*22]));
	asm("movl %0, %%r11d" : : "m"(A[16*55]));
	asm("movl %0, %%r12d" : : "m"(A[16*39]));
	asm("movl %0, %%r13d" : : "m"(A[16*5]));
	asm("movl %0, %%r14d" : : "m"(A[16*114]));
	asm("movl %0, %%r15d" : : "m"(A[16*82]));

	asm("movl %0, %%r8d" : : "m"(A[16*43]));
	asm("movl %0, %%r9d" : : "m"(A[16*127]));
	asm("movl %0, %%r10d" : : "m"(A[16*92]));
	asm("movl %0, %%r11d" : : "m"(A[16*86]));
	asm("movl %0, %%r12d" : : "m"(A[16*7]));
	asm("movl %0, %%r13d" : : "m"(A[16*107]));
	asm("movl %0, %%r14d" : : "m"(A[16*62]));
	asm("movl %0, %%r15d" : : "m"(A[16*77]));

	asm("movl %0, %%r8d" : : "m"(A[16*27]));
	asm("movl %0, %%r9d" : : "m"(A[16*10]));
	asm("movl %0, %%r10d" : : "m"(A[16*72]));
	asm("movl %0, %%r11d" : : "m"(A[16*11]));
	asm("movl %0, %%r12d" : : "m"(A[16*109]));
	asm("movl %0, %%r13d" : : "m"(A[16*4]));
	asm("movl %0, %%r14d" : : "m"(A[16*85]));
	asm("movl %0, %%r15d" : : "m"(A[16*70]));

	asm("movl %0, %%r8d" : : "m"(A[16*73]));
	asm("movl %0, %%r9d" : : "m"(A[16*48]));
	asm("movl %0, %%r10d" : : "m"(A[16*106]));
	asm("movl %0, %%r11d" : : "m"(A[16*40]));
	asm("movl %0, %%r12d" : : "m"(A[16*69]));
	asm("movl %0, %%r13d" : : "m"(A[16*23]));
	asm("movl %0, %%r14d" : : "m"(A[16*95]));
	asm("movl %0, %%r15d" : : "m"(A[16*13]));

	asm("movl %0, %%r8d" : : "m"(A[16*52]));
	asm("movl %0, %%r9d" : : "m"(A[16*42]));
	asm("movl %0, %%r10d" : : "m"(A[16*79]));
	asm("movl %0, %%r11d" : : "m"(A[16*1]));
	asm("movl %0, %%r12d" : : "m"(A[16*44]));
	asm("movl %0, %%r13d" : : "m"(A[16*118]));
	asm("movl %0, %%r14d" : : "m"(A[16*125]));
	asm("movl %0, %%r15d" : : "m"(A[16*0]));

	asm("movl %0, %%r8d" : : "m"(A[16*61]));
	asm("movl %0, %%r9d" : : "m"(A[16*50]));
	asm("movl %0, %%r10d" : : "m"(A[16*60]));
	asm("movl %0, %%r11d" : : "m"(A[16*64]));
	asm("movl %0, %%r12d" : : "m"(A[16*99]));
	asm("movl %0, %%r13d" : : "m"(A[16*33]));
	asm("movl %0, %%r14d" : : "m"(A[16*124]));
	asm("movl %0, %%r15d" : : "m"(A[16*25]));

	asm("movl %0, %%r8d" : : "m"(A[16*126]));
	asm("movl %0, %%r9d" : : "m"(A[16*31]));
	asm("movl %0, %%r10d" : : "m"(A[16*116]));
	asm("movl %0, %%r11d" : : "m"(A[16*51]));
	asm("movl %0, %%r12d" : : "m"(A[16*117]));
	asm("movl %0, %%r13d" : : "m"(A[16*81]));
	asm("movl %0, %%r14d" : : "m"(A[16*36]));
	asm("movl %0, %%r15d" : : "m"(A[16*21]));

	asm("movl %0, %%r8d" : : "m"(A[16*76]));
	asm("movl %0, %%r9d" : : "m"(A[16*63]));
	asm("movl %0, %%r10d" : : "m"(A[16*46]));
	asm("movl %0, %%r11d" : : "m"(A[16*45]));
	asm("movl %0, %%r12d" : : "m"(A[16*88]));
	asm("movl %0, %%r13d" : : "m"(A[16*9]));
	asm("movl %0, %%r14d" : : "m"(A[16*108]));
	asm("movl %0, %%r15d" : : "m"(A[16*105]));

	asm("movl %0, %%r8d" : : "m"(A[16*121]));
	asm("movl %0, %%r9d" : : "m"(A[16*87]));
	asm("movl %0, %%r10d" : : "m"(A[16*29]));
	asm("movl %0, %%r11d" : : "m"(A[16*80]));
	asm("movl %0, %%r12d" : : "m"(A[16*112]));
	asm("movl %0, %%r13d" : : "m"(A[16*19]));
	asm("movl %0, %%r14d" : : "m"(A[16*75]));
	asm("movl %0, %%r15d" : : "m"(A[16*56]));

	asm("movl %0, %%r8d" : : "m"(A[16*6]));
	asm("movl %0, %%r9d" : : "m"(A[16*98]));
	asm("movl %0, %%r10d" : : "m"(A[16*17]));
	asm("movl %0, %%r11d" : : "m"(A[16*83]));
	asm("movl %0, %%r12d" : : "m"(A[16*123]));
	asm("movl %0, %%r13d" : : "m"(A[16*54]));
	asm("movl %0, %%r14d" : : "m"(A[16*102]));
	asm("movl %0, %%r15d" : : "m"(A[16*8]));

	asm("movl %0, %%r8d" : : "m"(A[16*84]));
	asm("movl %0, %%r9d" : : "m"(A[16*89]));
	asm("movl %0, %%r10d" : : "m"(A[16*3]));
	asm("movl %0, %%r11d" : : "m"(A[16*47]));
	asm("movl %0, %%r12d" : : "m"(A[16*18]));
	asm("movl %0, %%r13d" : : "m"(A[16*78]));
	asm("movl %0, %%r14d" : : "m"(A[16*122]));
	asm("movl %0, %%r15d" : : "m"(A[16*37]));

	asm("movl %0, %%r8d" : : "m"(A[16*94]));
	asm("movl %0, %%r9d" : : "m"(A[16*101]));
	asm("movl %0, %%r10d" : : "m"(A[16*66]));
	asm("movl %0, %%r11d" : : "m"(A[16*53]));
	asm("movl %0, %%r12d" : : "m"(A[16*110]));
	asm("movl %0, %%r13d" : : "m"(A[16*35]));
	asm("movl %0, %%r14d" : : "m"(A[16*115]));
	asm("movl %0, %%r15d" : : "m"(A[16*28]));

	asm("movl %0, %%r8d" : : "m"(A[16*90]));
	asm("movl %0, %%r9d" : : "m"(A[16*58]));
	asm("movl %0, %%r10d" : : "m"(A[16*57]));
	asm("movl %0, %%r11d" : : "m"(A[16*12]));
	asm("movl %0, %%r12d" : : "m"(A[16*49]));
	asm("movl %0, %%r13d" : : "m"(A[16*113]));
	asm("movl %0, %%r14d" : : "m"(A[16*15]));
	asm("movl %0, %%r15d" : : "m"(A[16*2]));

	asm("movl %0, %%r8d" : : "m"(A[16*97]));
	asm("movl %0, %%r9d" : : "m"(A[16*68]));
	asm("movl %0, %%r10d" : : "m"(A[16*103]));
	asm("movl %0, %%r11d" : : "m"(A[16*26]));
	asm("movl %0, %%r12d" : : "m"(A[16*14]));
	asm("movl %0, %%r13d" : : "m"(A[16*119]));
	asm("movl %0, %%r14d" : : "m"(A[16*20]));
	asm("movl %0, %%r15d" : : "m"(A[16*67]));

	// Read the end timestamp. The cpuid in front of this rdtsc is
	// commented out, so only the start of the region executes cpuid.
	//__asm cpuid
	__asm rdtsc
	// Spill both timestamps to globals: esi/edi hold the start value,
	// eax/edx the end value just read.
	__asm mov time_l1,esi
	__asm mov time_h1,edi
	__asm mov time_l2,eax
	__asm mov time_h2,edx

	// Reassemble the two 64-bit timestamps (high << 32 | low) and take
	// end - start.
	elapsed = ((unsigned long long)time_h2<< 32 | time_l2) - ((unsigned long long)time_h1<<32 | time_l1);
}

/*
 * Benchmark driver.
 *
 * Pins the thread to CPU 1, allocates the ARRAY_SIZE-element array A with
 * valloc(), zeroes it, runs test_prefetch_queue() five times and prints the
 * `elapsed` value left by the last run, followed by a tab, to stdout.
 *
 * Returns 0 on success, 1 if the array cannot be allocated.
 */
int main(void)
{
	const size_t array_bytes = ARRAY_SIZE * sizeof(*A);
	int i;
	int run;

	set_cpu(1);

	A = (volatile unsigned int *)valloc(array_bytes);
	if (A == NULL) {
		/* Without the buffer every later access would dereference NULL. */
		perror("valloc");
		return 1;
	}
	bzero((void *)A, array_bytes);

	/*
	 * Write one element out of every 1024 so the memory has been touched
	 * before the timed runs (to avoid page faults inside them).
	 */
	for (i = 0; i < ARRAY_SIZE; i = i + 1024)
		A[i] = i;

	/* Five back-to-back runs; only the last run's result is reported. */
	for (run = 0; run < 5; run++)
		test_prefetch_queue();

	/* elapsed is a signed long long; cast so it matches the %llu format. */
	printf("%llu\t", (unsigned long long)elapsed);

	free((void *)A);
	A = NULL;

	return 0;
}
