#include <stdio.h>

typedef unsigned long time;
typedef unsigned long long stamp64;
/*
 * Sample the x86 time-stamp counter.
 *
 * RDTSC delivers the counter in EDX:EAX.  Only EAX (the low 32 bits) is
 * returned; EDX is merely declared as clobbered, so the returned value
 * wraps around every 2^32 cycles.
 *
 * Returns: the low 32 bits of the time-stamp counter.
 */
extern inline time stamp(void)
{
    time tsc;

    /*
     * __asm__/__volatile__ are the spellings that remain available when the
     * compiler runs in a strict ISO mode (-ansi, -std=c99, -std=c11, ...),
     * where the plain `asm` keyword is not recognised.
     */
    __asm__ __volatile__("rdtsc" : "=a" (tsc) : : "edx");
    return tsc;
}
/*
 * Return the number of cycles elapsed since an earlier counter sample.
 *
 * t: a value previously obtained from the low 32 bits of the time-stamp
 *    counter (the same quantity this function samples with RDTSC).
 *
 * Returns: (now - t) modulo 2^32.  Because only the low 32 bits of the
 * counter are sampled, a reading that is numerically smaller than `t`
 * means the 32-bit value wrapped in between; the modular difference is
 * still the true elapsed count in that case, provided fewer than 2^32
 * cycles have passed.
 */
extern inline time measure(time t)
{
    time tsc;

    /* __asm__ keeps this compilable in strict ISO modes where `asm` is off. */
    __asm__ __volatile__("rdtsc" : "=a" (tsc) : : "edx");

    /*
     * Unsigned subtraction wraps on its own; the mask pins the modulus to
     * 2^32 even where `unsigned long` is wider than the sampled register.
     */
    return (tsc - t) & 0xFFFFFFFFUL;
}
/*
 * c1 and d1 are not referenced by any code in this file.
 * NOTE(review): possibly present only to influence data layout - confirm.
 */
int c1 = 12345678;
int d1;

/*
 * 64-bit operands of the benchmark loop in main().  The loop's inline
 * assembly reads var1/var2 and writes var3/var4, addressing them purely by
 * symbol name, so the C compiler never sees a reference to them.  The `used`
 * attribute forces them to be emitted anyway; without it an optimising
 * build may discard them and the link fails with undefined symbols.
 *
 * var1 is aligned to 32 bytes.  The placement of the other variables
 * relative to var1 relies on the toolchain laying them out in declaration
 * order, which the C language itself does not promise.
 */
static long long var1 __attribute__ ((aligned(32), used));
static long long var2 __attribute__ ((used));
#if 1
static long long dummy1 __attribute__ ((used)); /* These two make var3 and var4 be outside */
static long long dummy2 __attribute__ ((used)); /* the cache line when reading var1 */
#endif
static long long var3 __attribute__ ((used));
static long long var4 __attribute__ ((used));

/* Scratch area; main() stores a single long double inside it before timing. */
int x[64];
/*
 * Benchmark driver.
 *
 * Times ten runs of a fixed 30000-iteration assembly loop with
 * stamp()/measure() and then prints, for every run, the cycle count and the
 * corresponding time in milliseconds (computed for a 166 MHz clock).
 *
 * The assembly is 32-bit x86 in AT&T syntax and addresses var1..var4 by
 * symbol name.  Its instruction sequence and .align directives are the
 * subject of the experiment and must not be altered casually.
 *
 * Returns: 0.
 */
int main(void)
{
    enum { RUNS = 10 };
    const double cpu_mhz = 166.0;   /* clock rate assumed by the ms column */
    time s;
    time si[RUNS];
    int i;
    long double *p;

    /*
     * Store one long double inside x[]: advance p in long-double-sized steps
     * until its address is a multiple of 8, step once more, then write.
     * NOTE(review): the purpose of this store is not evident from this file;
     * the sequence is kept exactly as found.  The pointer is converted to
     * unsigned long rather than int so the address is not truncated on its
     * way to the alignment test.
     */
    p = (void *)&x;
    while ((unsigned long)p & 7)
        p++;
    p++;
    *p = 1234.567E123;

    /*
    printf("p=%p\n", (void *)p);
    printf("var1: %p\nvar2: %p\nvar3: %p\nvar4: %p\n",
    &var1, &var2, &var3, &var4);
    */

    for (i = 0; i < RUNS; i++) {
        int a;
        int c[10];

        /*
         * c[] is filled but never read afterwards.
         * NOTE(review): possibly meant to touch the stack before timing -
         * confirm before removing.
         */
        for (a = 0; a < 10; a++)
            c[a] = -5;

        s = stamp();

        /*
         * The body is built from adjacent string literals (a string literal
         * spanning several source lines is not valid C).  A numeric local
         * label replaces a named one so the assembler does not see a
         * duplicate symbol if the compiler ever replicates this statement.
         * "memory" tells the compiler that var3/var4 are written behind its
         * back; "cc" records that the flags are changed.
         */
        __asm__ __volatile__(
            "not %%esi\n\t"
            "nop\n\t"
            "nop\n\t"
            ".align 16\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            /*
             * Original author's observation: comment the following four
             * nops out, and suddenly the loop is > 100% slower!
             */
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "movl $30000, %%ecx\n\t"    /* loop this many times */
            ".align 16\n"
            "1:\n\t"                    /* the loop (just some nonsense code) */
            "movl var1,%%edx\n\t"
            "movl var1+4,%%eax\n\t"
            "movl var2,%%ebx\n\t"
            "andl %%edx,%%ebx\n\t"
            "movl var2+4,%%esi\n\t"
            "andl %%eax,%%esi\n\t"
            "movl %%ebx,var3\n\t"
            "movl %%esi,var3+4\n\t"
            "movl %%edx,%%eax\n\t"
            "addl $1,%%eax\n\t"
            "movl %%eax,var4\n\t"
            "movl %%ebx,%%edx\n\t"
            "adcl $0,%%eax\n\t"
            "movl %%eax,var4+4\n\t"
            "decl %%ecx\n\t"
            "jnz 1b\n\t"
            "nop\n\t"
            "nop\n\t"
            "not %%esi"
            : /* no outputs */
            : /* no inputs */
            : "eax", "ebx", "edx", "ecx", "esi", "edi", "memory", "cc");

        si[i] = measure(s);
    }

    for (i = 0; i < RUNS; i++) {
        /* si[] holds unsigned long values, hence %lu rather than %d. */
        printf("%5lu cycles %5f ms\n", si[i],
               ((double)si[i] * (1000.0 / cpu_mhz)) / 1000000.0);
    }
    return 0;
}