#include <stdio.h>
#include <time.h>
/* Unsigned word type used for all benchmark data (expected to be 64 bits,
 * the width moved by the MMX movq instructions used in this file). */
typedef unsigned long long int BITBOARD;

/*
 * File-scope benchmark operands.  They have static storage duration, so all
 * of them start out as zero; main() assigns every one except rank_mask.
 */
static BITBOARD bitboard  __attribute__((__aligned__(32))); /* incremented each iteration */
static BITBOARD bitboard2 __attribute__((__aligned__(8)));  /* receives each iteration's result */
static BITBOARD Occupied  __attribute__((__aligned__(8)));  /* value that gets shifted left by 3 */
static BITBOARD rank_mask __attribute__((__aligned__(8)));  /* complemented, then decremented */
static BITBOARD all_mask  __attribute__((__aligned__(8)));  /* all-ones mask read by Compl() */
/*
 * Plain C operator wrappers.  normal_run() uses these as the baseline that
 * the MMX macros are timed against.  Every argument and every expansion is
 * fully parenthesised, and each argument is evaluated exactly once.
 */
#define Old_And(lhs, rhs)      ((lhs) & (rhs))
#define Old_Or(lhs, rhs)       ((lhs) | (rhs))
#define Old_Xor(lhs, rhs)      ((lhs) ^ (rhs))
#define Old_Compl(value)       (~(value))
#define Old_Shiftl(value, cnt) ((value) << (cnt))
#define Old_Shiftr(value, cnt) ((value) >> (cnt))
/*
 * And(a, b): bitwise AND of two BITBOARD operands using MMX.
 *
 * Both arguments are handed to the asm as memory operands.  `a` is loaded
 * into mm0, ANDed with `b`, and mm0 is stored into a result slot whose value
 * the statement expression yields.
 *
 * NOTE(review): the result slot has static storage, so one expansion site
 * shares a single slot across all of its executions (not reentrant).
 * MMX state is left active afterwards; see Emms().
 */
#define And(a, b) \
({ \
    static BITBOARD mmx_and_out_ __attribute__ ((aligned(8))); \
    __asm__ ( \
        "movq %[lhs], %%mm0\n\t" \
        "pand %[rhs], %%mm0\n\t" \
        "movq %%mm0, %[out]" \
        : [out] "=m" (mmx_and_out_) \
        : [lhs] "m" (a), [rhs] "m" (b) \
        : "memory", "%mm0" ); \
    mmx_and_out_; \
})
/*
 * Or(a, b): bitwise OR of two BITBOARD operands using MMX.
 *
 * Both arguments are handed to the asm as memory operands.  `a` is loaded
 * into mm0, ORed with `b`, and mm0 is stored into a result slot whose value
 * the statement expression yields.
 *
 * mm0 is listed as clobbered (as And() does) so the compiler knows the asm
 * overwrites that register.
 *
 * NOTE(review): the result slot has static storage, so one expansion site
 * shares a single slot across all of its executions (not reentrant).
 * MMX state is left active afterwards; see Emms().
 */
#define Or(a, b) \
({ \
    static BITBOARD mmx_or_out_ __attribute__ ((aligned(8))); \
    __asm__ ( \
        "movq %1, %%mm0\n\t" \
        "por %2, %%mm0\n\t" \
        "movq %%mm0, %0" \
        : "=m" (mmx_or_out_) : "m" (a), "m" (b) : "memory", "%mm0" ); \
    mmx_or_out_; \
})
/*
 * Xor(a, b): bitwise exclusive OR of two BITBOARD operands using MMX.
 *
 * Both arguments are handed to the asm as memory operands.  `a` is loaded
 * into mm0, XORed with `b`, and mm0 is stored into a result slot whose value
 * the statement expression yields.
 *
 * mm0 is listed as clobbered (as And() does) so the compiler knows the asm
 * overwrites that register.
 *
 * NOTE(review): the result slot has static storage, so one expansion site
 * shares a single slot across all of its executions (not reentrant).
 * MMX state is left active afterwards; see Emms().
 */
#define Xor(a, b) \
({ \
    static BITBOARD mmx_xor_out_ __attribute__ ((aligned(8))); \
    __asm__ ( \
        "movq %1, %%mm0\n\t" \
        "pxor %2, %%mm0\n\t" \
        "movq %%mm0, %0" \
        : "=m" (mmx_xor_out_) : "m" (a), "m" (b) : "memory", "%mm0" ); \
    mmx_xor_out_; \
})
/*
 * Compl(a): bitwise complement of a BITBOARD operand using MMX.
 *
 * MMX has no plain NOT, so this uses PANDN, which computes
 * (~destination) & source.  With `a` in mm0 and all_mask as the source the
 * result is ~a & all_mask, i.e. ~a as long as all_mask holds all ones
 * (main() sets it to 0xffffffffffffffff).
 *
 * all_mask is passed as a real "m" input operand rather than being named
 * inside the asm template: the compiler then sees that the variable is read,
 * keeps it alive, and emits an addressing mode that is valid for the current
 * code model (a bare symbol in the template is an absolute reference).
 * mm0 is listed as clobbered (as And() does) because the asm overwrites it.
 *
 * NOTE(review): the result slot has static storage, so one expansion site
 * shares a single slot across all of its executions (not reentrant).
 * MMX state is left active afterwards; see Emms().
 */
#define Compl(a) \
({ \
    static BITBOARD mmx_compl_out_ __attribute__ ((aligned(8))); \
    __asm__ ( \
        "movq %1, %%mm0\n\t" \
        "pandn %2, %%mm0\n\t" \
        "movq %%mm0, %0" \
        : "=m" (mmx_compl_out_) : "m" (a), "m" (all_mask) : "memory", "%mm0" ); \
    mmx_compl_out_; \
})
/*
 * Shiftl(a, b): logical left shift of a BITBOARD operand by `b` bits using
 * MMX (PSLLQ).
 *
 * `a` is handed to the asm as a memory operand, loaded into mm0, shifted,
 * and mm0 is stored into a result slot whose value the statement expression
 * yields.  `b` may be an integer constant (immediate form) or a memory
 * operand.
 *
 * NOTE(review): in the memory form PSLLQ reads a 64-bit count, so a
 * non-constant `b` should be a 64-bit object; this file only passes the
 * constant 3.
 *
 * mm0 is listed as clobbered (as And() does) so the compiler knows the asm
 * overwrites that register.
 *
 * NOTE(review): the result slot has static storage, so one expansion site
 * shares a single slot across all of its executions (not reentrant).
 * MMX state is left active afterwards; see Emms().
 */
#define Shiftl(a, b) \
({ \
    static BITBOARD mmx_shl_out_ __attribute__ ((aligned(8))); \
    __asm__ ( \
        "movq %1, %%mm0\n\t" \
        "psllq %2, %%mm0\n\t" \
        "movq %%mm0, %0" \
        : "=m" (mmx_shl_out_) : "m" (a), "im" (b) : "memory", "%mm0" ); \
    mmx_shl_out_; \
})
/*
 * Shiftr(a, b): logical right shift of a BITBOARD operand by `b` bits using
 * MMX (PSRLQ).
 *
 * `a` is handed to the asm as a memory operand, loaded into mm0, shifted,
 * and mm0 is stored into a result slot whose value the statement expression
 * yields.  `b` may be an integer constant (immediate form) or a memory
 * operand.
 *
 * NOTE(review): in the memory form PSRLQ reads a 64-bit count, so a
 * non-constant `b` should be a 64-bit object.
 *
 * mm0 is listed as clobbered (as And() does) so the compiler knows the asm
 * overwrites that register.
 *
 * NOTE(review): the result slot has static storage, so one expansion site
 * shares a single slot across all of its executions (not reentrant).
 * MMX state is left active afterwards; see Emms().
 */
#define Shiftr(a, b) \
({ \
    static BITBOARD mmx_shr_out_ __attribute__ ((aligned(8))); \
    __asm__ ( \
        "movq %1, %%mm0\n\t" \
        "psrlq %2, %%mm0\n\t" \
        "movq %%mm0, %0" \
        : "=m" (mmx_shr_out_) : "m" (a), "im" (b) : "memory", "%mm0" ); \
    mmx_shr_out_; \
})
/*
 * Emms(): execute the EMMS instruction, which marks the MMX/x87 register
 * file as empty again.  Intended to follow a run of MMX code before any
 * x87 floating-point code executes.  The asm has no outputs, so it is
 * volatile either way; the qualifier is spelled out for clarity.
 */
#define Emms() __asm__ __volatile__ ("emms")
/*
 * Baseline benchmark: runs 30,000,000 iterations of
 *     bitboard2 = bitboard & (Occupied << 3) & ~rank_mask
 * with the plain C Old_* macros, incrementing bitboard and decrementing
 * rank_mask every iteration, then prints the elapsed clock() ticks as
 * "Normal: <ticks>".
 *
 * Returns 0.  (The definition used to rely on implicit int and returned no
 * defined value; the return type is now explicit.)
 */
int normal_run(void)
{
    clock_t t1, t2;
    unsigned int i;

    /* Spin until clock() changes so the measurement starts on a fresh tick. */
    t1 = clock();
    while (t1 == clock()) {
    }
    t1 = clock();

    for (i = 0; i < 30000000; i++) {
        bitboard2 = Old_And(Old_And(bitboard, Old_Shiftl(Occupied, 3)),
                            Old_Compl(rank_mask));
        bitboard++;
        rank_mask--;
    }
    t2 = clock();

    /* clock_t need not be int: convert explicitly so the argument matches
     * the conversion specifier. */
    printf("Normal: %ld\n", (long)(t2 - t1));
    return 0;
}
/*
 * MMX benchmark: runs 30,000,000 iterations of
 *     bitboard2 = bitboard & (Occupied << 3) & ~rank_mask
 * with the MMX macros And/Shiftl/Compl, incrementing bitboard and
 * decrementing rank_mask every iteration, then prints the elapsed clock()
 * ticks as "MMX: <ticks>".
 *
 * Returns 0.  (The definition used to rely on implicit int and returned no
 * defined value; the return type is now explicit.)
 */
int mmx_run(void)
{
    clock_t t1, t2;
    unsigned int i;

    /* Spin until clock() changes so the measurement starts on a fresh tick. */
    t1 = clock();
    while (t1 == clock()) {
    }
    t1 = clock();

    for (i = 0; i < 30000000; i++) {
        bitboard2 = And(And(bitboard, Shiftl(Occupied, 3)), Compl(rank_mask));
        bitboard++;
        rank_mask--;
    }

    /* The loop leaves the MMX state active.  Execute EMMS before calling
     * back into the C library so that any x87 floating-point code there
     * starts from an empty register stack. */
    __asm__ __volatile__ ("emms");

    t2 = clock();

    /* clock_t need not be int: convert explicitly so the argument matches
     * the conversion specifier. */
    printf("MMX: %ld\n", (long)(t2 - t1));
    return 0;
}
/*
 * Entry point: prints the addresses of four of the benchmark operands,
 * seeds the operands, then runs the plain C and MMX benchmarks back to back
 * eight times.  rank_mask is not assigned here and so starts at zero.
 *
 * Returns 0.
 */
int main(void)
{
    int i;

    /* %p requires void * arguments. */
    printf("%p %p %p %p\n", (void *)&bitboard, (void *)&bitboard2,
           (void *)&Occupied, (void *)&rank_mask);

    /* Unsigned suffixes: several of these values exceed LLONG_MAX. */
    Occupied = 0x80FF442288445533ULL;
    bitboard = 0x8080808080804423ULL;
    bitboard2 = 0x0110101001434400ULL;
    all_mask = 0xffffffffffffffffULL; /* must be all ones for Compl() */

    for (i = 0; i < 8; i++) {
        normal_run();
        mmx_run();
    }
    return 0;
}