/* -*- Mode: c -*-
*
*  Copyright 1993 Massachusetts Institute of Technology
*
*  Permission to use, copy, modify, distribute, and sell this software and its
*  documentation for any purpose is hereby granted without fee, provided that
*  the above copyright notice appear in all copies and that both that
*  copyright notice and this permission notice appear in supporting
*  documentation, and that the name of M.I.T. not be used in advertising or
*  publicity pertaining to distribution of the software without specific,
*  written prior permission.  M.I.T. makes no representations about the
*  suitability of this software for any purpose.  It is provided "as is"
*  without express or implied warranty.
*
*/

/* Written by Jeremy Lin, [email protected] */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "measure.h"

/*
 * mmxtest(data1, data2, result)
 *
 * Adds eight packed bytes with MMX: loads the quadword at `data1` into
 * %mm0, adds the quadword at `data2` to it byte-wise with `paddb`, and
 * stores %mm0 to `result`.  All three arguments are memory operands
 * ("m" / "=m" constraints), so pass lvalues such as buf[i].
 *
 * NOTE(review): %mm0 is modified but not listed as a clobber, and the
 * statement is not `volatile` -- confirm this is safe for the compiler in
 * use before re-enabling the benchmark.  The disabled benchmark in main()
 * issues `emms` after its MMX section.
 */
#define mmxtest(data1, data2, result)  __asm__("movq %1,%%mm0\n\tpaddb %2, %%mm0\n\tmovq %%mm0,%0": "=m" (result) : "m" (data1), "m" (data2))

/* Number of quadwords (8-byte units) in the test data. */
#define NUMQWORDS 100000
/*
 * Loop bound, in bytes, used by the benchmark loops.
 * NOTE(review): a quadword is 8 bytes, yet this multiplies by 4 while
 * TESTSIZE multiplies by 8 -- confirm which one is intended.
 */
#define NUMBYTES (NUMQWORDS * 4)
/* Allocation size in bytes: the data plus 8 bytes of alignment slack. */
#define TESTSIZE ((NUMQWORDS * 8) + 8)

/*
 * Return the first 8-byte-aligned address strictly above `raw`, i.e. between
 * 1 and 8 bytes past it.  The caller must have allocated at least 8 bytes
 * more than it intends to use behind the returned pointer.
 */
static unsigned char *align_qword(unsigned char *raw)
{
 return raw + (8 - ((uintptr_t)raw & 7));
}

/*
 * Benchmark driver comparing byte-wise addition done with the MMX `paddb`
 * instruction (see mmxtest) against a plain C loop.
 *
 * In the current state of the file every measured loop, the test-data setup
 * and the getTSC() calls of the live path are disabled, so the program only
 * allocates three scratch buffers, derives 8-byte-aligned pointers into
 * them, prints a cycle count of zero and frees the buffers.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a scratch buffer cannot be
 * allocated.
 */
int main(void)
{
 /*
  * 8 usable bytes plus 8 bytes of alignment slack.
  * NOTE(review): the disabled loops below index up to NUMBYTES bytes; the
  * buffers must be grown (TESTSIZE looks like the intended size) before
  * any of them is re-enabled.
  */
 enum { SCRATCH_BYTES = 16 };
 unsigned char *scratch1, *scratch2, *scratch3, *data1, *data2, *result;
 /*
  * Zero-initialised because the getTSC() calls of the live path are
  * disabled; reading an indeterminate counter in the printf below would be
  * undefined behaviour.
  */
 MSR start = {0}, end = {0};

/* Make sure you have quad-word aligned memory */
 scratch1 = malloc(SCRATCH_BYTES);
 scratch2 = malloc(SCRATCH_BYTES);
 scratch3 = malloc(SCRATCH_BYTES);
 if (scratch1 == NULL || scratch2 == NULL || scratch3 == NULL) {
   fprintf(stderr, "out of memory\n");
   free(scratch1);
   free(scratch2);
   free(scratch3);
   return EXIT_FAILURE;
 }

 data1 = align_qword(scratch1);
 data2 = align_qword(scratch2);
 result = align_qword(scratch3);

 /* Only the disabled code below reads these pointers. */
 (void)data1;
 (void)data2;
 (void)result;

/* Create Test Data */

/*

 for(int i = 0; i < NUMBYTES; i++) {
   data1[i] = i;
   data2[i] = i * 5;
 }

*/

#if 0

/* Add the 8-bit quantities using MMX */

 getTSC(start);
 /*
 for(int j = 0; j < 1000; j++)
   for(int i = 0; i < NUMBYTES; i += 8) {
     mmxtest(data1[i], data2[i], result[i]);
   }
 */
 getTSC(end);

/* Print data and Number of cycles used */

 printf("MMX: %d\n", (int)(end.low - start.low));
 /*
 for(int i = 390000; i < 390012; i++)
   printf("%x %x %x\n", data1[i], data2[i], result[i]);
 */
 __asm__("emms" : : );

#endif

/* Add the 8-bit quantities "normally" */

/*  getTSC(start); */
 /*
 for(int j = 0; j < 1000; j++)
   for(int i = 0; i < NUMBYTES; i++)
     result[i] = data1[i] + data2[i];
 */
/*  getTSC(end); */

/* Print data and Number of cycles used */

 /*
  * NOTE(review): the type of MSR's `low` member is declared in measure.h
  * and not visible here; the cast makes the argument match %d regardless.
  */
 printf("Normal: %d\n", (int)(end.low - start.low));
 /*
  * The sample dump stays disabled together with its loop header.  With
  * only the printf commented out, the loop header captured the following
  * free() call and released scratch1 twelve times.
  *
 for(int i = 390000; i < 390012; i++)
   printf("%x %x %x\n", data1[i], data2[i], result[i]);
  */
 free(scratch1);
 free(scratch2);
 free(scratch3);
 return EXIT_SUCCESS;
}