0 Replies Latest reply on Oct 4, 2013 6:51 AM by aditya.patel

    Getting Unusual behavior in dgemm for ACML 5.3.1

    aditya.patel

      Hi All,

       

      I am currently trying to optimize a convolution by a simple Matrix-Matrix Multiplication using ACML 5.3.1, GCC Compiler 4.7 and running it on AMD 6376 Processor. When I am executing the following program (see below), I am getting unusally high cycles: Cycles consumed = ~ 89111 .

       

       

      #include <stdio.h>

      #include <stdlib.h>

      #include <time.h>

      #include <stdint.h> /* for uint64_t */ //timestamping

      #include <stdio.h>

      //#include <math.h>

      //#include "amdlibm.h"

      #include <acml.h>

      //#define REPLACE_WITH_AMDLIBM

      #define MATRIX_IDX(n, i, j) j*n + i

      #define MATRIX_ELEMENT(A, m, n, i, j) A[ MATRIX_IDX(m, i, j) ]

       

       

       

       

      uint64_t time1, time2;

      uint64_t time_diff_1 =0;

      /* For calculating the time */

      static __inline__ uint64_t getticks(void)

      {

           unsigned int a, d;

           /*asm("cpuid");*/

           __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));

           return ((a) | (((uint64_t)d) << 32));

      }

       

      void init_matrix(double* A, int m, int n)

      {

      int i,j;

         double element = 1.0;

         for (j = 0; j < n; j++)

         {

            for ( i = 0; i < m; i++)

            {

               MATRIX_ELEMENT(A, m, n, i, j) = element;

               element *= 0.9;

            }

         }

      }

       

       

      void print_matrix(const double* A, int m, int n)

      {

      int i,j;

         for ( i = 0; i < m; i++)

         {

            for ( j = 0; j < n; j++)

            {

                printf("%8.4f", MATRIX_ELEMENT(A, m, n, i, j));

            }

            printf("\n");

         }

      }

       

       

      int main(int argc, char** argv)

      {

      int m = 1;

      int n = 1;

      int k = 10;

       

       

      double A[m * k];

      double B[k * n];

      double C[m * n];

       

       

      init_matrix(A, m, k);

      init_matrix(B, k, n);

       

       

      time1 = getticks();

      dgemm('N', 'N', m, n, k, 1.0, A, m, B, k, 0.0, C, m);

      time2 = getticks(); 

      time_diff_1 += (uint64_t)((time2-time1));  

      printf("Cycles consumed = %ld\n",time_diff_1);

      //  printf("\nMatrix C (%d x %d) = AB is:\n", m, n);

      //  print_matrix(C, m, n);

       

       

         return 0;

      }

       

       

      Can someone please discuss what could be the possible reason.?

       

      Thanks!