Issue parallelizing this c code with openmp

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • saliorkeo
    New Member
    • Mar 2012
    • 1

    Issue parallelizing this c code with openmp

    How can I parallelize this code with OpenMP?. The result I get is not correct.

    I try to use temporary variables p1aux, p2aux, and psumaux because the Reduction clause cannot be used with pointers or intrinsic functions. But as I said the result is incorrect.

    When I say "result is not correct", I would say: The calculations of array1 + array2 are stored in the matrix sumsse. The calculations are correct until the component sumsse[50] [50] [50], more or less, but the other components of the matrix are always 0.

    Do You have any idea what might happen?.

    If anyone can help me, thank you very much.

    Code:
    #include <stdio.h>
    #ifdef __SSE2__
    #include <emmintrin.h>
    #endif
    
    #include <omp.h>
    #include <stdlib.h>
    #include <math.h>
    
    double __attribute__((aligned(16))) array1[256][256][256],array2[256][256][256];
    double __attribute__((aligned(16))) sumsse[256][256][256];
    
    
    
    int main ()
    {
    
        int k,j,i,dim;
        dim=256;
    
    
        __m128d *p1= (__m128d*) &array1[0][0][0];
        __m128d *p2= (__m128d*) &array2[0][0][0];
        __m128d *psum= (__m128d*) &sumsse[0][0][0];
    
        __m128d *p1aux= (__m128d*) &array1[0][0][0];
        __m128d *p2aux= (__m128d*) &array2[0][0][0];
        __m128d *psumaux= (__m128d*) &sumsse[0][0][0];
    
        int nthreads =(8);
        omp_set_num_threads(nthreads);
    
    
        //initializa array2
    
        for(k = 0; k < dim; ++k){
            for(j = 0; j < dim; ++j){
                for(i = 0; i < dim; ++i){
                    array2[k][j][i] = 1.0;
                }
            }
        }
    
        //initialize array1
    
        for(k = 0; k < dim; ++k){
            for(j = 0; j < dim; ++j){
                for(i = 0; i < dim; ++i){
                    array1[k][j][i] = i + j + k;
                }
            }
        }
    
        //initialize array sumsse
    
        for(k = 0; k < dim; ++k){
            for(j = 0; j < dim; ++j){
                for(i = 0; i < dim; ++i){
                    sumsse[k][j][i] = 0.0;
                }
            }
        }
    
        //add array1 + array2 with sse 
    
    #pragma omp parallel firstprivate(p1,p2,p1aux,p2aux)
        {
    
        for(k = 0; k < dim; ++k){
    
            for(j = 0; j < dim; ++j){
    
    #pragma omp for private(i)
    
                    for(i = 0; i < dim; i += 2){
    
                        *psum = _mm_add_pd(*p1,*p2);
    
                        psum = psumaux + 1;
                        p1 = p1aux+1;
                        p2 = p2aux+1;
                        psumaux = psum;
                        p1aux = p1;
                        p2aux = p2;
                    }
                }
            }
    
        }// end parallel
    
    
        //show some datas
    
    
        printf("Elementosse=10 sumsse=%f",sumsse[10][10][10]);
        printf("\n");
        printf("Elementosse=50 sumsse=%f",sumsse[50][50][50]);   
        printf("Elementosse=100 sumsse=%f",sumsse[100][100][100]);
        printf("\n");
        printf("Elementosse=120 sumsse=%f",sumsse[120][120][120]);
        printf("\n");
        printf("Elementosse=200 sumsse=%f",sumsse[200][200][200]);
        printf("\n");
    
        return 0;
    }
Working...