Wednesday, 16 March 2016

Implement a Parallel Quick Sort algorithm using NVIDIA GPU or equivalent ARM board.

Implement a Parallel Quick Sort algorithm using NVIDIA GPU or equivalent ARM board.

 

#include<stdio.h>


const int threshold=100;
const int m=10;
__global__ void selection_sort(int *data, int left, int right)
{
 int temp;
 //printf("in selection sort\n");
    for(int i=left; i < right;i++)
     for(int j=i+1;j < =right;j++)
      if(data[i] > data[j])
      {
       temp=data[i];
       data[i]=data[j];
       data[j]=temp;
      }
}

__global__ void partition(int *a,int left,int right,int pivot,int *al,int *ah)
{


 int l,h;
 int diff=(right-left+1)/m;
 int k1=threadIdx.x*diff+left;
 int k2=k1+diff-1;
 if(threadIdx.x==m-1)
  k2=right;
// printf("in partition %d %d and k1= %d  k2= %d \n",left,right,k1,k2);
 l=h=k1;
 for(int i=k1; i < =k2;i++)
  {
   al[i]=ah[i]=-999;
  }
 for(int i=k1; i < =k2;i++)
 {
  if(a[i] < pivot)
  {
   al[l++]=a[i];
  }
  else
  {
   if(a[i] > pivot)
   {
    ah[h++]=a[i];
   }
  }
 }


}
//__global__
void quicksort(int *a, const int left, const int right)
{
 //printf("in quick sort %d %d \n",left,right);
    if (right-left <= threshold)
    {
     int *ad;
     cudaMalloc((void **)&ad,(right-left+1)*sizeof(int));
     cudaMemcpy(ad,a,(right-left+1)*sizeof(int),cudaMemcpyHostToDevice);
        selection_sort<<<1,1>>>(ad, left, right);
        cudaMemcpy(a,ad,(right-left+1)*sizeof(int),cudaMemcpyDeviceToHost);
        return;
    }
    int pivot=a[left];
    int *al,*ah;
    int *ad;
    cudaMalloc((void **)&ad,(right-left+1)*sizeof(int));
    cudaMalloc((void **)&al,(right-left+1)*sizeof(int));
    cudaMalloc((void **)&ah,(right-left+1)*sizeof(int));

    cudaMemcpy(ad,a,(right-left+1)*sizeof(int),cudaMemcpyHostToDevice);

    partition<<<1,m>>>(ad,left,right,pivot,al,ah);

    int al_h[right-left+1],ah_h[right-left+1];

        cudaMemcpy(al_h,al,(right-left+1)*sizeof(int),cudaMemcpyDeviceToHost);

        cudaMemcpy(ah_h,ah,(right-left+1)*sizeof(int),cudaMemcpyDeviceToHost);

    int i=0,k=0;

    while(i < right-left+1)
    {
     while(al_h[i]==-999 && i < right-left+1)
       i++;
     while(al_h[i]!=-999 && i < right-left+1)
     {
      al_h[k++]=al_h[i++];
     }
    }


    //quicksort<<<1,1>>>(al,0,k-1);
    quicksort(al_h,0,k-1);
    int p=left;
    int x=0;
   // printf("array al");
        while(x < k)
        {
         a[p++]=al_h[x++];
         //printf("%d  ",a[p]);

        }
        a[p]=pivot;
    i=0;
    k=0;
    while(i < right-left+1)
    {
       while(ah_h[i]==-999 && i < right-left+1)
      i++;
        while(ah_h[i]!=-999 && i < right-left+1)
        {
         ah_h[k++]=ah_h[i++];
        }
    }
    //quicksort<<<1,1>>>(ah,0,k-1);
    quicksort(ah_h,0,k-1);
    i=0;
    p++;
        while(i < k)
        {
         a[p++]=ah_h[i++];

        }
if(p-1!=right)
{
 printf("\nsome thing wrong %d %d\n",p,right);
}
}

void qsort(int *a,int n)
{
 //quicksort<<< 1, 1 >>>(a, 0,n-1);
 //printf("in qsort\n");
 quicksort(a, 0,n-1);

}

int main()
{
 int n = 20;
 int a[20];
 time_t t;
    srand((unsigned)time(&t));


    int x,flag;
    for (unsigned i = 0 ; i < n ; i++)
    {
     x=rand()%n;
     flag=0;
     for(int j=0;j < i;j++)
     {
      if(a[j]==x)
      {
       i--;
       flag=1;
       break;
      }

     }
     if(flag==0)
      a[i]=x;
    }
    printf("\n\n original array\n");
    for(int i=0; i < n;i++)
     printf("%d\t ",a[i]);
    quicksort(a,0,n-1);
    printf("\n\n after sorting\n");
    for(int i=0; i < n;i++)
         printf("%d\t ",a[i]);


}
/*
 * output:
 * original array
7  6  4  16  8  14  10  17  2  9  1  15  19  18  11  0  5  3  12  13

 after sorting
0  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19
 */

No comments:

Post a Comment