Wednesday, 16 March 2016

Implement a Parallel Quick Sort algorithm using NVIDIA GPU or equivalent ARM board.

Implement a Parallel Quick Sort algorithm using NVIDIA GPU or equivalent ARM board.

 

#include<stdio.h>


const int threshold=100;
const int m=10;
__global__ void selection_sort(int *data, int left, int right)
{
 int temp;
 //printf("in selection sort\n");
    for(int i=left; i < right;i++)
     for(int j=i+1;j < =right;j++)
      if(data[i] > data[j])
      {
       temp=data[i];
       data[i]=data[j];
       data[j]=temp;
      }
}

__global__ void partition(int *a,int left,int right,int pivot,int *al,int *ah)
{


 int l,h;
 int diff=(right-left+1)/m;
 int k1=threadIdx.x*diff+left;
 int k2=k1+diff-1;
 if(threadIdx.x==m-1)
  k2=right;
// printf("in partition %d %d and k1= %d  k2= %d \n",left,right,k1,k2);
 l=h=k1;
 for(int i=k1; i < =k2;i++)
  {
   al[i]=ah[i]=-999;
  }
 for(int i=k1; i < =k2;i++)
 {
  if(a[i] < pivot)
  {
   al[l++]=a[i];
  }
  else
  {
   if(a[i] > pivot)
   {
    ah[h++]=a[i];
   }
  }
 }


}
//__global__
void quicksort(int *a, const int left, const int right)
{
 //printf("in quick sort %d %d \n",left,right);
    if (right-left <= threshold)
    {
     int *ad;
     cudaMalloc((void **)&ad,(right-left+1)*sizeof(int));
     cudaMemcpy(ad,a,(right-left+1)*sizeof(int),cudaMemcpyHostToDevice);
        selection_sort<<<1,1>>>(ad, left, right);
        cudaMemcpy(a,ad,(right-left+1)*sizeof(int),cudaMemcpyDeviceToHost);
        return;
    }
    int pivot=a[left];
    int *al,*ah;
    int *ad;
    cudaMalloc((void **)&ad,(right-left+1)*sizeof(int));
    cudaMalloc((void **)&al,(right-left+1)*sizeof(int));
    cudaMalloc((void **)&ah,(right-left+1)*sizeof(int));

    cudaMemcpy(ad,a,(right-left+1)*sizeof(int),cudaMemcpyHostToDevice);

    partition<<<1,m>>>(ad,left,right,pivot,al,ah);

    int al_h[right-left+1],ah_h[right-left+1];

        cudaMemcpy(al_h,al,(right-left+1)*sizeof(int),cudaMemcpyDeviceToHost);

        cudaMemcpy(ah_h,ah,(right-left+1)*sizeof(int),cudaMemcpyDeviceToHost);

    int i=0,k=0;

    while(i < right-left+1)
    {
     while(al_h[i]==-999 && i < right-left+1)
       i++;
     while(al_h[i]!=-999 && i < right-left+1)
     {
      al_h[k++]=al_h[i++];
     }
    }


    //quicksort<<<1,1>>>(al,0,k-1);
    quicksort(al_h,0,k-1);
    int p=left;
    int x=0;
   // printf("array al");
        while(x < k)
        {
         a[p++]=al_h[x++];
         //printf("%d  ",a[p]);

        }
        a[p]=pivot;
    i=0;
    k=0;
    while(i < right-left+1)
    {
       while(ah_h[i]==-999 && i < right-left+1)
      i++;
        while(ah_h[i]!=-999 && i < right-left+1)
        {
         ah_h[k++]=ah_h[i++];
        }
    }
    //quicksort<<<1,1>>>(ah,0,k-1);
    quicksort(ah_h,0,k-1);
    i=0;
    p++;
        while(i < k)
        {
         a[p++]=ah_h[i++];

        }
if(p-1!=right)
{
 printf("\nsome thing wrong %d %d\n",p,right);
}
}

void qsort(int *a,int n)
{
 //quicksort<<< 1, 1 >>>(a, 0,n-1);
 //printf("in qsort\n");
 quicksort(a, 0,n-1);

}

int main()
{
 int n = 20;
 int a[20];
 time_t t;
    srand((unsigned)time(&t));


    int x,flag;
    for (unsigned i = 0 ; i < n ; i++)
    {
     x=rand()%n;
     flag=0;
     for(int j=0;j < i;j++)
     {
      if(a[j]==x)
      {
       i--;
       flag=1;
       break;
      }

     }
     if(flag==0)
      a[i]=x;
    }
    printf("\n\n original array\n");
    for(int i=0; i < n;i++)
     printf("%d\t ",a[i]);
    quicksort(a,0,n-1);
    printf("\n\n after sorting\n");
    for(int i=0; i < n;i++)
         printf("%d\t ",a[i]);


}
/*
 * output:
 * original array
7  6  4  16  8  14  10  17  2  9  1  15  19  18  11  0  5  3  12  13

 after sorting
0  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19
 */

Write an application to and demonstrate the change in BeagleBoard/ ARM Cortex A5 /Microprocessor /CPU frequency or square wave of programmable frequency

Write an application to and demonstrate the change in BeagleBoard/ ARM Cortex A5 /Microprocessor /CPU frequency or square wave of programmable frequency

 

#include<stdlib.h> //System() Function
#include<signal.h> //signal Handler for CNTR-C
#include<iostream>  //cout , cin
#include<string>    //string 
#include<conio.h>

using namespace std;
void delay(int num);  

int main()
{
int option;
string frq;
string command;

while(1)
        {
                                                                                
cout <<  "\n1.  Press 1 to set the Beagle Board CPU Frequency\n";               
cout <<  "2.  Press 2 to get the current CPU Frequency\n";                      
cout <<  "3.  Press 3 or CNTR-Z to exit\n";                                     
cin > >  option;                                                                  
                                                                                
switch(option)                                                                  
        {                                                                       
        case 1:                                                                 
                cout << "Please Enter the frequency in range 100-1000 (MHz)\n"; 
                cin > >  frq;                                                     
                command="cpufreq-set -f "+frq+"MHz";                            
                                                                                
                if(system(command.c_str()))                                     
                cout << "\nCannot Set the Frequency. Make sure logged in as ROOT\n";
                                                                                
                                                                                
                                                                                
        case 2:                                                                 
                                                           
                while(!kbhit())                                              
                {                                                               
                        cout << "Press any key to go Back in main menu \n";         
                                                                                
                        system("cpufreq-info | tail -2");                       
                        delay(50000);                                           
                }                                                               
        break;                                                                  
        case 3:                                                                 
                exit(0);                                                        
                                                                                
        }                                                                       
        }                                                                       
}                                                                               
void delay(int num)                                                             
{                                                                               
        int i,j;                                                                
        for(i=0; i < num;i++)                                                      
                for(j=0; j < 1275;j++);                                            
} 
OUTPUT:-
root@beaglebone:/home/debian# g++ -o cpu cpu.cpp
Cannot create temporary file in ./: Read-only file system
Aborted
root@beaglebone:/home/debian# ./cpu

1.  Press 1 to set the Beagle Board CPU Frequency
2.  Press 2 to get the current CPU Frequency
3.  Press 3 or CNTR-Z to exit
2
Press CNTR+C to Back in main menu 
  current CPU frequency is 300 MHz (asserted by call to hardware).
  cpufreq stats: 300 MHz:70.37%, 600 MHz:16.56%, 800 MHz:0.09%, 1000 MHz:12.98%)
Press CNTR+C to Back in main menu 
  current CPU frequency is 300 MHz (asserted by call to hardware).
  cpufreq stats: 300 MHz:70.40%, 600 MHz:16.55%, 800 MHz:0.09%, 1000 MHz:12.96%)
^C
1.  Press 1 to set the Beagle Board CPU Frequency
2.  Press 2 to get the current CPU Frequency
3.  Press 3 or CNTR-Z to exit
1
Please Enter the frequency in range 100-1000 (MHz)
600
Press CNTR+C to Back in main menu 
  current CPU frequency is 600 MHz (asserted by call to hardware).              
  cpufreq stats: 300 MHz:70.62%, 600 MHz:16.42%, 800 MHz:0.09%, 1000 MHz:12.87%)
Press CNTR+C to Back in main menu                                               
  current CPU frequency is 600 MHz (asserted by call to hardware).              
  cpufreq stats: 300 MHz:70.58%, 600 MHz:16.47%, 800 MHz:0.09%, 1000 MHz:12.86%)
^C                                                                              
1.  Press 1 to set the Beagle Board CPU Frequency                               
2.  Press 2 to get the current CPU Frequency                                    
3.  Press 3 or CNTR-Z to exit                                                   
3                                                                               
root@beaglebone:/home/debian# ^C                                                
root@beaglebone:/home/debian# 

Implement n-ary search algorithm using OPENMP

Implement n-ary search algorithm using OPENMP

 

#include<stdio.h>
#include<omp.h>
int e,n;
int newarr[100];

int nary(int arr[],int s);

int main(){
const int s;
printf("Enter the size of the array u wish :");
scanf("%d",&s);
printf("Enter the array in ascending order : \n");
int arr[s];
int i=0;
for(i=0; i < s;i++)
scanf("%d",&arr[i]);
                                                    
printf("Enter the element to be searched: \n");
scanf("%d",&e);
X:
printf("Enter the value of n :");
scanf("%d",&n);
if(n > s)
{
printf("Value of n exceeds size of array!\n");
goto X;
}
int ans=nary(arr,s);
if(ans==0)
printf("Value not found in the given array.\n");
else
printf("The value has been successfully found !\n");
return 0;
}

int nary(int arr[],int s)
{
if(arr[0]==e)
return 1;

if(s/n==0)
return 0;
#pragma omp parallel num_threads(n)
{
int tid= omp_get_thread_num();
int t=0,k=0,i=0;
 for(i=0; i < s; i=i+(s/n))
  if(t==tid && (arr[i] < =e && arr[(i+s/n)-1] > =e))//allotting nodes to different threads & if value lies in the given thread set.
   for(k=0; k < (s/n);k++) 
    newarr[k]=arr[k+i];//extract the thread set elements.    
 t++;
}
nary(newarr,s/n);//recursive call to the function 
}

---------OUTPUT-------------

Output--

[estudymoney@localhost ~]$ gcc -o finalnary -fopenmp finalnary.c
[estudymoney@localhost ~]$ ./finalnary
Enter the size of the array u wish :9
Enter the array in ascending order : 
1
2
3
4
5
6
7
8
9
Enter the element to be searched: 
8
Enter the value of n :3
The value has been successfully found !

Implement nxn matrix parallel multiplication using CUDA/OpenCL GPU, use shared memory

Implement nxn matrix parallel multiplication using CUDA/OpenCL GPU, use shared memory

 

#include<stdio.h>

#includelt;math.h>

#define TILE_WIDTH 2

/*matrix multiplication kernels*/

//non shared
__global__ void
MatrixMul( int *Md , int *Nd , int *Pd , const int WIDTH )
{

           // calculate thread id

           unsigned int col = TILE_WIDTH*blockIdx.x + threadIdx.x ;

           unsigned int row = TILE_WIDTH*blockIdx.y + threadIdx.y ;

           Pd[row*WIDTH + col]=0;

         for (int k = 0 ; k<WIDTH ; k++ )
         {
                  Pd[row*WIDTH + col]+= Md[row * WIDTH + k ] * Nd[ k * WIDTH + col] ;
          }

}


// main routine
int main ()
{
   const int WIDTH = 2 ;
   int array1_h[WIDTH][WIDTH] ,array2_h[WIDTH][WIDTH],result_array_h[WIDTH][WIDTH];
  int *array1_d , *array2_d ,*result_array_d ; // device array
  int i , j ;
  //input in host array
  printf("Enter matrix1\n");
  for ( i = 0 ; i < WIDTH ; i++ )
  {
     for (j = 0 ; j < WIDTH ; j++ )
     {
        scanf("%d",&array1_h[i][j]);

     }
  }
  printf("Enter matrix2\n");
    for ( i = 0 ; i < WIDTH ; i++ )
    {
       for (j = 0 ; j < WIDTH ; j++ )
       {
          scanf("%d",&array2_h[i][j]);

       }
    }

  //create device array cudaMalloc ( (void **)&array_name, sizeofmatrixinbytes) ;

  cudaMalloc((void **) &array1_d , WIDTH*WIDTH*sizeof (int) ) ;

  cudaMalloc((void **) &array2_d , WIDTH*WIDTH*sizeof (int) ) ;




  //copy host array to device array; cudaMemcpy ( dest , source , WIDTH , direction )

  cudaMemcpy ( array1_d , array1_h , WIDTH*WIDTH*sizeof (int) , cudaMemcpyHostToDevice ) ;

  cudaMemcpy ( array2_d , array2_h , WIDTH*WIDTH*sizeof (int) , cudaMemcpyHostToDevice ) ;



  //allocating memory for resultent device array

  cudaMalloc((void **) &result_array_d , WIDTH*WIDTH*sizeof (int) ) ;





  //calling kernal

  dim3 dimGrid ( WIDTH/TILE_WIDTH , WIDTH/TILE_WIDTH ,1 ) ;

  dim3 dimBlock( TILE_WIDTH, TILE_WIDTH, 1 ) ;
  MatrixMul <<<dimGrid,dimBlock>>> ( array1_d , array2_d ,result_array_d , WIDTH) ;



  cudaMemcpy(result_array_h , result_array_d , WIDTH*WIDTH*sizeof(int) ,cudaMemcpyDeviceToHost) ;

  //printf the result array
  printf("matrix 1\n");
  for ( i = 0 ; i <  WIDTH ; i++ )
   {
       for ( j = 0 ; j < WIDTH ; j++ )
      {
         printf ("%d   ",array1_h[i][j] ) ;
      }
  printf ("\n") ;
 }
  printf("matrix 2\n");
  for ( i = 0 ; i < WIDTH ; i++ )
   {
       for ( j = 0 ; j < WIDTH ; j++ )
      {
         printf ("%d   ",array2_h[i][j] ) ;
      }
  printf ("\n") ;
 }
  printf("matrix result\n");
  for ( i = 0 ; i < WIDTH ; i++ )
  {
      for ( j = 0 ; j < WIDTH ; j++ )
     {
        printf ("%d   ",result_array_h[i][j] ) ;
     }
 printf ("\n") ;
}
 system("pause") ;
}
/*
 output:
 Enter matrix1
1 1 1 1
Enter matrix2

1 1 1 1
matrix 1
1   1
1   1
matrix 2
1   1
1   1
matrix result
2   2
2   2
 */

Vedic Mathematics method to find square of 2-digit number is used in a distributed programming. Use shared memory and distributed (multi-CPU) programming to complete the task.

 

shm-client - client program to demonstrate shared memory.

#include<sys/types.h>
#include<sys/ipc.h>
#include<sys/shm.h>
#include<stdio.h>
#include<unistd.h>
#include<string.h>
 

#define SHMSZ     27

int main()
{
    int shmid;
    key_t key;
    char *shm, *s;

    /*
     * We need to get the segment named
     * "5678", created by the server.
     */
    key = 5678;

    /*
     * Locate the segment.
     */
    if ((shmid = shmget(key, SHMSZ, 0666)) < 0) {
        perror("shmget");
        return 1;
    }

    /*
     * Now we attach the segment to our data space.
     */
    if ((shm = shmat(shmid, NULL, 0)) == (char *) -1) {
        perror("shmat");
        return 1;
    }

     int i,n1;
     char n;
   s=shm;
     printf("Enter a 2 digit number : ");
     
     scanf("%2d",&n1);
     
     n=(char)n1;
     
     
     
     s++;
     *s=n;
     *shm='*';
     
     while (*shm != '%')
        sleep(1);
     s=shm;
     s++;
    for (i=0; *s != NULL && i < 4;i++){
        printf("%c",*s);
        s++;
        }
    putchar('\n');

    /*
     * Finally, change the first character of the 
     * segment to '*', indicating we have read 
     * the segment.
     */
    *shm = '$';

    return 0;
}

/*
OUTPUT-CLIENT SIDE
[root@localhost vedic]# gcc shmcli.c
[root@localhost vedic]# ./a.out
Enter a 2 digit number : 14
0196

[student@localhost vedic]$ ./a.out
Enter a 2 digit number : 34
1156



*/server side
 

#include<sys/types.h>
#include<sys/ipc.h>
#include<sys/shm.h>
#include<stdio.h>
#include<unistd.h>
#include<string.h>
 

#define SHMSZ     27

int main()
{
    char c;
    int shmid;
    key_t key;
    char *shm, *s;
 int i,n=52;
    /*
     * We'll name our shared memory segment
     * "5678".
     */
    key = 5678;

    /*
     * Create the segment.
     */
    if ((shmid = shmget(key, SHMSZ, IPC_CREAT | 0666)) < 0) {
        perror("shmget");
        return 1;
    }

    /*
     * Now we attach the segment to our data space.
     */
    if ((shm = shmat(shmid, NULL, 0)) == NULL) {
        perror("shmat");
        return 1;
    }
    
   
     while (*shm != '*')
        sleep(1);
    s = shm;
 
   s++;
   char m=*s;
  
   n=(int)m;
  
    int r=square(n);// called function square function 
    s=shm;
    s++;
    
    int a=1000;
    for (i=0;i < 4;i++){
        *s = (char)(r/a+48);
        r=r%a;
        a=a/10;
        s++;
        }
    s = NULL;
    *shm='%';
   
    while (*shm != '$')
        sleep(1);

    return 0;
}
int square(int n){
 int a,b;
 a=n/10;
 b=n%10;
 int res=a*a*100+a*b*2*10+b*b;
 return res;
}
/*
OUTPUT SERVER SIDE
[root@localhost vedic]# gcc shmser.c
[root@localhost vedic]# ./a.out

*/
--------OUTPUT---------



vedic_op_server:
[estudymoney@localhost ~]# gcc shmser.c
[estudymoney@localhost ~]# ./a.out
[estudymoney@localhost ~]# 


vedic_op_client
[estudymoney@localhost ~]# gedit shmcli.c
[estudymoney@localhost ~]# gcc shmcli.c
shmcli.c: In function ‘main’:
shmcli.c:61:18: warning: comparison between pointer and integer [enabled by default]
     for (i=0; *s != NULL && i < 4;i++){
                  ^
[estudymoney@localhost ~]# ./a.out
Enter a 2 digit number : 12
0144
[estudymoney@localhost ~]#