Implement distributed matrix multiplication using CUDA / OpenMPI (PL-II Lab)
// Matrix multiplication program using CUDA GPU threads
#include <stdio.h>
#include <stdlib.h>
int Width=32; //Width of the square matrices being multiplied
// BUG FIX: hTile_Width was 300 — a 300x300 thread block is 90000 threads,
// far beyond CUDA's limit of 1024 threads per block, so every kernel launch
// failed silently and the GPU result stayed uninitialized (all zeros in the
// sample output). It must also equal dTile_Width, which the kernel uses for
// indexing. 32x32 = 1024 threads is the maximum legal square block.
int hTile_Width=32;
__device__ int dTile_Width=32; //Device copy of tile width (must match hTile_Width)
/* Compare two row*col matrices element by element.
   Returns the index of the first differing element, or -1 when both
   matrices are identical. */
int CompareMatrix(int *M1,int *M2,int row,int col)
{
int total = row * col;
int idx = 0;
while (idx < total) {
    if (M1[idx] != M2[idx])
        return idx;   /* first mismatch found */
    idx++;
}
return -1;            /* all elements equal */
}
/* Print an m x n matrix (row-major) pointed to by ptr, one row per line.
   Arguments:
   1 - matrix pointer
   2 - number of rows
   3 - number of columns */
void display_matrix(int *ptr,int m, int n)
{
int total = m * n;
for (int idx = 0; idx < total; idx++) {
    if (idx % n == 0)      /* start of a new row */
        printf("\n");
    printf("%d ", ptr[idx]);
}
}
/*Kernel function
---------------------------------------------
Arguments:
1-1st Matrix pointer (Md, device memory)
2-2nd Matrix pointer (Nd, device memory)
3-Resultant matrix pointer (Pd, device memory)
4-Width of square matrix
Each thread computes one element Pd[row*w+col] as the dot product of one row
of Md and one column of Nd. Expects a 2D grid of 2D blocks covering a w x w
matrix; out-of-range threads do nothing.
__global__ qualifier indicates the function is called from host code and runs on the device (GPU).
---------------------------------------------
*/
__global__ void MatrixMulKernel(int *Md,int *Nd,int *Pd,int w)
{
// BUG FIX: the original computed indices with the device constant dTile_Width
// (32), which silently disagrees with the block size the host actually
// launches (hTile_Width, originally 300). Using blockDim directly keeps the
// indexing correct for any launch configuration.
int row=blockIdx.y*blockDim.y+threadIdx.y; //Global row index of this thread
int col=blockIdx.x*blockDim.x+threadIdx.x; //Global column index of this thread
int Pvalue=0;
if(row<w && col<w){ //Bounds guard: the grid may overshoot the matrix edge
for(int k=0;k<w;++k)
{
Pvalue+=Md[row*w+k]*Nd[k*w+col]; //Accumulate dot product of row and column
}
Pd[row*w+col]=Pvalue;
}
}
/* CPU reference implementation: P = M x N for square w x w matrices stored
   row-major. Used to verify the GPU result. */
void NormalMultiplication(int *M,int *N,int *P,int w)
{
for (int r = 0; r < w; r++) {
    int *Mrow = M + r * w;        /* row r of M, hoisted out of inner loops */
    for (int c = 0; c < w; c++) {
        int acc = 0;
        for (int k = 0; k < w; k++)
            acc += Mrow[k] * N[k * w + c];
        P[r * w + c] = acc;
    }
}
}
/*
-----------------------------------------------
Arguments
1-1st Matrix pointer (host)
2-2nd Matrix pointer (host)
3-Resultant matrix pointer (host, filled on return)
4-Width of square matrix
Copies the input matrices from host memory to device global memory, launches
the multiplication kernel over a grid of hTile_Width x hTile_Width blocks,
reports any launch failure, and copies the result back to the host.
-----------------------------------------------
*/
void MatrixMultiplication(int *M,int *N,int *P,int w)
{ int *Md,*Nd,*Pd; //Matrix pointers in device memory i.e. GPU
size_t size=(size_t)w*w*sizeof(int); //BUG FIX: was sizeof(int*) — the elements are int, not pointers
int x;
cudaMalloc((void**)&Md,size); //Allocate memory in device global memory
cudaMemcpy(Md,M,size,cudaMemcpyHostToDevice); //Copy matrix data from host to device memory
cudaMalloc((void**)&Nd,size);
cudaMemcpy(Nd,N,size,cudaMemcpyHostToDevice);
cudaMalloc((void**)&Pd,size);
x=(w+hTile_Width-1)/hTile_Width; //Ceiling division: number of tiles needed to cover the matrix
dim3 dimGrid(x,x); //Blocks arrangement in the grid
dim3 dimBlock(hTile_Width,hTile_Width); //Threads arrangement in a block (total must not exceed 1024)
MatrixMulKernel<<<dimGrid,dimBlock>>>(Md,Nd,Pd,w); //Kernel invocation with grid and block specification
cudaError_t err=cudaGetLastError(); //BUG FIX: launch failures (e.g. an oversized block) were silently ignored
if(err!=cudaSuccess)
printf("\nKernel launch failed: %s",cudaGetErrorString(err));
cudaMemcpy(P,Pd,size,cudaMemcpyDeviceToHost); //Copy resultant matrix from device to host
//Free device memory
cudaFree(Md);
cudaFree(Nd);
cudaFree(Pd);
}
/*
Program entry point: generate two random Width x Width integer matrices,
multiply them on the GPU and on the CPU, print both results, and report
whether they agree.
*/
int main()
{
int mat_size=Width*Width*sizeof(int); //BUG FIX: was sizeof(int*) — elements are int, not pointers
int tot_elements=Width*Width;
int *M,*N,*P,*ptr,*P_CPU; // Host matrix pointers
int a=0;
int i=0;
M=(int*)malloc(mat_size); //Allocate memory on host for matrices
N=(int*)malloc(mat_size);
P=(int*)malloc(mat_size);
P_CPU=(int*)malloc(mat_size);
if(M==NULL||N==NULL||P==NULL||P_CPU==NULL) //Guard against allocation failure
{ printf("\nHost memory allocation failed\n");
return 1;
}
ptr=M;
printf("\nGenerating random elements for matrix"); //BUG FIX: typo "Genarating" in user-facing message
for(i=0;i<tot_elements;i++)
{ a=(rand()%10); //Generates random number in 0 to 9 range
*ptr=a;
ptr++;
}
ptr=N;
for(i=0;i<tot_elements;i++)
{
a=(rand()%10);
*ptr=a;
ptr++;
}
printf("\nMatrix generated");
MatrixMultiplication(M,N,P,Width); //GPU multiplication
printf("\nDone with GPU");
display_matrix(N,Width,Width);
printf("\nMatrix Multiplication (GPU) is :");
display_matrix(P,Width,Width);
NormalMultiplication(M,N,P_CPU,Width); //CPU reference multiplication
printf("\nDone with CPU");
printf("\n\nMatrix Multiplication is :");
display_matrix(P_CPU,Width,Width);
if(CompareMatrix(P,P_CPU,Width,Width)==-1)
printf("\n\nBoth matrix are same\n");
else
{ printf("\n\nBoth matrix are not same\n\n");
//printf("%d",CompareMatrix(P,P_CPU,Width,Width));
}
//BUG FIX: host buffers were never freed
free(M);
free(N);
free(P);
free(P_CPU);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
int Width=32; //Width of the square matrices being multiplied
// BUG FIX: hTile_Width was 300 — a 300x300 thread block is 90000 threads,
// far beyond CUDA's limit of 1024 threads per block, so every kernel launch
// failed silently and the GPU result stayed uninitialized (all zeros in the
// sample output). It must also equal dTile_Width, which the kernel uses for
// indexing. 32x32 = 1024 threads is the maximum legal square block.
int hTile_Width=32;
__device__ int dTile_Width=32; //Device copy of tile width (must match hTile_Width)
/* Compare two row*col matrices element by element.
   Returns the index of the first differing element, or -1 when both
   matrices are identical. */
int CompareMatrix(int *M1,int *M2,int row,int col)
{
int total = row * col;
int idx = 0;
while (idx < total) {
    if (M1[idx] != M2[idx])
        return idx;   /* first mismatch found */
    idx++;
}
return -1;            /* all elements equal */
}
/* Print an m x n matrix (row-major) pointed to by ptr, one row per line.
   Arguments:
   1 - matrix pointer
   2 - number of rows
   3 - number of columns */
void display_matrix(int *ptr,int m, int n)
{
int total = m * n;
for (int idx = 0; idx < total; idx++) {
    if (idx % n == 0)      /* start of a new row */
        printf("\n");
    printf("%d ", ptr[idx]);
}
}
/*Kernel function
---------------------------------------------
Arguments:
1-1st Matrix pointer (Md, device memory)
2-2nd Matrix pointer (Nd, device memory)
3-Resultant matrix pointer (Pd, device memory)
4-Width of square matrix
Each thread computes one element Pd[row*w+col] as the dot product of one row
of Md and one column of Nd. Expects a 2D grid of 2D blocks covering a w x w
matrix; out-of-range threads do nothing.
__global__ qualifier indicates the function is called from host code and runs on the device (GPU).
---------------------------------------------
*/
__global__ void MatrixMulKernel(int *Md,int *Nd,int *Pd,int w)
{
// BUG FIX: the original computed indices with the device constant dTile_Width
// (32), which silently disagrees with the block size the host actually
// launches (hTile_Width, originally 300). Using blockDim directly keeps the
// indexing correct for any launch configuration.
int row=blockIdx.y*blockDim.y+threadIdx.y; //Global row index of this thread
int col=blockIdx.x*blockDim.x+threadIdx.x; //Global column index of this thread
int Pvalue=0;
if(row<w && col<w){ //Bounds guard: the grid may overshoot the matrix edge
for(int k=0;k<w;++k)
{
Pvalue+=Md[row*w+k]*Nd[k*w+col]; //Accumulate dot product of row and column
}
Pd[row*w+col]=Pvalue;
}
}
/* CPU reference implementation: P = M x N for square w x w matrices stored
   row-major. Used to verify the GPU result. */
void NormalMultiplication(int *M,int *N,int *P,int w)
{
for (int r = 0; r < w; r++) {
    int *Mrow = M + r * w;        /* row r of M, hoisted out of inner loops */
    for (int c = 0; c < w; c++) {
        int acc = 0;
        for (int k = 0; k < w; k++)
            acc += Mrow[k] * N[k * w + c];
        P[r * w + c] = acc;
    }
}
}
/*
-----------------------------------------------
Arguments
1-1st Matrix pointer (host)
2-2nd Matrix pointer (host)
3-Resultant matrix pointer (host, filled on return)
4-Width of square matrix
Copies the input matrices from host memory to device global memory, launches
the multiplication kernel over a grid of hTile_Width x hTile_Width blocks,
reports any launch failure, and copies the result back to the host.
-----------------------------------------------
*/
void MatrixMultiplication(int *M,int *N,int *P,int w)
{ int *Md,*Nd,*Pd; //Matrix pointers in device memory i.e. GPU
size_t size=(size_t)w*w*sizeof(int); //BUG FIX: was sizeof(int*) — the elements are int, not pointers
int x;
cudaMalloc((void**)&Md,size); //Allocate memory in device global memory
cudaMemcpy(Md,M,size,cudaMemcpyHostToDevice); //Copy matrix data from host to device memory
cudaMalloc((void**)&Nd,size);
cudaMemcpy(Nd,N,size,cudaMemcpyHostToDevice);
cudaMalloc((void**)&Pd,size);
x=(w+hTile_Width-1)/hTile_Width; //Ceiling division: number of tiles needed to cover the matrix
dim3 dimGrid(x,x); //Blocks arrangement in the grid
dim3 dimBlock(hTile_Width,hTile_Width); //Threads arrangement in a block (total must not exceed 1024)
MatrixMulKernel<<<dimGrid,dimBlock>>>(Md,Nd,Pd,w); //Kernel invocation with grid and block specification
cudaError_t err=cudaGetLastError(); //BUG FIX: launch failures (e.g. an oversized block) were silently ignored
if(err!=cudaSuccess)
printf("\nKernel launch failed: %s",cudaGetErrorString(err));
cudaMemcpy(P,Pd,size,cudaMemcpyDeviceToHost); //Copy resultant matrix from device to host
//Free device memory
cudaFree(Md);
cudaFree(Nd);
cudaFree(Pd);
}
/*
Program entry point: generate two random Width x Width integer matrices,
multiply them on the GPU and on the CPU, print both results, and report
whether they agree.
*/
int main()
{
int mat_size=Width*Width*sizeof(int); //BUG FIX: was sizeof(int*) — elements are int, not pointers
int tot_elements=Width*Width;
int *M,*N,*P,*ptr,*P_CPU; // Host matrix pointers
int a=0;
int i=0;
M=(int*)malloc(mat_size); //Allocate memory on host for matrices
N=(int*)malloc(mat_size);
P=(int*)malloc(mat_size);
P_CPU=(int*)malloc(mat_size);
if(M==NULL||N==NULL||P==NULL||P_CPU==NULL) //Guard against allocation failure
{ printf("\nHost memory allocation failed\n");
return 1;
}
ptr=M;
printf("\nGenerating random elements for matrix"); //BUG FIX: typo "Genarating" in user-facing message
for(i=0;i<tot_elements;i++)
{ a=(rand()%10); //Generates random number in 0 to 9 range
*ptr=a;
ptr++;
}
ptr=N;
for(i=0;i<tot_elements;i++)
{
a=(rand()%10);
*ptr=a;
ptr++;
}
printf("\nMatrix generated");
MatrixMultiplication(M,N,P,Width); //GPU multiplication
printf("\nDone with GPU");
display_matrix(N,Width,Width);
printf("\nMatrix Multiplication (GPU) is :");
display_matrix(P,Width,Width);
NormalMultiplication(M,N,P_CPU,Width); //CPU reference multiplication
printf("\nDone with CPU");
printf("\n\nMatrix Multiplication is :");
display_matrix(P_CPU,Width,Width);
if(CompareMatrix(P,P_CPU,Width,Width)==-1)
printf("\n\nBoth matrix are same\n");
else
{ printf("\n\nBoth matrix are not same\n\n");
//printf("%d",CompareMatrix(P,P_CPU,Width,Width));
}
//BUG FIX: host buffers were never freed
free(M);
free(N);
free(P);
free(P_CPU);
return 0;
}
----------------------------------------------------O/P---------------------------------------------------
test@test-ThinkCentre-M72e:~$ nvcc Matrix.cu
test@test-ThinkCentre-M72e:~$ ./a.out
Genarating random elements for matrix
Matrix generated
Done with GPU
1 4 0 3 1 2 7 7 9 8 1 0 7 6 0 1 0 2 5 1 9 7 1 3 1 2 0 6 8 7 4 9
2 4 4 5 9 1 2 8 9 6 0 6 4 0 9 4 5 4 6 6 3 9 9 5 2 1 3 0 9 9 1 3
4 5 8 5 7 3 3 8 9 5 5 3 8 6 0 3 3 6 1 8 7 0 5 1 1 0 3 0 0 4 5 4
2 4 9 9 9 4 9 8 1 6 3 9 3 3 4 6 1 5 6 1 7 2 2 9 2 8 1 2 4 7 8 6
3 7 7 2 3 7 2 5 3 7 6 8 1 3 6 4 0 3 5 8 5 0 9 9 8 0 2 4 9 0 1 2
0 0 6 3 9 0 8 3 8 7 3 1 0 0 5 0 3 1 8 0 1 7 9 1 0 3 5 9 4 8 4 6
9 2 9 0 3 0 3 3 7 9 4 9 9 1 9 4 2 0 6 5 9 5 6 1 1 4 3 7 2 7 3 3
9 2 4 4 4 9 9 3 8 3 2 9 5 4 3 9 4 9 5 5 7 3 9 0 7 2 7 2 9 0 5 0
4 1 7 9 3 6 2 1 2 5 3 9 1 8 8 7 0 5 2 7 9 1 9 6 3 6 8 4 6 6 7 2
7 4 1 0 0 6 4 4 3 7 3 4 7 2 1 7 9 5 6 8 9 5 5 2 1 5 9 9 1 6 2 1
0 5 3 2 1 9 7 6 8 2 0 6 4 3 3 6 9 0 4 8 5 1 2 9 7 3 8 0 9 2 3 1
8 7 4 1 6 3 8 5 5 0 1 2 6 4 8 5 6 2 5 2 6 9 1 3 3 1 5 2 6 9 4 4
6 0 7 4 3 7 9 0 0 2 2 6 9 0 3 5 5 8 7 1 7 0 6 0 4 1 3 0 2 9 6 0
9 3 5 4 3 6 4 3 1 9 1 0 9 4 5 6 2 5 9 1 7 7 4 1 1 9 1 3 8 7 4 9
3 1 3 8 7 9 3 8 8 4 0 0 8 6 8 2 3 8 5 0 5 9 2 6 8 5 2 8 3 6 7 8
7 2 6 6 2 9 7 0 3 7 2 3 5 1 7 8 9 4 1 6 4 3 5 4 8 7 3 3 5 0 1 4
5 7 0 7 8 9 9 3 9 4 6 4 5 5 5 6 0 6 2 6 9 9 2 9 6 5 5 3 8 6 7 3
6 0 2 6 9 3 0 8 7 6 5 2 4 0 0 6 8 5 4 9 4 6 8 3 2 3 6 2 2 4 7 0
4 9 6 5 4 6 4 2 5 1 6 1 3 7 7 1 2 1 0 8 9 0 1 3 4 8 5 8 2 2 0 6
3 6 1 0 5 7 4 2 0 0 3 3 9 0 4 3 3 4 2 4 5 3 8 1 1 3 1 5 8 1 3 3
9 7 5 4 4 9 6 7 2 1 0 1 3 7 7 6 3 9 1 0 4 1 1 8 6 4 3 4 7 7 0 7
4 5 3 0 7 2 9 9 3 0 2 7 9 9 5 2 0 8 3 5 9 6 5 6 3 0 2 0 7 4 7 3
0 1 6 7 5 7 8 8 9 0 7 8 0 3 1 0 3 6 7 3 2 2 1 5 3 3 8 2 8 5 8 0
8 6 9 3 3 7 4 3 7 1 3 9 6 4 2 0 2 9 5 7 4 6 4 9 1 2 3 1 8 1 3 8
9 2 4 5 1 0 0 1 3 3 0 0 0 2 2 2 4 7 9 8 5 4 7 8 6 0 0 6 4 3 7 5
8 1 0 9 3 0 2 6 4 3 8 6 7 0 8 1 9 8 1 6 4 8 5 0 1 5 9 7 0 6 2 8
9 5 0 2 7 2 8 1 7 7 9 7 9 8 8 9 8 0 5 2 0 2 4 3 7 3 0 0 1 5 8 0
0 0 4 7 5 3 1 4 2 0 1 3 0 0 4 8 2 2 0 2 4 5 8 4 0 8 4 2 5 4 2 7
5 7 5 2 2 8 6 6 0 0 9 1 2 6 9 4 8 2 8 2 7 8 6 7 7 0 9 4 5 4 2 2
3 9 6 5 7 4 3 9 4 4 0 6 0 2 2 8 4 3 1 1 1 9 0 0 0 2 5 7 6 9 1 9
8 7 6 5 1 1 4 8 5 7 6 6 9 1 4 3 6 7 6 9 7 6 0 9 0 7 6 6 8 7 7 6
6 5 3 7 6 9 7 2 6 4 0 7 5 4 0 1 2 8 0 1 5 2 0 7 1 6 4 9 5 3 5 1
Matrix Multiplication (GPU) is :
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Done with CPU
Matrix Multiplication is :
710 585 753 595 677 651 688 735 686 591 462 686 720 505 674 578 517 666 656 656 808 700 667 655 383 515 588 497 828 715 633 614
646 572 661 648 718 711 726 635 690 579 421 618 676 509 668 584 493 637 605 521 745 634 589 602 434 511 542 525 729 750 629 551
807 705 677 689 779 713 814 858 771 639 540 674 799 507 722 657 647 765 677 693 909 787 662 669 458 563 697 673 823 876 663 697
676 552 704 661 609 758 776 684 650 480 444 780 689 535 693 592 513 730 540 622 855 568 660 680 490 518 624 455 787 637 548 583
864 650 751 675 756 856 858 751 816 686 498 798 865 570 757 771 695 787 698 771 932 681 858 794 614 522 623 535 905 759 691 614
786 700 690 734 697 760 788 771 775 617 554 712 813 530 683 582 615 762 690 705 920 749 588 745 444 600 661 628 834 832 681 678
819 667 772 719 789 832 782 832 798 617 443 764 804 595 658 682 503 845 626 697 956 685 684 662 464 605 638 638 870 790 685 700
700 573 647 533 667 640 738 686 686 592 419 600 782 519 638 553 500 607 586 592 840 560 531 636 440 538 511 556 652 704 625 546
689 514 619 515 585 568 615 609 637 570 388 562 603 370 588 615 554 535 521 619 713 525 538 519 372 532 565 490 619 654 475 474
747 619 632 727 806 798 822 799 864 634 521 644 752 655 692 674 588 786 667 694 897 704 783 765 554 575 641 631 858 770 735 640
620 473 551 454 481 657 640 499 656 505 322 538 655 392 535 515 442 552 544 585 743 586 580 528 423 466 472 488 727 489 482 547
688 531 704 663 665 761 755 719 624 511 412 692 728 529 720 574 511 746 563 538 845 630 613 673 484 522 588 535 731 731 597 594
823 600 664 671 791 777 798 734 780 529 471 761 746 486 712 683 618 808 564 679 804 747 735 618 497 498 667 535 830 732 619 587
811 622 787 716 793 735 848 775 857 683 453 815 828 590 656 630 478 829 703 692 927 703 692 747 433 581 583 571 914 815 787 662
847 702 760 775 804 842 812 767 898 667 557 820 757 596 899 736 681 754 612 744 844 774 740 777 601 616 740 678 866 826 640 702
649 498 663 536 610 605 629 604 716 573 398 634 643 380 473 544 469 619 545 632 770 543 673 513 350 440 554 570 657 628 541 526
652 621 609 603 663 807 771 664 684 570 418 582 622 499 665 626 503 618 601 522 821 676 599 590 466 576 628 683 819 714 540 591
754 551 607 761 742 778 772 758 791 606 479 664 646 535 743 739 657 780 640 682 814 805 792 705 574 569 731 648 877 763 687 645
954 684 798 813 844 905 831 830 843 739 600 813 844 604 953 833 710 817 725 738 956 874 860 780 619 699 752 663 931 850 671 754
568 490 511 554 519 540 532 573 590 425 363 565 526 408 561 463 462 545 442 467 576 542 390 485 336 433 522 484 569 651 458 494
770 536 629 748 602 818 761 643 770 564 459 573 701 552 653 684 617 741 564 684 899 605 637 643 571 611 677 609 741 695 638 609
676 557 678 651 679 692 691 644 664 514 481 665 544 451 678 608 535 681 598 635 759 633 750 600 523 426 627 573 727 671 555 560
642 489 485 511 571 596 578 617 606 618 441 558 729 417 617 604 563 558 572 613 703 595 643 545 447 471 474 456 651 586 543 491
629 518 653 548 637 566 633 645 676 593 378 601 644 392 580 544 457 600 651 603 799 686 673 593 361 428 521 595 713 761 610 603
634 535 596 510 546 625 605 579 599 468 298 506 587 430 485 515 509 633 501 586 662 519 550 579 403 396 487 477 657 605 483 567
768 630 656 642 696 776 748 730 766 513 506 623 674 463 691 651 677 679 615 697 829 739 672 650 489 557 740 611 776 714 559 641
699 555 613 628 665 773 689 735 857 661 385 601 731 446 691 696 613 643 555 703 853 708 714 579 523 568 636 609 798 695 484 642
620 509 555 625 587 615 613 599 654 430 360 558 574 406 578 527 541 616 506 531 636 590 530 545 399 481 548 489 674 660 500 585
703 497 575 600 559 564 580 617 596 506 435 543 679 426 583 473 516 688 579 565 692 535 579 559 404 435 478 466 626 622 554 559
800 606 630 663 722 767 788 668 805 558 529 708 805 530 778 644 624 699 599 632 846 705 717 673 494 563 662 564 795 777 602 606
956 656 792 733 829 837 850 763 886 666 533 767 873 642 846 756 687 854 715 789 957 792 849 742 592 623 706 658 865 800 742 685
593 493 531 534 612 607 665 579 630 551 380 535 649 447 612 592 545 515 523 520 654 577 551 559 422 510 506 446 646 624 540 498
Both matrix are not same
test@test-ThinkCentre-M72e:~$
test@test-ThinkCentre-M72e:~$ ./a.out
Genarating random elements for matrix
Matrix generated
Done with GPU
1 4 0 3 1 2 7 7 9 8 1 0 7 6 0 1 0 2 5 1 9 7 1 3 1 2 0 6 8 7 4 9
2 4 4 5 9 1 2 8 9 6 0 6 4 0 9 4 5 4 6 6 3 9 9 5 2 1 3 0 9 9 1 3
4 5 8 5 7 3 3 8 9 5 5 3 8 6 0 3 3 6 1 8 7 0 5 1 1 0 3 0 0 4 5 4
2 4 9 9 9 4 9 8 1 6 3 9 3 3 4 6 1 5 6 1 7 2 2 9 2 8 1 2 4 7 8 6
3 7 7 2 3 7 2 5 3 7 6 8 1 3 6 4 0 3 5 8 5 0 9 9 8 0 2 4 9 0 1 2
0 0 6 3 9 0 8 3 8 7 3 1 0 0 5 0 3 1 8 0 1 7 9 1 0 3 5 9 4 8 4 6
9 2 9 0 3 0 3 3 7 9 4 9 9 1 9 4 2 0 6 5 9 5 6 1 1 4 3 7 2 7 3 3
9 2 4 4 4 9 9 3 8 3 2 9 5 4 3 9 4 9 5 5 7 3 9 0 7 2 7 2 9 0 5 0
4 1 7 9 3 6 2 1 2 5 3 9 1 8 8 7 0 5 2 7 9 1 9 6 3 6 8 4 6 6 7 2
7 4 1 0 0 6 4 4 3 7 3 4 7 2 1 7 9 5 6 8 9 5 5 2 1 5 9 9 1 6 2 1
0 5 3 2 1 9 7 6 8 2 0 6 4 3 3 6 9 0 4 8 5 1 2 9 7 3 8 0 9 2 3 1
8 7 4 1 6 3 8 5 5 0 1 2 6 4 8 5 6 2 5 2 6 9 1 3 3 1 5 2 6 9 4 4
6 0 7 4 3 7 9 0 0 2 2 6 9 0 3 5 5 8 7 1 7 0 6 0 4 1 3 0 2 9 6 0
9 3 5 4 3 6 4 3 1 9 1 0 9 4 5 6 2 5 9 1 7 7 4 1 1 9 1 3 8 7 4 9
3 1 3 8 7 9 3 8 8 4 0 0 8 6 8 2 3 8 5 0 5 9 2 6 8 5 2 8 3 6 7 8
7 2 6 6 2 9 7 0 3 7 2 3 5 1 7 8 9 4 1 6 4 3 5 4 8 7 3 3 5 0 1 4
5 7 0 7 8 9 9 3 9 4 6 4 5 5 5 6 0 6 2 6 9 9 2 9 6 5 5 3 8 6 7 3
6 0 2 6 9 3 0 8 7 6 5 2 4 0 0 6 8 5 4 9 4 6 8 3 2 3 6 2 2 4 7 0
4 9 6 5 4 6 4 2 5 1 6 1 3 7 7 1 2 1 0 8 9 0 1 3 4 8 5 8 2 2 0 6
3 6 1 0 5 7 4 2 0 0 3 3 9 0 4 3 3 4 2 4 5 3 8 1 1 3 1 5 8 1 3 3
9 7 5 4 4 9 6 7 2 1 0 1 3 7 7 6 3 9 1 0 4 1 1 8 6 4 3 4 7 7 0 7
4 5 3 0 7 2 9 9 3 0 2 7 9 9 5 2 0 8 3 5 9 6 5 6 3 0 2 0 7 4 7 3
0 1 6 7 5 7 8 8 9 0 7 8 0 3 1 0 3 6 7 3 2 2 1 5 3 3 8 2 8 5 8 0
8 6 9 3 3 7 4 3 7 1 3 9 6 4 2 0 2 9 5 7 4 6 4 9 1 2 3 1 8 1 3 8
9 2 4 5 1 0 0 1 3 3 0 0 0 2 2 2 4 7 9 8 5 4 7 8 6 0 0 6 4 3 7 5
8 1 0 9 3 0 2 6 4 3 8 6 7 0 8 1 9 8 1 6 4 8 5 0 1 5 9 7 0 6 2 8
9 5 0 2 7 2 8 1 7 7 9 7 9 8 8 9 8 0 5 2 0 2 4 3 7 3 0 0 1 5 8 0
0 0 4 7 5 3 1 4 2 0 1 3 0 0 4 8 2 2 0 2 4 5 8 4 0 8 4 2 5 4 2 7
5 7 5 2 2 8 6 6 0 0 9 1 2 6 9 4 8 2 8 2 7 8 6 7 7 0 9 4 5 4 2 2
3 9 6 5 7 4 3 9 4 4 0 6 0 2 2 8 4 3 1 1 1 9 0 0 0 2 5 7 6 9 1 9
8 7 6 5 1 1 4 8 5 7 6 6 9 1 4 3 6 7 6 9 7 6 0 9 0 7 6 6 8 7 7 6
6 5 3 7 6 9 7 2 6 4 0 7 5 4 0 1 2 8 0 1 5 2 0 7 1 6 4 9 5 3 5 1
Matrix Multiplication (GPU) is :
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Done with CPU
Matrix Multiplication is :
710 585 753 595 677 651 688 735 686 591 462 686 720 505 674 578 517 666 656 656 808 700 667 655 383 515 588 497 828 715 633 614
646 572 661 648 718 711 726 635 690 579 421 618 676 509 668 584 493 637 605 521 745 634 589 602 434 511 542 525 729 750 629 551
807 705 677 689 779 713 814 858 771 639 540 674 799 507 722 657 647 765 677 693 909 787 662 669 458 563 697 673 823 876 663 697
676 552 704 661 609 758 776 684 650 480 444 780 689 535 693 592 513 730 540 622 855 568 660 680 490 518 624 455 787 637 548 583
864 650 751 675 756 856 858 751 816 686 498 798 865 570 757 771 695 787 698 771 932 681 858 794 614 522 623 535 905 759 691 614
786 700 690 734 697 760 788 771 775 617 554 712 813 530 683 582 615 762 690 705 920 749 588 745 444 600 661 628 834 832 681 678
819 667 772 719 789 832 782 832 798 617 443 764 804 595 658 682 503 845 626 697 956 685 684 662 464 605 638 638 870 790 685 700
700 573 647 533 667 640 738 686 686 592 419 600 782 519 638 553 500 607 586 592 840 560 531 636 440 538 511 556 652 704 625 546
689 514 619 515 585 568 615 609 637 570 388 562 603 370 588 615 554 535 521 619 713 525 538 519 372 532 565 490 619 654 475 474
747 619 632 727 806 798 822 799 864 634 521 644 752 655 692 674 588 786 667 694 897 704 783 765 554 575 641 631 858 770 735 640
620 473 551 454 481 657 640 499 656 505 322 538 655 392 535 515 442 552 544 585 743 586 580 528 423 466 472 488 727 489 482 547
688 531 704 663 665 761 755 719 624 511 412 692 728 529 720 574 511 746 563 538 845 630 613 673 484 522 588 535 731 731 597 594
823 600 664 671 791 777 798 734 780 529 471 761 746 486 712 683 618 808 564 679 804 747 735 618 497 498 667 535 830 732 619 587
811 622 787 716 793 735 848 775 857 683 453 815 828 590 656 630 478 829 703 692 927 703 692 747 433 581 583 571 914 815 787 662
847 702 760 775 804 842 812 767 898 667 557 820 757 596 899 736 681 754 612 744 844 774 740 777 601 616 740 678 866 826 640 702
649 498 663 536 610 605 629 604 716 573 398 634 643 380 473 544 469 619 545 632 770 543 673 513 350 440 554 570 657 628 541 526
652 621 609 603 663 807 771 664 684 570 418 582 622 499 665 626 503 618 601 522 821 676 599 590 466 576 628 683 819 714 540 591
754 551 607 761 742 778 772 758 791 606 479 664 646 535 743 739 657 780 640 682 814 805 792 705 574 569 731 648 877 763 687 645
954 684 798 813 844 905 831 830 843 739 600 813 844 604 953 833 710 817 725 738 956 874 860 780 619 699 752 663 931 850 671 754
568 490 511 554 519 540 532 573 590 425 363 565 526 408 561 463 462 545 442 467 576 542 390 485 336 433 522 484 569 651 458 494
770 536 629 748 602 818 761 643 770 564 459 573 701 552 653 684 617 741 564 684 899 605 637 643 571 611 677 609 741 695 638 609
676 557 678 651 679 692 691 644 664 514 481 665 544 451 678 608 535 681 598 635 759 633 750 600 523 426 627 573 727 671 555 560
642 489 485 511 571 596 578 617 606 618 441 558 729 417 617 604 563 558 572 613 703 595 643 545 447 471 474 456 651 586 543 491
629 518 653 548 637 566 633 645 676 593 378 601 644 392 580 544 457 600 651 603 799 686 673 593 361 428 521 595 713 761 610 603
634 535 596 510 546 625 605 579 599 468 298 506 587 430 485 515 509 633 501 586 662 519 550 579 403 396 487 477 657 605 483 567
768 630 656 642 696 776 748 730 766 513 506 623 674 463 691 651 677 679 615 697 829 739 672 650 489 557 740 611 776 714 559 641
699 555 613 628 665 773 689 735 857 661 385 601 731 446 691 696 613 643 555 703 853 708 714 579 523 568 636 609 798 695 484 642
620 509 555 625 587 615 613 599 654 430 360 558 574 406 578 527 541 616 506 531 636 590 530 545 399 481 548 489 674 660 500 585
703 497 575 600 559 564 580 617 596 506 435 543 679 426 583 473 516 688 579 565 692 535 579 559 404 435 478 466 626 622 554 559
800 606 630 663 722 767 788 668 805 558 529 708 805 530 778 644 624 699 599 632 846 705 717 673 494 563 662 564 795 777 602 606
956 656 792 733 829 837 850 763 886 666 533 767 873 642 846 756 687 854 715 789 957 792 849 742 592 623 706 658 865 800 742 685
593 493 531 534 612 607 665 579 630 551 380 535 649 447 612 592 545 515 523 520 654 577 551 559 422 510 506 446 646 624 540 498
Both matrix are not same
test@test-ThinkCentre-M72e:~$
No comments:
Post a Comment