使用blas做矩阵乘法-创新互联

 
#define min(x,y) (((x) < (y)) ? (x) : (y))

#include
#include
#include
#include
#include
//extern "C"
//{   #include 
//}using namespace std;
int main()
{
    const enum CBLAS_ORDER Order=CblasRowMajor;
const enum CBLAS_TRANSPOSE TransA=CblasNoTrans;
const enum CBLAS_TRANSPOSE TransB=CblasNoTrans;
const int M=4;//A的行数,C的行数  const int N=2;//B的列数,C的列数  const int K=3;//A的列数,B的行数  const float alpha=1;
const float beta=0;
const int lda=K;//A的列  const int ldb=N;//B的列  const int ldc=N;//C的列  const float A[M*K]={1,2,3,4,5,6,7,8,9,8,7,6};
const float B[K*N]={5,4,3,2,1,0};
float C[M*N];
   
    cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
     
for(int i=0;i
g++ testblas.c++ -lopenblas -o testout

使用blas做矩阵乘法

g++ testblas.c++ -lopenblas_piledriverp-r0.2.9 -o testout  本地编译openblas版本

创新互联响应式网站特点就是不管在电脑、平板还是手机上,HTML5建站都会根据屏幕尺寸自动调节大小、图片分辨率,并且融入一定的动画特效,让网站看起来非常的美观大方。从网站需求对接到网站制作设计、从代码编写到项目上线运维,技术人员全程跟踪,快速响应

注意：在编译命令中，链接库选项（如 -lopenblas）要放在引用该库函数的源文件或目标文件之后。

cblas_sgemm

Multiplies two matrices (single-precision).

void cblas_sgemm (
const enum CBLAS_ORDER Order,// Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
const enum CBLAS_TRANSPOSE TransA, //Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB, //Specifies whether to transpose matrix B.
const int M, //Number of rows in matrices A and C.
const int N, //Number of columns in matrices B and C.
const int K, //Number of columns in matrix A; number of rows in matrix B.
const float alpha, //Scaling factor for the product of matrices A and B.
const float *A, //Matrix A.
const int lda, //The size of the first dimension of matrix A; if you are passing a matrix A[m][n], the value should be m. (This is the stride; see the explanation below.)

lda, ldb and ldc (the strides) are not relevant to my problem after all, but here's an explanation of them :

The elements of a matrix (i.e a 2D array) are stored contiguously in memory. However, they may be stored in either column-major or row-major fashion. The stride represents the distance in memory between elements in adjacent rows (if row-major) or in adjacent columns (if column-major). This means that the stride is usually equal to the number of rows/columns in the matrix.

Matrix A =
[1 2 3]
[4 5 6]
Row-major stores values as {1,2,3,4,5,6}
Stride here is 3

Col-major stores values as {1, 4, 2, 5, 3, 6}
Stride here is 2

Matrix B =
[1 2 3]
[4 5 6]
[7 8 9]

Col-major storage is {1, 4, 7, 2, 5, 8, 3, 6, 9}
Stride here is 3

Read more: http://www.physicsforums.com
const float *B, //Matrix B.
const int ldb, //The size of the first dimension of matrix B; if you are passing a matrix B[m][n], the value should be m.
const float beta, //Scaling factor for matrix C.
float *C, //Matrix C.
const int ldc //The size of the first dimension of matrix C; if you are passing a matrix C[m][n], the value should be m.
);

Thus, it calculates either C←αAB + βC or C←αBA + βC with optional use of transposed forms of A, B, or both.
typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;

$C=A*B$

$C^T=(A*B)^T=B^T*A^T$  把A和B的顺序颠倒，可以直接得到转置矩阵乘法的结果，不用作其他变换（结果C也是转置的）。

 Y←αAX + βY

cblas_sgemv
Multiplies a matrix by a vector (single precision).
void cblas_sgemv (
const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA,
const int M,
const int N,
const float alpha,
const float *A,
const int lda,
const float *X,
const int incX,
const float beta,
float *Y,
const int incY
);

STL版本

cblas_daxpy
Computes a constant times a vector plus a vector (double-precision).  

On return, the contents of vector Y are replaced with the result. The value computed is (alpha * X[i]) + Y[i].

#include 
#include
#include
#include

int main()
{
    blasint n= 10;
    blasint in_x=1;
    blasint in_y=1;

    std::vector x(n);
    std::vector y(n);

double alpha = 10;

    std::fill(x.begin(),x.end(),1.0);
    std::fill(y.begin(),y.end(),2.0);

    cblas_daxpy( n, alpha,&x[0], in_x, &y[0], in_y);

//Print y  for(int j=0;j

cublas


cublasStatus_t
cublasCreate(cublasHandle_t *handle)

Return Value Meaning
CUBLAS_STATUS_SUCCESS the initialization succeeded
CUBLAS_STATUS_NOT_INITIALIZED the CUDA™ Runtime initialization failed
CUBLAS_STATUS_ALLOC_FAILED the resources could not be allocated

cublasStatus_t
cublasDestroy(cublasHandle_t handle)

Return Value Meaning
CUBLAS_STATUS_SUCCESS the shut down succeeded
CUBLAS_STATUS_NOT_INITIALIZED the library was not initialized



cublasStatus_t cublasSgemm(cublasHandle_t handle, // 唯一的不同:handle to the cuBLAS library context.
cublasOperation_t transa,
cublasOperation_t transb,
int m,
int n,
int k, const float*alpha, const float*A,
int lda, const float*B,
int ldb, const float*beta, float*C,
int ldc
)
void cblas_sgemm (
const enum CBLAS_ORDER Order,// Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
const enum CBLAS_TRANSPOSE TransA, //Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB, //Specifies whether to transpose matrix B.
const int M, //Number of rows in matrices A and C.
const int N, //Number of columns in matrices B and C.
const int K, //Number of columns in matrix A; number of rows in matrix B.
const float alpha, //Scaling factor for the product of matrices A and B.
const float *A, //Matrix A.
const int lda, //The size of the first dimension of matrix A; if you are passing a matrix A[m][n], the value should be m.
const float *B, //Matrix B.
const int ldb, //The size of the first dimension of matrix B; if you are passing a matrix B[m][n], the value should be m.
const float beta, //Scaling factor for matrix C.
float *C, //Matrix C.
const int ldc //The size of the first dimension of matrix C; if you are passing a matrix C[m][n], the value should be m.
);

本文名称:使用blas做矩阵乘法-创新互联
URL地址:http://bzwzjz.com/article/epgcp.html