相关文章推荐
int xpageoffset; for (i = 0; i < 10000; i++) { for (xpageoffset = 0; xpageoffset < 10000; xpageoffset++) { d = 0.0; for (k = 0; k < 10000; k++) { d += a[i + 10000 * k] * b[k + 10000 * xpageoffset]; c[i + 10000 * xpageoffset] = d; static double b[100000000]; static double c[100000000]; cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)10000, (MKL_INT)10000, (MKL_INT)10000, 1.0, &a[0], (MKL_INT)10000, &b[0], (MKL_INT)10000, 0.0, &c[0], (MKL_INT)10000);
/* Run the 10 row-FFTs concurrently, capped at 4 OpenMP threads
   (original comment: 指定4线程并发 = "use 4 concurrent threads").
   NOTE(review): closing braces were stripped in extraction; restored. */
#pragma omp parallel for \
 num_threads(4 > omp_get_max_threads() ? omp_get_max_threads() : 4) \
 private(b_i,yCol,b_r)
  for (i = 0; i < 10; i++) {
    /* Gather row i of r (10x256, column-major) into a contiguous buffer. */
    for (b_i = 0; b_i < 256; b_i++) {
      b_r[b_i] = r[i + 10 * b_i];
    }
    /* 256-point FFT of this row via the generated helper. */
    c_FFTImplementationCallback_doH(b_r, 0, yCol);
    /* Store the real part of the spectrum back into a. */
    for (b_i = 0; b_i < 256; b_i++) {
      a[i + 10 * b_i] = yCol[b_i].re;
    }
  }
%% generate standalone exe by using GPU Coder (gpuCodeGenTest.m)
% Configure GPU Coder to produce a standalone executable (CUDA target).
cfg = coder.gpuConfig('exe');
% Auto-generate an example main() and compile it together with the code.
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';
% Generate CUDA code for largeMatrixTest and open the code-gen report.
codegen largeMatrixTest -config cfg -report
	
/* GPU Coder output: C = alpha*A*B + beta*C via cuBLAS, 5000x5000
   column-major operands, no transposes; alpha/beta live in GPU memory
   (pointer-mode dependent — set elsewhere in the generated code). */
cublasDgemm(getCublasGlobalHandle(), CUBLAS_OP_N, CUBLAS_OP_N, 5000, 5000,
              5000, (double *)gpu_alpha1, (double *)&(*gpu_a)[0], 5000, (double *)
              &(*gpu_b)[0], 5000, (double *)gpu_beta1, (double *)&(*gpu_c)[0],
              5000);
/* Singular values of gpu_c via cuSOLVER dgesvd. 'N','N' requests no
   singular vectors, so U/VT are passed as NULL with leading dimension 1;
   workspace buffer/size come from the generated global helpers. */
cusolverDnDgesvd(getCuSolverGlobalHandle(), 'N', 'N', 5000, 5000, (double *)
                     &(*gpu_c)[0], 5000, &(*gpu_s)[0], NULL, 1, NULL, 1, (double
      *)getCuSolverWorkspaceBuff(), *getCuSolverWorkspaceReq(), &(*gpu_superb)[0],
                     gpu_info_t);
	

除了以上提到的内容,如今最热且重度依赖硬件加速的深度学习应用并没在本文中讨论,事实上MATLAB从R2017b就已经开始支持针对深度学习推断生成C/C++代码,并可利用硬件来加速深度学习的推断,包括NVIDIA的桌面与服务器GPU及嵌入式GPU(通过CUDA实现)、ARM Mali GPU与ARM Neon核(通过Arm Compute Library实现),或者利用x86_64处理器的SIMD(SSE/AVX,通过Intel MKL-DNN实现)。在最新的R2020b版本中,Deep Learning HDL Toolbox还可以将训练好的深度学习模型生成为硬件描述语言,从而把深度学习部署到FPGA上。详情可参考MATLAB帮助文档或者咨询MathWorks中国办公室。

%% example function for code generation (largeMatrixTest.m)
function largeMatrixTest()
    % Benchmark several large-matrix operations (5000x5000 random matrix)
    % and print the elapsed wall-clock time.
    % Fix: the snippet called toc without a prior tic, which is a runtime
    % error; start the timer before the timed work.
    tic;
    a = rand(5000, 5000);
    b = a * a;                    %#ok<NASGU> % matrix multiply (BLAS)
    c = sum(a);                   %#ok<NASGU> % column sums
    s = svd(a);                   %#ok<NASGU> % singular values (LAPACK)
    e = eig(a);                   %#ok<NASGU> % eigenvalues (LAPACK)
    [maxValue, maxPos] = max(a);  %#ok<ASGLU> % per-column max and index
    tCpu = toc;
    fprintf('    Time cost: %f\n', tCpu);
end
%% define class for BLASCallback (useMyBLAS.m)
% Custom BLAS callback: makes MATLAB Coder emit CBLAS calls and link the
% generated code against Intel MKL (ILP64 interface).
% Fix: the extracted listing was missing every closing `end` for the
% functions, methods block, and classdef; restored so the file parses.
classdef useMyBLAS < coder.BLASCallback
    methods (Static)
        function updateBuildInfo(buildInfo, ~)
            % Register MKL link libraries and include path with the build.
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            libPriority = '';
            libPreCompiled = true;
            libLinkOnly = true;
            libs = {'mkl_intel_ilp64.lib' 'mkl_intel_thread.lib' 'mkl_core.lib'};
            buildInfo.addLinkObjects(libs, libPath, libPriority, libPreCompiled, ...
                                  libLinkOnly);
            % Intel OpenMP runtime shipped inside the MATLAB installation.
            buildInfo.addLinkObjects('libiomp5md.lib',fullfile(matlabroot,'bin', ...
                             'win64'), libPriority, libPreCompiled, libLinkOnly);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            % ILP64: BLAS integer arguments are 64-bit.
            buildInfo.addDefines('-DMKL_ILP64');
        end
        function headerName = getHeaderFilename()
            % CBLAS header that generated C code should #include.
            headerName = 'mkl_cblas.h';
        end
        function intTypeName = getBLASIntTypeName()
            % Integer type used in generated BLAS calls.
            intTypeName = 'MKL_INT';
        end
        function doubleComplexTypeName = getBLASDoubleComplexTypeName()
            doubleComplexTypeName = 'my_double_complex_type';
        end
        function singleComplexTypeName = getBLASSingleComplexTypeName()
            singleComplexTypeName = 'my_single_complex_type';
        end
        function p = useEnumNameRatherThanTypedef()
            % Reference CBLAS enums by enum name rather than typedef.
            p = true;
        end
    end
end
%% define class for LAPACKCallback (useMyLAPACK.m)
% Custom LAPACK callback: makes MATLAB Coder emit LAPACKE calls and link
% the generated code against Intel MKL (ILP64 interface).
% Fix: the extracted listing was missing the closing `end` keywords for
% the functions, methods block, and classdef; restored so the file parses.
classdef useMyLAPACK < coder.LAPACKCallback
    methods (Static)
        function hn = getHeaderFilename()
            % LAPACKE header that generated C code should #include.
            hn = 'mkl_lapacke.h';
        end
        function updateBuildInfo(buildInfo, buildctx)
            buildInfo.addIncludePaths(fullfile(pwd,'include'));
            libName = 'mkl_lapack95_ilp64';
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            % Platform-specific static-library extension (e.g. '.lib').
            [~,linkLibExt] = buildctx.getStdLibInfo();
            buildInfo.addLinkObjects([libName linkLibExt], libPath, ...
                '', true, true);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            buildInfo.addDefines('HAVE_LAPACK_CONFIG_H');
            buildInfo.addDefines('LAPACK_COMPLEX_STRUCTURE');
            % ILP64: LAPACKE integer arguments are 64-bit.
            buildInfo.addDefines('LAPACK_ILP64');
        end
    end
end
%% generate standalone exe for above MATLAB function (genCodeTest.m)
% Configure MATLAB Coder for a standalone executable target.
cfg = coder.config('exe');
% Route generated BLAS/LAPACK calls through the custom MKL callbacks.
cfg.CustomBLASCallback = 'useMyBLAS';
cfg.CustomLAPACKCallback = 'useMyLAPACK';
% Auto-generate an example main() and compile it together with the code.
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';
% Generate C code for largeMatrixTest and open the code-gen report.
codegen largeMatrixTest -config cfg -report
					

Puoi anche selezionare un sito web dal seguente elenco:

Come ottenere le migliori prestazioni del sito

Per ottenere le migliori prestazioni del sito, seleziona il sito cinese (in cinese o in inglese). I siti MathWorks per gli altri paesi non sono ottimizzati per essere visitati dalla tua area geografica.

Americhe

  • América Latina (Español)
  • Canada (English)
  • United States (English)
  • Europa

  • Belgium (English)
  • Denmark (English)
  • Deutschland (Deutsch)
  • España (Español)
  • Finland (English)
  • France (Français)
  • Ireland (English)
  • Italia (Italiano)
  • Luxembourg (English)
  •  
    推荐文章