/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#include <stdio.h>
#include <string.h>
#include "common.h"

#ifdef BUILD_KERNEL
#include "kernel_PILEDRIVER.h"
#endif

#undef DEBUG

static void init_parameter(void);

/*
 * Kernel dispatch table for the PILEDRIVER target.
 *
 * This is a purely positional initializer: every entry must appear in the
 * same order as the corresponding member of gotoblas_t (declared outside
 * this file), and the preprocessor conditionals below must emit the same
 * number of entries on every path.  Do not reorder, add or drop entries
 * without updating the struct declaration to match.
 *
 * Recurring pattern: when a precision's GEMM unroll factors M and N differ,
 * the "i" flavoured copy routines are listed first, otherwise the "o"
 * flavoured routines are listed in their place; the "o" routines then
 * always follow.
 */
gotoblas_t TABLE_NAME = {
  DTB_DEFAULT_ENTRIES ,

  GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,

  /* ---- s-prefixed kernels (single-precision real, by BLAS naming) ---- */
  /* Three zero placeholders; presumably the p/q/r blocking fields that
     init_parameter() assigns at run time -- confirm against gotoblas_t. */
  0, 0, 0,
  SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
#ifdef SGEMM_DEFAULT_UNROLL_MN
 SGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif


  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
#ifdef HAVE_EXCLUSIVE_CACHE
  1,
#else
  0,
#endif

  samax_k_PILEDRIVER,  samin_k_PILEDRIVER,  smax_k_PILEDRIVER,  smin_k_PILEDRIVER,
  isamax_k_PILEDRIVER, isamin_k_PILEDRIVER, ismax_k_PILEDRIVER, ismin_k_PILEDRIVER,
  snrm2_k_PILEDRIVER,  sasum_k_PILEDRIVER,  scopy_k_PILEDRIVER, sdot_k_PILEDRIVER,
  dsdot_k_PILEDRIVER,
  srot_k_PILEDRIVER,   saxpy_k_PILEDRIVER,  sscal_k_PILEDRIVER, sswap_k_PILEDRIVER,
  sgemv_n_PILEDRIVER,  sgemv_t_PILEDRIVER, sger_k_PILEDRIVER,
  ssymv_L_PILEDRIVER, ssymv_U_PILEDRIVER,

  sgemm_kernel_PILEDRIVER, sgemm_beta_PILEDRIVER,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  sgemm_incopy_PILEDRIVER, sgemm_itcopy_PILEDRIVER,
#else
  sgemm_oncopy_PILEDRIVER, sgemm_otcopy_PILEDRIVER,
#endif
  sgemm_oncopy_PILEDRIVER, sgemm_otcopy_PILEDRIVER,
  strsm_kernel_LN_PILEDRIVER, strsm_kernel_LT_PILEDRIVER, strsm_kernel_RN_PILEDRIVER, strsm_kernel_RT_PILEDRIVER,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  strsm_iunucopy_PILEDRIVER, strsm_iunncopy_PILEDRIVER, strsm_iutucopy_PILEDRIVER, strsm_iutncopy_PILEDRIVER,
  strsm_ilnucopy_PILEDRIVER, strsm_ilnncopy_PILEDRIVER, strsm_iltucopy_PILEDRIVER, strsm_iltncopy_PILEDRIVER,
#else
  strsm_ounucopy_PILEDRIVER, strsm_ounncopy_PILEDRIVER, strsm_outucopy_PILEDRIVER, strsm_outncopy_PILEDRIVER,
  strsm_olnucopy_PILEDRIVER, strsm_olnncopy_PILEDRIVER, strsm_oltucopy_PILEDRIVER, strsm_oltncopy_PILEDRIVER,
#endif
  strsm_ounucopy_PILEDRIVER, strsm_ounncopy_PILEDRIVER, strsm_outucopy_PILEDRIVER, strsm_outncopy_PILEDRIVER,
  strsm_olnucopy_PILEDRIVER, strsm_olnncopy_PILEDRIVER, strsm_oltucopy_PILEDRIVER, strsm_oltncopy_PILEDRIVER,
  strmm_kernel_RN_PILEDRIVER, strmm_kernel_RT_PILEDRIVER, strmm_kernel_LN_PILEDRIVER, strmm_kernel_LT_PILEDRIVER,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  strmm_iunucopy_PILEDRIVER, strmm_iunncopy_PILEDRIVER, strmm_iutucopy_PILEDRIVER, strmm_iutncopy_PILEDRIVER,
  strmm_ilnucopy_PILEDRIVER, strmm_ilnncopy_PILEDRIVER, strmm_iltucopy_PILEDRIVER, strmm_iltncopy_PILEDRIVER,
#else
  strmm_ounucopy_PILEDRIVER, strmm_ounncopy_PILEDRIVER, strmm_outucopy_PILEDRIVER, strmm_outncopy_PILEDRIVER,
  strmm_olnucopy_PILEDRIVER, strmm_olnncopy_PILEDRIVER, strmm_oltucopy_PILEDRIVER, strmm_oltncopy_PILEDRIVER,
#endif
  strmm_ounucopy_PILEDRIVER, strmm_ounncopy_PILEDRIVER, strmm_outucopy_PILEDRIVER, strmm_outncopy_PILEDRIVER,
  strmm_olnucopy_PILEDRIVER, strmm_olnncopy_PILEDRIVER, strmm_oltucopy_PILEDRIVER, strmm_oltncopy_PILEDRIVER,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  ssymm_iutcopy_PILEDRIVER, ssymm_iltcopy_PILEDRIVER,
#else
  ssymm_outcopy_PILEDRIVER, ssymm_oltcopy_PILEDRIVER,
#endif
  ssymm_outcopy_PILEDRIVER, ssymm_oltcopy_PILEDRIVER,

  /* LAPACK helper slots are NULL when the build defines NO_LAPACK. */
#ifndef NO_LAPACK
  sneg_tcopy_PILEDRIVER, slaswp_ncopy_PILEDRIVER,
#else
  NULL,NULL,
#endif

  /* ---- d-prefixed kernels (double-precision real, by BLAS naming) ---- */
  0, 0, 0,
  DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN
 DGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif

  damax_k_PILEDRIVER,  damin_k_PILEDRIVER,  dmax_k_PILEDRIVER,  dmin_k_PILEDRIVER,
  idamax_k_PILEDRIVER, idamin_k_PILEDRIVER, idmax_k_PILEDRIVER, idmin_k_PILEDRIVER,
  dnrm2_k_PILEDRIVER,  dasum_k_PILEDRIVER,  dcopy_k_PILEDRIVER, ddot_k_PILEDRIVER,
  drot_k_PILEDRIVER,   daxpy_k_PILEDRIVER,  dscal_k_PILEDRIVER, dswap_k_PILEDRIVER,
  dgemv_n_PILEDRIVER,  dgemv_t_PILEDRIVER,  dger_k_PILEDRIVER,
  dsymv_L_PILEDRIVER,  dsymv_U_PILEDRIVER,

  dgemm_kernel_PILEDRIVER, dgemm_beta_PILEDRIVER,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dgemm_incopy_PILEDRIVER, dgemm_itcopy_PILEDRIVER,
#else
  dgemm_oncopy_PILEDRIVER, dgemm_otcopy_PILEDRIVER,
#endif
  dgemm_oncopy_PILEDRIVER, dgemm_otcopy_PILEDRIVER,
  dtrsm_kernel_LN_PILEDRIVER, dtrsm_kernel_LT_PILEDRIVER, dtrsm_kernel_RN_PILEDRIVER, dtrsm_kernel_RT_PILEDRIVER,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dtrsm_iunucopy_PILEDRIVER, dtrsm_iunncopy_PILEDRIVER, dtrsm_iutucopy_PILEDRIVER, dtrsm_iutncopy_PILEDRIVER,
  dtrsm_ilnucopy_PILEDRIVER, dtrsm_ilnncopy_PILEDRIVER, dtrsm_iltucopy_PILEDRIVER, dtrsm_iltncopy_PILEDRIVER,
#else
  dtrsm_ounucopy_PILEDRIVER, dtrsm_ounncopy_PILEDRIVER, dtrsm_outucopy_PILEDRIVER, dtrsm_outncopy_PILEDRIVER,
  dtrsm_olnucopy_PILEDRIVER, dtrsm_olnncopy_PILEDRIVER, dtrsm_oltucopy_PILEDRIVER, dtrsm_oltncopy_PILEDRIVER,
#endif
  dtrsm_ounucopy_PILEDRIVER, dtrsm_ounncopy_PILEDRIVER, dtrsm_outucopy_PILEDRIVER, dtrsm_outncopy_PILEDRIVER,
  dtrsm_olnucopy_PILEDRIVER, dtrsm_olnncopy_PILEDRIVER, dtrsm_oltucopy_PILEDRIVER, dtrsm_oltncopy_PILEDRIVER,
  dtrmm_kernel_RN_PILEDRIVER, dtrmm_kernel_RT_PILEDRIVER, dtrmm_kernel_LN_PILEDRIVER, dtrmm_kernel_LT_PILEDRIVER,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dtrmm_iunucopy_PILEDRIVER, dtrmm_iunncopy_PILEDRIVER, dtrmm_iutucopy_PILEDRIVER, dtrmm_iutncopy_PILEDRIVER,
  dtrmm_ilnucopy_PILEDRIVER, dtrmm_ilnncopy_PILEDRIVER, dtrmm_iltucopy_PILEDRIVER, dtrmm_iltncopy_PILEDRIVER,
#else
  dtrmm_ounucopy_PILEDRIVER, dtrmm_ounncopy_PILEDRIVER, dtrmm_outucopy_PILEDRIVER, dtrmm_outncopy_PILEDRIVER,
  dtrmm_olnucopy_PILEDRIVER, dtrmm_olnncopy_PILEDRIVER, dtrmm_oltucopy_PILEDRIVER, dtrmm_oltncopy_PILEDRIVER,
#endif
  dtrmm_ounucopy_PILEDRIVER, dtrmm_ounncopy_PILEDRIVER, dtrmm_outucopy_PILEDRIVER, dtrmm_outncopy_PILEDRIVER,
  dtrmm_olnucopy_PILEDRIVER, dtrmm_olnncopy_PILEDRIVER, dtrmm_oltucopy_PILEDRIVER, dtrmm_oltncopy_PILEDRIVER,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dsymm_iutcopy_PILEDRIVER, dsymm_iltcopy_PILEDRIVER,
#else
  dsymm_outcopy_PILEDRIVER, dsymm_oltcopy_PILEDRIVER,
#endif
  dsymm_outcopy_PILEDRIVER, dsymm_oltcopy_PILEDRIVER,

#ifndef NO_LAPACK
  dneg_tcopy_PILEDRIVER, dlaswp_ncopy_PILEDRIVER,
#else
  NULL, NULL,
#endif

  /* ---- q-prefixed kernels; emitted only when EXPRECISION is defined ---- */
#ifdef EXPRECISION

  0, 0, 0,
  QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

  qamax_k_PILEDRIVER,  qamin_k_PILEDRIVER,  qmax_k_PILEDRIVER,  qmin_k_PILEDRIVER,
  iqamax_k_PILEDRIVER, iqamin_k_PILEDRIVER, iqmax_k_PILEDRIVER, iqmin_k_PILEDRIVER,
  qnrm2_k_PILEDRIVER,  qasum_k_PILEDRIVER,  qcopy_k_PILEDRIVER, qdot_k_PILEDRIVER,
  qrot_k_PILEDRIVER,   qaxpy_k_PILEDRIVER,  qscal_k_PILEDRIVER, qswap_k_PILEDRIVER,
  qgemv_n_PILEDRIVER,  qgemv_t_PILEDRIVER,  qger_k_PILEDRIVER,
  qsymv_L_PILEDRIVER,  qsymv_U_PILEDRIVER,

  qgemm_kernel_PILEDRIVER, qgemm_beta_PILEDRIVER,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qgemm_incopy_PILEDRIVER, qgemm_itcopy_PILEDRIVER,
#else
  qgemm_oncopy_PILEDRIVER, qgemm_otcopy_PILEDRIVER,
#endif
  qgemm_oncopy_PILEDRIVER, qgemm_otcopy_PILEDRIVER,
  qtrsm_kernel_LN_PILEDRIVER, qtrsm_kernel_LT_PILEDRIVER, qtrsm_kernel_RN_PILEDRIVER, qtrsm_kernel_RT_PILEDRIVER,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qtrsm_iunucopy_PILEDRIVER, qtrsm_iunncopy_PILEDRIVER, qtrsm_iutucopy_PILEDRIVER, qtrsm_iutncopy_PILEDRIVER,
  qtrsm_ilnucopy_PILEDRIVER, qtrsm_ilnncopy_PILEDRIVER, qtrsm_iltucopy_PILEDRIVER, qtrsm_iltncopy_PILEDRIVER,
#else
  qtrsm_ounucopy_PILEDRIVER, qtrsm_ounncopy_PILEDRIVER, qtrsm_outucopy_PILEDRIVER, qtrsm_outncopy_PILEDRIVER,
  qtrsm_olnucopy_PILEDRIVER, qtrsm_olnncopy_PILEDRIVER, qtrsm_oltucopy_PILEDRIVER, qtrsm_oltncopy_PILEDRIVER,
#endif
  qtrsm_ounucopy_PILEDRIVER, qtrsm_ounncopy_PILEDRIVER, qtrsm_outucopy_PILEDRIVER, qtrsm_outncopy_PILEDRIVER,
  qtrsm_olnucopy_PILEDRIVER, qtrsm_olnncopy_PILEDRIVER, qtrsm_oltucopy_PILEDRIVER, qtrsm_oltncopy_PILEDRIVER,
  qtrmm_kernel_RN_PILEDRIVER, qtrmm_kernel_RT_PILEDRIVER, qtrmm_kernel_LN_PILEDRIVER, qtrmm_kernel_LT_PILEDRIVER,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qtrmm_iunucopy_PILEDRIVER, qtrmm_iunncopy_PILEDRIVER, qtrmm_iutucopy_PILEDRIVER, qtrmm_iutncopy_PILEDRIVER,
  qtrmm_ilnucopy_PILEDRIVER, qtrmm_ilnncopy_PILEDRIVER, qtrmm_iltucopy_PILEDRIVER, qtrmm_iltncopy_PILEDRIVER,
#else
  qtrmm_ounucopy_PILEDRIVER, qtrmm_ounncopy_PILEDRIVER, qtrmm_outucopy_PILEDRIVER, qtrmm_outncopy_PILEDRIVER,
  qtrmm_olnucopy_PILEDRIVER, qtrmm_olnncopy_PILEDRIVER, qtrmm_oltucopy_PILEDRIVER, qtrmm_oltncopy_PILEDRIVER,
#endif
  qtrmm_ounucopy_PILEDRIVER, qtrmm_ounncopy_PILEDRIVER, qtrmm_outucopy_PILEDRIVER, qtrmm_outncopy_PILEDRIVER,
  qtrmm_olnucopy_PILEDRIVER, qtrmm_olnncopy_PILEDRIVER, qtrmm_oltucopy_PILEDRIVER, qtrmm_oltncopy_PILEDRIVER,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qsymm_iutcopy_PILEDRIVER, qsymm_iltcopy_PILEDRIVER,
#else
  qsymm_outcopy_PILEDRIVER, qsymm_oltcopy_PILEDRIVER,
#endif
  qsymm_outcopy_PILEDRIVER, qsymm_oltcopy_PILEDRIVER,

#ifndef NO_LAPACK
  qneg_tcopy_PILEDRIVER, qlaswp_ncopy_PILEDRIVER,
#else
  NULL, NULL,
#endif

#endif

  /* ---- c-prefixed kernels (single-precision complex, by BLAS naming) ---- */
  0, 0, 0,
  CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN
 CGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif

  camax_k_PILEDRIVER, camin_k_PILEDRIVER, icamax_k_PILEDRIVER, icamin_k_PILEDRIVER,
  cnrm2_k_PILEDRIVER, casum_k_PILEDRIVER, ccopy_k_PILEDRIVER,
  cdotu_k_PILEDRIVER, cdotc_k_PILEDRIVER, csrot_k_PILEDRIVER,
  caxpy_k_PILEDRIVER, caxpyc_k_PILEDRIVER, cscal_k_PILEDRIVER, cswap_k_PILEDRIVER,

  cgemv_n_PILEDRIVER, cgemv_t_PILEDRIVER, cgemv_r_PILEDRIVER, cgemv_c_PILEDRIVER,
  cgemv_o_PILEDRIVER, cgemv_u_PILEDRIVER, cgemv_s_PILEDRIVER, cgemv_d_PILEDRIVER,
  cgeru_k_PILEDRIVER, cgerc_k_PILEDRIVER, cgerv_k_PILEDRIVER, cgerd_k_PILEDRIVER,
  csymv_L_PILEDRIVER, csymv_U_PILEDRIVER,
  chemv_L_PILEDRIVER, chemv_U_PILEDRIVER, chemv_M_PILEDRIVER, chemv_V_PILEDRIVER,

  cgemm_kernel_n_PILEDRIVER, cgemm_kernel_l_PILEDRIVER, cgemm_kernel_r_PILEDRIVER, cgemm_kernel_b_PILEDRIVER,
  cgemm_beta_PILEDRIVER,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  cgemm_incopy_PILEDRIVER, cgemm_itcopy_PILEDRIVER,
#else
  cgemm_oncopy_PILEDRIVER, cgemm_otcopy_PILEDRIVER,
#endif
  cgemm_oncopy_PILEDRIVER, cgemm_otcopy_PILEDRIVER,

  ctrsm_kernel_LN_PILEDRIVER, ctrsm_kernel_LT_PILEDRIVER, ctrsm_kernel_LR_PILEDRIVER, ctrsm_kernel_LC_PILEDRIVER,
  ctrsm_kernel_RN_PILEDRIVER, ctrsm_kernel_RT_PILEDRIVER, ctrsm_kernel_RR_PILEDRIVER, ctrsm_kernel_RC_PILEDRIVER,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  ctrsm_iunucopy_PILEDRIVER,  ctrsm_iunncopy_PILEDRIVER,  ctrsm_iutucopy_PILEDRIVER,  ctrsm_iutncopy_PILEDRIVER,
  ctrsm_ilnucopy_PILEDRIVER,  ctrsm_ilnncopy_PILEDRIVER,  ctrsm_iltucopy_PILEDRIVER,  ctrsm_iltncopy_PILEDRIVER,
#else
  ctrsm_ounucopy_PILEDRIVER,  ctrsm_ounncopy_PILEDRIVER,  ctrsm_outucopy_PILEDRIVER,  ctrsm_outncopy_PILEDRIVER,
  ctrsm_olnucopy_PILEDRIVER,  ctrsm_olnncopy_PILEDRIVER,  ctrsm_oltucopy_PILEDRIVER,  ctrsm_oltncopy_PILEDRIVER,
#endif
  ctrsm_ounucopy_PILEDRIVER,  ctrsm_ounncopy_PILEDRIVER,  ctrsm_outucopy_PILEDRIVER,  ctrsm_outncopy_PILEDRIVER,
  ctrsm_olnucopy_PILEDRIVER,  ctrsm_olnncopy_PILEDRIVER,  ctrsm_oltucopy_PILEDRIVER,  ctrsm_oltncopy_PILEDRIVER,

  ctrmm_kernel_RN_PILEDRIVER,  ctrmm_kernel_RT_PILEDRIVER,  ctrmm_kernel_RR_PILEDRIVER,  ctrmm_kernel_RC_PILEDRIVER,
  ctrmm_kernel_LN_PILEDRIVER,  ctrmm_kernel_LT_PILEDRIVER,  ctrmm_kernel_LR_PILEDRIVER,  ctrmm_kernel_LC_PILEDRIVER,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  ctrmm_iunucopy_PILEDRIVER,  ctrmm_iunncopy_PILEDRIVER,  ctrmm_iutucopy_PILEDRIVER,  ctrmm_iutncopy_PILEDRIVER,
  ctrmm_ilnucopy_PILEDRIVER,  ctrmm_ilnncopy_PILEDRIVER,  ctrmm_iltucopy_PILEDRIVER,  ctrmm_iltncopy_PILEDRIVER,
#else
  ctrmm_ounucopy_PILEDRIVER,  ctrmm_ounncopy_PILEDRIVER,  ctrmm_outucopy_PILEDRIVER,  ctrmm_outncopy_PILEDRIVER,
  ctrmm_olnucopy_PILEDRIVER,  ctrmm_olnncopy_PILEDRIVER,  ctrmm_oltucopy_PILEDRIVER,  ctrmm_oltncopy_PILEDRIVER,
#endif
  ctrmm_ounucopy_PILEDRIVER,  ctrmm_ounncopy_PILEDRIVER,  ctrmm_outucopy_PILEDRIVER,  ctrmm_outncopy_PILEDRIVER,
  ctrmm_olnucopy_PILEDRIVER,  ctrmm_olnncopy_PILEDRIVER,  ctrmm_oltucopy_PILEDRIVER,  ctrmm_oltncopy_PILEDRIVER,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  csymm_iutcopy_PILEDRIVER,  csymm_iltcopy_PILEDRIVER,
#else
  csymm_outcopy_PILEDRIVER,  csymm_oltcopy_PILEDRIVER,
#endif
  csymm_outcopy_PILEDRIVER,  csymm_oltcopy_PILEDRIVER,
#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  chemm_iutcopy_PILEDRIVER,  chemm_iltcopy_PILEDRIVER,
#else
  chemm_outcopy_PILEDRIVER,  chemm_oltcopy_PILEDRIVER,
#endif
  chemm_outcopy_PILEDRIVER,  chemm_oltcopy_PILEDRIVER,

  /* ---- cgemm3m kernels; unroll factors fall back to the SGEMM ones when
          CGEMM3M_DEFAULT_UNROLL_M is not defined ---- */
  0, 0, 0,
#ifdef CGEMM3M_DEFAULT_UNROLL_M
  CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else
  SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif


  cgemm3m_kernel_PILEDRIVER,

  cgemm3m_incopyb_PILEDRIVER,  cgemm3m_incopyr_PILEDRIVER,
  cgemm3m_incopyi_PILEDRIVER,  cgemm3m_itcopyb_PILEDRIVER,
  cgemm3m_itcopyr_PILEDRIVER,  cgemm3m_itcopyi_PILEDRIVER,
  cgemm3m_oncopyb_PILEDRIVER,  cgemm3m_oncopyr_PILEDRIVER,
  cgemm3m_oncopyi_PILEDRIVER,  cgemm3m_otcopyb_PILEDRIVER,
  cgemm3m_otcopyr_PILEDRIVER,  cgemm3m_otcopyi_PILEDRIVER,

  csymm3m_iucopyb_PILEDRIVER,  csymm3m_ilcopyb_PILEDRIVER,
  csymm3m_iucopyr_PILEDRIVER,  csymm3m_ilcopyr_PILEDRIVER,
  csymm3m_iucopyi_PILEDRIVER,  csymm3m_ilcopyi_PILEDRIVER,
  csymm3m_oucopyb_PILEDRIVER,  csymm3m_olcopyb_PILEDRIVER,
  csymm3m_oucopyr_PILEDRIVER,  csymm3m_olcopyr_PILEDRIVER,
  csymm3m_oucopyi_PILEDRIVER,  csymm3m_olcopyi_PILEDRIVER,

  chemm3m_iucopyb_PILEDRIVER,  chemm3m_ilcopyb_PILEDRIVER,
  chemm3m_iucopyr_PILEDRIVER,  chemm3m_ilcopyr_PILEDRIVER,
  chemm3m_iucopyi_PILEDRIVER,  chemm3m_ilcopyi_PILEDRIVER,

  chemm3m_oucopyb_PILEDRIVER,  chemm3m_olcopyb_PILEDRIVER,
  chemm3m_oucopyr_PILEDRIVER,  chemm3m_olcopyr_PILEDRIVER,
  chemm3m_oucopyi_PILEDRIVER,  chemm3m_olcopyi_PILEDRIVER,

#ifndef NO_LAPACK
  cneg_tcopy_PILEDRIVER, claswp_ncopy_PILEDRIVER,
#else
  NULL, NULL,
#endif

  /* ---- z-prefixed kernels (double-precision complex, by BLAS naming) ---- */
  0, 0, 0,
  ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
#ifdef ZGEMM_DEFAULT_UNROLL_MN
 ZGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
#endif

  zamax_k_PILEDRIVER, zamin_k_PILEDRIVER, izamax_k_PILEDRIVER, izamin_k_PILEDRIVER,
  znrm2_k_PILEDRIVER, zasum_k_PILEDRIVER, zcopy_k_PILEDRIVER,
  zdotu_k_PILEDRIVER, zdotc_k_PILEDRIVER, zdrot_k_PILEDRIVER,
  zaxpy_k_PILEDRIVER, zaxpyc_k_PILEDRIVER, zscal_k_PILEDRIVER, zswap_k_PILEDRIVER,

  zgemv_n_PILEDRIVER, zgemv_t_PILEDRIVER, zgemv_r_PILEDRIVER, zgemv_c_PILEDRIVER,
  zgemv_o_PILEDRIVER, zgemv_u_PILEDRIVER, zgemv_s_PILEDRIVER, zgemv_d_PILEDRIVER,
  zgeru_k_PILEDRIVER, zgerc_k_PILEDRIVER, zgerv_k_PILEDRIVER, zgerd_k_PILEDRIVER,
  zsymv_L_PILEDRIVER, zsymv_U_PILEDRIVER,
  zhemv_L_PILEDRIVER, zhemv_U_PILEDRIVER, zhemv_M_PILEDRIVER, zhemv_V_PILEDRIVER,

  zgemm_kernel_n_PILEDRIVER, zgemm_kernel_l_PILEDRIVER, zgemm_kernel_r_PILEDRIVER, zgemm_kernel_b_PILEDRIVER,
  zgemm_beta_PILEDRIVER,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zgemm_incopy_PILEDRIVER, zgemm_itcopy_PILEDRIVER,
#else
  zgemm_oncopy_PILEDRIVER, zgemm_otcopy_PILEDRIVER,
#endif
  zgemm_oncopy_PILEDRIVER, zgemm_otcopy_PILEDRIVER,

  ztrsm_kernel_LN_PILEDRIVER, ztrsm_kernel_LT_PILEDRIVER, ztrsm_kernel_LR_PILEDRIVER, ztrsm_kernel_LC_PILEDRIVER,
  ztrsm_kernel_RN_PILEDRIVER, ztrsm_kernel_RT_PILEDRIVER, ztrsm_kernel_RR_PILEDRIVER, ztrsm_kernel_RC_PILEDRIVER,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  ztrsm_iunucopy_PILEDRIVER,  ztrsm_iunncopy_PILEDRIVER,  ztrsm_iutucopy_PILEDRIVER,  ztrsm_iutncopy_PILEDRIVER,
  ztrsm_ilnucopy_PILEDRIVER,  ztrsm_ilnncopy_PILEDRIVER,  ztrsm_iltucopy_PILEDRIVER,  ztrsm_iltncopy_PILEDRIVER,
#else
  ztrsm_ounucopy_PILEDRIVER,  ztrsm_ounncopy_PILEDRIVER,  ztrsm_outucopy_PILEDRIVER,  ztrsm_outncopy_PILEDRIVER,
  ztrsm_olnucopy_PILEDRIVER,  ztrsm_olnncopy_PILEDRIVER,  ztrsm_oltucopy_PILEDRIVER,  ztrsm_oltncopy_PILEDRIVER,
#endif
  ztrsm_ounucopy_PILEDRIVER,  ztrsm_ounncopy_PILEDRIVER,  ztrsm_outucopy_PILEDRIVER,  ztrsm_outncopy_PILEDRIVER,
  ztrsm_olnucopy_PILEDRIVER,  ztrsm_olnncopy_PILEDRIVER,  ztrsm_oltucopy_PILEDRIVER,  ztrsm_oltncopy_PILEDRIVER,

  ztrmm_kernel_RN_PILEDRIVER,  ztrmm_kernel_RT_PILEDRIVER,  ztrmm_kernel_RR_PILEDRIVER,  ztrmm_kernel_RC_PILEDRIVER,
  ztrmm_kernel_LN_PILEDRIVER,  ztrmm_kernel_LT_PILEDRIVER,  ztrmm_kernel_LR_PILEDRIVER,  ztrmm_kernel_LC_PILEDRIVER,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  ztrmm_iunucopy_PILEDRIVER,  ztrmm_iunncopy_PILEDRIVER,  ztrmm_iutucopy_PILEDRIVER,  ztrmm_iutncopy_PILEDRIVER,
  ztrmm_ilnucopy_PILEDRIVER,  ztrmm_ilnncopy_PILEDRIVER,  ztrmm_iltucopy_PILEDRIVER,  ztrmm_iltncopy_PILEDRIVER,
#else
  ztrmm_ounucopy_PILEDRIVER,  ztrmm_ounncopy_PILEDRIVER,  ztrmm_outucopy_PILEDRIVER,  ztrmm_outncopy_PILEDRIVER,
  ztrmm_olnucopy_PILEDRIVER,  ztrmm_olnncopy_PILEDRIVER,  ztrmm_oltucopy_PILEDRIVER,  ztrmm_oltncopy_PILEDRIVER,
#endif
  ztrmm_ounucopy_PILEDRIVER,  ztrmm_ounncopy_PILEDRIVER,  ztrmm_outucopy_PILEDRIVER,  ztrmm_outncopy_PILEDRIVER,
  ztrmm_olnucopy_PILEDRIVER,  ztrmm_olnncopy_PILEDRIVER,  ztrmm_oltucopy_PILEDRIVER,  ztrmm_oltncopy_PILEDRIVER,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zsymm_iutcopy_PILEDRIVER,  zsymm_iltcopy_PILEDRIVER,
#else
  zsymm_outcopy_PILEDRIVER,  zsymm_oltcopy_PILEDRIVER,
#endif
  zsymm_outcopy_PILEDRIVER,  zsymm_oltcopy_PILEDRIVER,
#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zhemm_iutcopy_PILEDRIVER,  zhemm_iltcopy_PILEDRIVER,
#else
  zhemm_outcopy_PILEDRIVER,  zhemm_oltcopy_PILEDRIVER,
#endif
  zhemm_outcopy_PILEDRIVER,  zhemm_oltcopy_PILEDRIVER,

  /* ---- zgemm3m kernels; unroll factors fall back to the DGEMM ones when
          ZGEMM3M_DEFAULT_UNROLL_M is not defined ---- */
  0, 0, 0,
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
  ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else
  DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif


  zgemm3m_kernel_PILEDRIVER,

  zgemm3m_incopyb_PILEDRIVER,  zgemm3m_incopyr_PILEDRIVER,
  zgemm3m_incopyi_PILEDRIVER,  zgemm3m_itcopyb_PILEDRIVER,
  zgemm3m_itcopyr_PILEDRIVER,  zgemm3m_itcopyi_PILEDRIVER,
  zgemm3m_oncopyb_PILEDRIVER,  zgemm3m_oncopyr_PILEDRIVER,
  zgemm3m_oncopyi_PILEDRIVER,  zgemm3m_otcopyb_PILEDRIVER,
  zgemm3m_otcopyr_PILEDRIVER,  zgemm3m_otcopyi_PILEDRIVER,

  zsymm3m_iucopyb_PILEDRIVER,  zsymm3m_ilcopyb_PILEDRIVER,
  zsymm3m_iucopyr_PILEDRIVER,  zsymm3m_ilcopyr_PILEDRIVER,
  zsymm3m_iucopyi_PILEDRIVER,  zsymm3m_ilcopyi_PILEDRIVER,
  zsymm3m_oucopyb_PILEDRIVER,  zsymm3m_olcopyb_PILEDRIVER,
  zsymm3m_oucopyr_PILEDRIVER,  zsymm3m_olcopyr_PILEDRIVER,
  zsymm3m_oucopyi_PILEDRIVER,  zsymm3m_olcopyi_PILEDRIVER,

  zhemm3m_iucopyb_PILEDRIVER,  zhemm3m_ilcopyb_PILEDRIVER,
  zhemm3m_iucopyr_PILEDRIVER,  zhemm3m_ilcopyr_PILEDRIVER,
  zhemm3m_iucopyi_PILEDRIVER,  zhemm3m_ilcopyi_PILEDRIVER,

  zhemm3m_oucopyb_PILEDRIVER,  zhemm3m_olcopyb_PILEDRIVER,
  zhemm3m_oucopyr_PILEDRIVER,  zhemm3m_olcopyr_PILEDRIVER,
  zhemm3m_oucopyi_PILEDRIVER,  zhemm3m_olcopyi_PILEDRIVER,

#ifndef NO_LAPACK
  zneg_tcopy_PILEDRIVER, zlaswp_ncopy_PILEDRIVER,
#else
  NULL, NULL,
#endif

  /* ---- x-prefixed kernels; emitted only when EXPRECISION is defined ---- */
#ifdef EXPRECISION

  0, 0, 0,
  XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),

  xamax_k_PILEDRIVER, xamin_k_PILEDRIVER, ixamax_k_PILEDRIVER, ixamin_k_PILEDRIVER,
  xnrm2_k_PILEDRIVER, xasum_k_PILEDRIVER, xcopy_k_PILEDRIVER,
  xdotu_k_PILEDRIVER, xdotc_k_PILEDRIVER, xqrot_k_PILEDRIVER,
  xaxpy_k_PILEDRIVER, xaxpyc_k_PILEDRIVER, xscal_k_PILEDRIVER, xswap_k_PILEDRIVER,

  xgemv_n_PILEDRIVER, xgemv_t_PILEDRIVER, xgemv_r_PILEDRIVER, xgemv_c_PILEDRIVER,
  xgemv_o_PILEDRIVER, xgemv_u_PILEDRIVER, xgemv_s_PILEDRIVER, xgemv_d_PILEDRIVER,
  xgeru_k_PILEDRIVER, xgerc_k_PILEDRIVER, xgerv_k_PILEDRIVER, xgerd_k_PILEDRIVER,
  xsymv_L_PILEDRIVER, xsymv_U_PILEDRIVER,
  xhemv_L_PILEDRIVER, xhemv_U_PILEDRIVER, xhemv_M_PILEDRIVER, xhemv_V_PILEDRIVER,

  xgemm_kernel_n_PILEDRIVER, xgemm_kernel_l_PILEDRIVER, xgemm_kernel_r_PILEDRIVER, xgemm_kernel_b_PILEDRIVER,
  xgemm_beta_PILEDRIVER,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xgemm_incopy_PILEDRIVER, xgemm_itcopy_PILEDRIVER,
#else
  xgemm_oncopy_PILEDRIVER, xgemm_otcopy_PILEDRIVER,
#endif
  xgemm_oncopy_PILEDRIVER, xgemm_otcopy_PILEDRIVER,

  xtrsm_kernel_LN_PILEDRIVER, xtrsm_kernel_LT_PILEDRIVER, xtrsm_kernel_LR_PILEDRIVER, xtrsm_kernel_LC_PILEDRIVER,
  xtrsm_kernel_RN_PILEDRIVER, xtrsm_kernel_RT_PILEDRIVER, xtrsm_kernel_RR_PILEDRIVER, xtrsm_kernel_RC_PILEDRIVER,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xtrsm_iunucopy_PILEDRIVER,  xtrsm_iunncopy_PILEDRIVER,  xtrsm_iutucopy_PILEDRIVER,  xtrsm_iutncopy_PILEDRIVER,
  xtrsm_ilnucopy_PILEDRIVER,  xtrsm_ilnncopy_PILEDRIVER,  xtrsm_iltucopy_PILEDRIVER,  xtrsm_iltncopy_PILEDRIVER,
#else
  xtrsm_ounucopy_PILEDRIVER,  xtrsm_ounncopy_PILEDRIVER,  xtrsm_outucopy_PILEDRIVER,  xtrsm_outncopy_PILEDRIVER,
  xtrsm_olnucopy_PILEDRIVER,  xtrsm_olnncopy_PILEDRIVER,  xtrsm_oltucopy_PILEDRIVER,  xtrsm_oltncopy_PILEDRIVER,
#endif
  xtrsm_ounucopy_PILEDRIVER,  xtrsm_ounncopy_PILEDRIVER,  xtrsm_outucopy_PILEDRIVER,  xtrsm_outncopy_PILEDRIVER,
  xtrsm_olnucopy_PILEDRIVER,  xtrsm_olnncopy_PILEDRIVER,  xtrsm_oltucopy_PILEDRIVER,  xtrsm_oltncopy_PILEDRIVER,

  xtrmm_kernel_RN_PILEDRIVER,  xtrmm_kernel_RT_PILEDRIVER,  xtrmm_kernel_RR_PILEDRIVER,  xtrmm_kernel_RC_PILEDRIVER,
  xtrmm_kernel_LN_PILEDRIVER,  xtrmm_kernel_LT_PILEDRIVER,  xtrmm_kernel_LR_PILEDRIVER,  xtrmm_kernel_LC_PILEDRIVER,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xtrmm_iunucopy_PILEDRIVER,  xtrmm_iunncopy_PILEDRIVER,  xtrmm_iutucopy_PILEDRIVER,  xtrmm_iutncopy_PILEDRIVER,
  xtrmm_ilnucopy_PILEDRIVER,  xtrmm_ilnncopy_PILEDRIVER,  xtrmm_iltucopy_PILEDRIVER,  xtrmm_iltncopy_PILEDRIVER,
#else
  xtrmm_ounucopy_PILEDRIVER,  xtrmm_ounncopy_PILEDRIVER,  xtrmm_outucopy_PILEDRIVER,  xtrmm_outncopy_PILEDRIVER,
  xtrmm_olnucopy_PILEDRIVER,  xtrmm_olnncopy_PILEDRIVER,  xtrmm_oltucopy_PILEDRIVER,  xtrmm_oltncopy_PILEDRIVER,
#endif
  xtrmm_ounucopy_PILEDRIVER,  xtrmm_ounncopy_PILEDRIVER,  xtrmm_outucopy_PILEDRIVER,  xtrmm_outncopy_PILEDRIVER,
  xtrmm_olnucopy_PILEDRIVER,  xtrmm_olnncopy_PILEDRIVER,  xtrmm_oltucopy_PILEDRIVER,  xtrmm_oltncopy_PILEDRIVER,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xsymm_iutcopy_PILEDRIVER,  xsymm_iltcopy_PILEDRIVER,
#else
  xsymm_outcopy_PILEDRIVER,  xsymm_oltcopy_PILEDRIVER,
#endif
  xsymm_outcopy_PILEDRIVER,  xsymm_oltcopy_PILEDRIVER,
#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xhemm_iutcopy_PILEDRIVER,  xhemm_iltcopy_PILEDRIVER,
#else
  xhemm_outcopy_PILEDRIVER,  xhemm_oltcopy_PILEDRIVER,
#endif
  xhemm_outcopy_PILEDRIVER,  xhemm_oltcopy_PILEDRIVER,

  /* ---- xgemm3m kernels; note the unroll factors here are the QGEMM ones ---- */
  0, 0, 0,
  QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

  xgemm3m_kernel_PILEDRIVER,

  xgemm3m_incopyb_PILEDRIVER,  xgemm3m_incopyr_PILEDRIVER,
  xgemm3m_incopyi_PILEDRIVER,  xgemm3m_itcopyb_PILEDRIVER,
  xgemm3m_itcopyr_PILEDRIVER,  xgemm3m_itcopyi_PILEDRIVER,
  xgemm3m_oncopyb_PILEDRIVER,  xgemm3m_oncopyr_PILEDRIVER,
  xgemm3m_oncopyi_PILEDRIVER,  xgemm3m_otcopyb_PILEDRIVER,
  xgemm3m_otcopyr_PILEDRIVER,  xgemm3m_otcopyi_PILEDRIVER,

  xsymm3m_iucopyb_PILEDRIVER,  xsymm3m_ilcopyb_PILEDRIVER,
  xsymm3m_iucopyr_PILEDRIVER,  xsymm3m_ilcopyr_PILEDRIVER,
  xsymm3m_iucopyi_PILEDRIVER,  xsymm3m_ilcopyi_PILEDRIVER,
  xsymm3m_oucopyb_PILEDRIVER,  xsymm3m_olcopyb_PILEDRIVER,
  xsymm3m_oucopyr_PILEDRIVER,  xsymm3m_olcopyr_PILEDRIVER,
  xsymm3m_oucopyi_PILEDRIVER,  xsymm3m_olcopyi_PILEDRIVER,

  xhemm3m_iucopyb_PILEDRIVER,  xhemm3m_ilcopyb_PILEDRIVER,
  xhemm3m_iucopyr_PILEDRIVER,  xhemm3m_ilcopyr_PILEDRIVER,
  xhemm3m_iucopyi_PILEDRIVER,  xhemm3m_ilcopyi_PILEDRIVER,

  xhemm3m_oucopyb_PILEDRIVER,  xhemm3m_olcopyb_PILEDRIVER,
  xhemm3m_oucopyr_PILEDRIVER,  xhemm3m_olcopyr_PILEDRIVER,
  xhemm3m_oucopyi_PILEDRIVER,  xhemm3m_olcopyi_PILEDRIVER,

#ifndef NO_LAPACK
  xneg_tcopy_PILEDRIVER, xlaswp_ncopy_PILEDRIVER,
#else
  NULL, NULL,
#endif

#endif

  /* Hook to the static init_parameter() defined later in this file. */
  init_parameter,

  SNUMOPT, DNUMOPT, QNUMOPT,

  /* ---- axpby kernels, one per s/d/c/z prefix ---- */
  saxpby_k_PILEDRIVER, daxpby_k_PILEDRIVER, caxpby_k_PILEDRIVER, zaxpby_k_PILEDRIVER,

  /* ---- omatcopy kernels; the c and z prefixes have an extra row of
          cnc/ctc/rnc/rtc variants ---- */
  somatcopy_k_cn_PILEDRIVER, somatcopy_k_ct_PILEDRIVER, somatcopy_k_rn_PILEDRIVER, somatcopy_k_rt_PILEDRIVER,
  domatcopy_k_cn_PILEDRIVER, domatcopy_k_ct_PILEDRIVER, domatcopy_k_rn_PILEDRIVER, domatcopy_k_rt_PILEDRIVER,
  comatcopy_k_cn_PILEDRIVER, comatcopy_k_ct_PILEDRIVER, comatcopy_k_rn_PILEDRIVER, comatcopy_k_rt_PILEDRIVER,
  comatcopy_k_cnc_PILEDRIVER, comatcopy_k_ctc_PILEDRIVER, comatcopy_k_rnc_PILEDRIVER, comatcopy_k_rtc_PILEDRIVER,
  zomatcopy_k_cn_PILEDRIVER, zomatcopy_k_ct_PILEDRIVER, zomatcopy_k_rn_PILEDRIVER, zomatcopy_k_rt_PILEDRIVER,
  zomatcopy_k_cnc_PILEDRIVER, zomatcopy_k_ctc_PILEDRIVER, zomatcopy_k_rnc_PILEDRIVER, zomatcopy_k_rtc_PILEDRIVER,

  /* ---- imatcopy kernels; same layout as the omatcopy group above ---- */
  simatcopy_k_cn_PILEDRIVER, simatcopy_k_ct_PILEDRIVER, simatcopy_k_rn_PILEDRIVER, simatcopy_k_rt_PILEDRIVER,
  dimatcopy_k_cn_PILEDRIVER, dimatcopy_k_ct_PILEDRIVER, dimatcopy_k_rn_PILEDRIVER, dimatcopy_k_rt_PILEDRIVER,
  cimatcopy_k_cn_PILEDRIVER, cimatcopy_k_ct_PILEDRIVER, cimatcopy_k_rn_PILEDRIVER, cimatcopy_k_rt_PILEDRIVER,
  cimatcopy_k_cnc_PILEDRIVER, cimatcopy_k_ctc_PILEDRIVER, cimatcopy_k_rnc_PILEDRIVER, cimatcopy_k_rtc_PILEDRIVER,
  zimatcopy_k_cn_PILEDRIVER, zimatcopy_k_ct_PILEDRIVER, zimatcopy_k_rn_PILEDRIVER, zimatcopy_k_rt_PILEDRIVER,
  zimatcopy_k_cnc_PILEDRIVER, zimatcopy_k_ctc_PILEDRIVER, zimatcopy_k_rnc_PILEDRIVER, zimatcopy_k_rtc_PILEDRIVER,

  /* ---- geadd kernels, one per s/d/c/z prefix (last entries, no trailing
          comma) ---- */
  sgeadd_k_PILEDRIVER, dgeadd_k_PILEDRIVER, cgeadd_k_PILEDRIVER, zgeadd_k_PILEDRIVER

};

#ifdef ARCH_X86
/*
 * Translate one CPUID leaf-2 cache descriptor byte into an L2 cache size
 * in KB.  Returns 0 when the descriptor is not one of the L2 entries
 * listed below.
 */
static int l2_kb_from_descriptor(int descriptor){

  switch (descriptor){

    /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

  case 0x1a :
    return 96;

  case 0x39 :
  case 0x3b :
  case 0x41 :
  case 0x79 :
  case 0x81 :
    return 128;

  case 0x3a :
    return 192;

  case 0x21 :
  case 0x3c :
  case 0x42 :
  case 0x7a :
  case 0x7e :
  case 0x82 :
    return 256;

  case 0x3d :
    return 384;

  case 0x3e :
  case 0x43 :
  case 0x7b :
  case 0x7f :
  case 0x83 :
  case 0x86 :
    return 512;

  case 0x44 :
  case 0x78 :
  case 0x7c :
  case 0x84 :
  case 0x87 :
    return 1024;

  case 0x45 :
  case 0x7d :
  case 0x85 :
    return 2048;

  case 0x48 :
    /* 3 MB expressed in KB; this entry previously read 3184. */
    return 3072;

  case 0x49 :
    return 4096;

  case 0x4e :
    return 6144;

  default :
    return 0;
  }
}

/*
 * Legacy L2 cache size detection through CPUID leaf 2 (descriptor bytes).
 *
 * The four output registers are scanned in the order EAX, EBX, ECX, EDX,
 * low byte first, and the size (in KB) of the first recognised L2
 * descriptor is returned.  Returns 0 when no known L2 descriptor is found.
 */
static int get_l2_size_old(void){
  int reg, byte, size;
  int regs[4];

  cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

  for (reg = 0; reg < 4; reg++){

    /* Bit 31 set marks a register that carries no valid descriptors;
       without this check its bytes could be mistaken for 0x81..0x87. */
    if (BITMASK(regs[reg], 31, 1)) continue;

    /* The low byte of EAX is not a descriptor, so start EAX at byte 1. */
    for (byte = (reg == 0) ? 1 : 0; byte < 4; byte++){
      size = l2_kb_from_descriptor(BITMASK(regs[reg], 8 * byte, 0xff));
      if (size > 0) return size;
    }
  }
  return 0;
}
#endif

/*
 * Query the L2 cache size from CPUID leaf 0x80000006 (bits 16..31 of ECX;
 * presumably in KB, matching the values returned by get_l2_size_old()).
 *
 * On ARCH_X86 builds, a non-positive result falls back to the legacy
 * descriptor-based lookup in get_l2_size_old(); on other builds the raw
 * field is returned as-is.
 */
static __inline__ int get_l2_size(void){

  int eax, ebx, ecx, edx;
  int size_kb;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  size_kb = BITMASK(ecx, 16, 0xffff);

#ifdef ARCH_X86
  /* Extended leaf reported nothing usable: try the old descriptor table. */
  if (size_kb <= 0) return get_l2_size_old();
#endif

  return size_kb;
}

/*
 * Query the L3 cache size from CPUID leaf 0x80000006: the 14-bit field in
 * bits 18..31 of EDX, multiplied by 512 (presumably yielding KB -- confirm
 * against the CPUID specification).
 */
static __inline__ int get_l3_size(void){

  int eax, ebx, ecx, edx;
  int units;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  units = BITMASK(edx, 18, 0x3fff);

  return units * 512;
}


/* Round `value` up to the next multiple of `unit`.  Division is used
   instead of the mask idiom (value + unit - 1) & ~(unit - 1) so that the
   result is also correct when `unit` is not a power of two; for
   power-of-two units and non-negative values both forms agree. */
#define INIT_PARAM_ROUND_UP(value, unit) \
  ((((value) + (unit) - 1) / (unit)) * (unit))

/* Compute a *_r value from the matching *_p / *_q pair and the number of
   bytes per element used for that precision:
     - take p * q * elem_bytes plus TABLE_NAME.offsetA, rounded up with
       TABLE_NAME.align (assumes align is a 2^k - 1 style mask -- TODO
       confirm against its definition),
     - subtract that from BUFFER_SIZE,
     - divide by q * elem_bytes, subtract 15 and clear the low four bits. */
#define INIT_PARAM_GEMM_R(p, q, elem_bytes) \
  (((BUFFER_SIZE - \
     (((p) * (q) * (elem_bytes) + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) \
     ) / ((q) * (elem_bytes)) - 15) & ~15)

/*
 * Fill in the *_p, *_q and *_r fields of TABLE_NAME.
 *
 *   1. *_q fields are copied from the *_DEFAULT_Q macros.
 *   2. *_p fields are chosen per core (selected at compile time by the
 *      CORE_xxx / core-name macros): either scaled from the value returned
 *      by get_l2_size(), set to fixed constants, or copied from the
 *      *_DEFAULT_P macros.  If none of the core macros handled below is
 *      defined, the *_p fields are not assigned by this function before
 *      they are rounded.
 *   3. The 3M variants fall back to the corresponding real-precision
 *      values when no dedicated *3M_DEFAULT_* macro exists.
 *   4. Every *_p is rounded up to a multiple of its *_DEFAULT_UNROLL_M.
 *   5. Every *_r is derived from BUFFER_SIZE and the final *_p / *_q.
 *
 * Takes no arguments and returns nothing; its only effects are the writes
 * to TABLE_NAME (plus diagnostics on stderr when DEBUG is defined).
 */
static void init_parameter(void) {

  /* NOTE(review): the shifts below (l2 >> 7, l2 >> 9) suggest l2 is in KB,
     i.e. counts of 128KB / 512KB -- confirm against get_l2_size(). */
  int l2 = get_l2_size();

  /* ---- Step 1: Q parameters ------------------------------------------ */

  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;

#ifdef CGEMM3M_DEFAULT_Q
  TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
  TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif

#ifdef ZGEMM3M_DEFAULT_Q
  TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
  TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif

#ifdef EXPRECISION
  TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  /* Like cgemm3m/zgemm3m above, xgemm3m reuses the real-precision value. */
  TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif

  /* ---- Step 2: P parameters, per core -------------------------------- */

#if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)

#ifdef DEBUG
  fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
#endif

  TABLE_NAME.sgemm_p =  64 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  32 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  32 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  16 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  16 * (l2 >> 7);
  TABLE_NAME.xgemm_p =   8 * (l2 >> 7);
#endif
#endif

#ifdef CORE_NORTHWOOD

#ifdef DEBUG
  fprintf(stderr, "Northwood\n");
#endif

  TABLE_NAME.sgemm_p =  96 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  48 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  48 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  24 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  24 * (l2 >> 7);
  TABLE_NAME.xgemm_p =  12 * (l2 >> 7);
#endif
#endif

#ifdef ATOM

#ifdef DEBUG
  fprintf(stderr, "Atom\n");
#endif

  /* Fixed values: independent of the detected L2 size. */
  TABLE_NAME.sgemm_p = 256;
  TABLE_NAME.dgemm_p = 128;
  TABLE_NAME.cgemm_p = 128;
  TABLE_NAME.zgemm_p =  64;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  64;
  TABLE_NAME.xgemm_p =  32;
#endif
#endif

#ifdef CORE_PRESCOTT

#ifdef DEBUG
  fprintf(stderr, "Prescott\n");
#endif

  TABLE_NAME.sgemm_p =  56 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  28 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  28 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  14 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  14 * (l2 >> 7);
  TABLE_NAME.xgemm_p =   7 * (l2 >> 7);
#endif
#endif

#ifdef CORE2

#ifdef DEBUG
  fprintf(stderr, "Core2\n");
#endif

  TABLE_NAME.sgemm_p =  92 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  46 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  46 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  23 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  92 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  46 * (l2 >> 9) + 4;
#endif
#endif

#ifdef PENRYN

#ifdef DEBUG
  fprintf(stderr, "Penryn\n");
#endif

  TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
#endif
#endif

#ifdef DUNNINGTON

#ifdef DEBUG
  fprintf(stderr, "Dunnington\n");
#endif

  TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
#endif
#endif


#ifdef NEHALEM

#ifdef DEBUG
  fprintf(stderr, "Nehalem\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef SANDYBRIDGE

#ifdef DEBUG
  fprintf(stderr, "Sandybridge\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef HASWELL

#ifdef DEBUG
  fprintf(stderr, "Haswell\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef OPTERON

#ifdef DEBUG
  fprintf(stderr, "Opteron\n");
#endif

  TABLE_NAME.sgemm_p = 224 +  56 * (l2 >> 7);
  TABLE_NAME.dgemm_p = 112 +  28 * (l2 >> 7);
  TABLE_NAME.cgemm_p = 112 +  28 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  56 +  14 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  56 +  14 * (l2 >> 7);
  TABLE_NAME.xgemm_p =  28 +   7 * (l2 >> 7);
#endif
#endif

#ifdef BARCELONA

#ifdef DEBUG
  fprintf(stderr, "Barcelona\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef BOBCAT

#ifdef DEBUG
  fprintf(stderr, "Bobcat\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef BULLDOZER

#ifdef DEBUG
  fprintf(stderr, "Bulldozer\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef PILEDRIVER

#ifdef DEBUG
  fprintf(stderr, "Piledriver\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef STEAMROLLER

#ifdef DEBUG
  fprintf(stderr, "Steamroller\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef NANO

#ifdef DEBUG
  fprintf(stderr, "NANO\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;

#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif

#endif

  /* ---- Step 3: P parameters for the 3M variants ---------------------- */

#ifdef CGEMM3M_DEFAULT_P
  TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
  TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif

#ifdef ZGEMM3M_DEFAULT_P
  TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
  TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif

#ifdef EXPRECISION
  TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif

  /* ---- Step 4: round every P up to a multiple of its unroll factor --- */

  TABLE_NAME.sgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.sgemm_p, SGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.dgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.dgemm_p, DGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.cgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.cgemm_p, CGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.zgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.zgemm_p, ZGEMM_DEFAULT_UNROLL_M);

#ifdef CGEMM3M_DEFAULT_UNROLL_M
  TABLE_NAME.cgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.cgemm3m_p, CGEMM3M_DEFAULT_UNROLL_M);
#else
  TABLE_NAME.cgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.cgemm3m_p, SGEMM_DEFAULT_UNROLL_M);
#endif

#ifdef ZGEMM3M_DEFAULT_UNROLL_M
  TABLE_NAME.zgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.zgemm3m_p, ZGEMM3M_DEFAULT_UNROLL_M);
#else
  TABLE_NAME.zgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.zgemm3m_p, DGEMM_DEFAULT_UNROLL_M);
#endif

  /* The extended-precision *_p fields are assigned under EXPRECISION
     throughout this function, so they must be rounded under the same
     guard; QUAD_PRECISION is kept for backward compatibility. */
#if defined(EXPRECISION) || defined(QUAD_PRECISION)
  TABLE_NAME.qgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.qgemm_p, QGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.xgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.xgemm_p, XGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.xgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.xgemm3m_p, QGEMM_DEFAULT_UNROLL_M);
#endif

#ifdef DEBUG
  fprintf(stderr, "L2 = %8d DGEMM_P  .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif

  /* ---- Step 5: R parameters ------------------------------------------ */
  /* The last argument is the per-element byte count used for each
     precision (4, 8, 16 for s/d/q; 8, 16, 32 for c/z/x and their 3M
     variants). */

  TABLE_NAME.sgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.sgemm_p, TABLE_NAME.sgemm_q,  4);

  TABLE_NAME.dgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.dgemm_p, TABLE_NAME.dgemm_q,  8);

#ifdef EXPRECISION
  TABLE_NAME.qgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.qgemm_p, TABLE_NAME.qgemm_q, 16);
#endif

  TABLE_NAME.cgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.cgemm_p, TABLE_NAME.cgemm_q,  8);

  TABLE_NAME.zgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.zgemm_p, TABLE_NAME.zgemm_q, 16);

  TABLE_NAME.cgemm3m_r = INIT_PARAM_GEMM_R(TABLE_NAME.cgemm3m_p, TABLE_NAME.cgemm3m_q,  8);

  TABLE_NAME.zgemm3m_r = INIT_PARAM_GEMM_R(TABLE_NAME.zgemm3m_p, TABLE_NAME.zgemm3m_q, 16);

#ifdef EXPRECISION
  TABLE_NAME.xgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.xgemm_p, TABLE_NAME.xgemm_q, 32);

  TABLE_NAME.xgemm3m_r = INIT_PARAM_GEMM_R(TABLE_NAME.xgemm3m_p, TABLE_NAME.xgemm3m_q, 32);
#endif

}

#undef INIT_PARAM_GEMM_R
#undef INIT_PARAM_ROUND_UP
