4x2.c File Reference


Detailed Description

The $4\times 2$ MBCSR implementation of simultaneous multiplication by $A$ and $\mathrm{op}(A)$.

Automatically generated by ./gen_a_and_at.sh on Wed Jun 8 15:54:33 PDT 2005.

#include <assert.h>
#include <oski/config.h>
#include <oski/common.h>
#include <oski/mangle.h>
#include <oski/vecview.h>
#include <oski/MBCSR/format.h>
#include <oski/MBCSR/module.h>

Defines

#define REGISTER   register
 Values are real-valued, so use the explicit 'register' keyword.
#define MBCSR_MatMultAndMatMult_v1_aX_b1_xs1_ysX_oX_z1_ws1_zsX   MANGLE_MOD_(MBCSR_MatMultAndMatMult_v1_aX_b1_xs1_ysX_oX_z1_ws1_zsX_4x2)
 Mangled name for MBCSR_MatMultAndMatMult_v1_aX_b1_xs1_ysX_oX_z1_ws1_zsX.
#define MBCSR_MatMultAndMatMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX   MANGLE_MOD_(MBCSR_MatMultAndMatMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX_4x2)
 Mangled name for MBCSR_MatMultAndMatMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX.
#define MBCSR_MatMultAndMatTransMult_v1_aX_b1_xs1_ysX_oX_z1_wsX_zs1   MANGLE_MOD_(MBCSR_MatMultAndMatTransMult_v1_aX_b1_xs1_ysX_oX_z1_wsX_zs1_4x2)
 Mangled name for MBCSR_MatMultAndMatTransMult_v1_aX_b1_xs1_ysX_oX_z1_wsX_zs1.
#define MBCSR_MatMultAndMatTransMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX   MANGLE_MOD_(MBCSR_MatMultAndMatTransMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX_4x2)
 Mangled name for MBCSR_MatMultAndMatTransMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX.
#define SubmatReprMultAndSubmatReprTransMult   MANGLE_MOD_(SubmatReprMultAndSubmatReprTransMult_4x2)
 Mangled name for primary exportable symbol.

Functions

void MBCSR_MatMultAndMatMult_v1_aX_b1_xs1_ysX_oX_z1_ws1_zsX (oski_index_t M, oski_index_t d0, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, const oski_value_t *restrict diag, oski_value_t alpha, const oski_value_t *restrict x, oski_value_t *restrict y, oski_index_t incy, oski_value_t omega, const oski_value_t *restrict w, oski_value_t *restrict z, oski_index_t incz)
 The $4\times 2$ MBCSR implementation of $A\cdot x, A\cdot w$, where the x, y, w, and z vectors have unit stride, general stride, unit stride, and general stride, respectively.
void MBCSR_MatMultAndMatMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX (oski_index_t M, oski_index_t d0, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, const oski_value_t *restrict diag, oski_value_t alpha, const oski_value_t *restrict x, oski_index_t incx, oski_value_t *restrict y, oski_index_t incy, oski_value_t omega, const oski_value_t *restrict w, oski_index_t incw, oski_value_t *restrict z, oski_index_t incz)
 The $4\times 2$ MBCSR implementation of $A\cdot x, A\cdot w$, where the x, y, w, and z vectors all have general stride.
static int MatMultAndMatMult (const oski_submatMBCSR_t *A, oski_value_t alpha, const oski_vecview_t x, oski_vecview_t y, oski_value_t omega, const oski_vecview_t w, oski_vecview_t z)
 Exported module wrapper for the $4\times 2$ implementation of $A\cdot x, A\cdot w$.
static int MatMultAndMatConjMult (const oski_submatMBCSR_t *A, oski_value_t alpha, const oski_vecview_t x, oski_vecview_t y, oski_value_t omega, const oski_vecview_t w, oski_vecview_t z)
 Exported module wrapper for the $4\times 2$ implementation of $A\cdot x, \bar{A}\cdot w$.
void MBCSR_MatMultAndMatTransMult_v1_aX_b1_xs1_ysX_oX_z1_wsX_zs1 (oski_index_t M, oski_index_t d0, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, const oski_value_t *restrict diag, oski_value_t alpha, const oski_value_t *restrict x, oski_value_t *restrict y, oski_index_t incy, oski_value_t omega, const oski_value_t *restrict w, oski_index_t incw, oski_value_t *restrict z)
 The $4\times 2$ MBCSR implementation of $A\cdot x, A^T\cdot w$, where the x, y, w, and z vectors have unit stride, general stride, general stride, and unit stride, respectively.
void MBCSR_MatMultAndMatTransMult_v1_aX_b1_xsX_ysX_oX_z1_wsX_zsX (oski_index_t M, oski_index_t d0, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, const oski_value_t *restrict diag, oski_value_t alpha, const oski_value_t *restrict x, oski_index_t incx, oski_value_t *restrict y, oski_index_t incy, oski_value_t omega, const oski_value_t *restrict w, oski_index_t incw, oski_value_t *restrict z, oski_index_t incz)
 The $4\times 2$ MBCSR implementation of $A\cdot x, A^T\cdot w$, where the x, y, w, and z vectors all have general stride.
static int MatMultAndMatTransMult (const oski_submatMBCSR_t *A, oski_value_t alpha, const oski_vecview_t x, oski_vecview_t y, oski_value_t omega, const oski_vecview_t w, oski_vecview_t z)
 Exported module wrapper for the $4\times 2$ implementation of $A\cdot x, A^T\cdot w$.
static int MatMultAndMatHermMult (const oski_submatMBCSR_t *A, oski_value_t alpha, const oski_vecview_t x, oski_vecview_t y, oski_value_t omega, const oski_vecview_t w, oski_vecview_t z)
 Exported module wrapper for the $4\times 2$ implementation of $A\cdot x, \bar{A}^T\cdot w$.
int SubmatReprMultAndSubmatReprTransMult (const oski_submatMBCSR_t *A, oski_value_t alpha, const oski_vecview_t x, oski_vecview_t y, oski_matop_t opA, oski_value_t omega, const oski_vecview_t w, oski_vecview_t z)
 Entry point to the $4\times 2$ kernel that implements simultaneous multiplication by sparse $A$ and $\mathrm{op}(A)$.


Generated on Wed Sep 19 16:41:19 2007 for BeBOP Optimized Sparse Kernel Interface Library by doxygen 1.4.6