convert.c File Reference


Detailed Description

Conversion between CSR and VBR format.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <oski/config.h>
#include <oski/common.h>
#include <oski/modloader.h>
#include <oski/matrix.h>
#include <oski/CSR/format.h>
#include <oski/VBR/format.h>
#include <oski/VBR/module.h>
#include <oski/xforms_internal.h>

Defines

#define MAX(a, b)   ((a) > (b) ? (a) : (b))
 Return max of two side-effect free values.

Typedefs

typedef unsigned char flag_t
 Flag type (takes the values 0 or 1).

Functions

static int ExpandSymm (const oski_matCSR_t *mat, const oski_matcommon_t *props, oski_matCSR_t **p_mat_full)
static int TransposeSymm (const oski_matCSR_t *mat, const oski_matcommon_t *props, oski_matCSR_t **p_mat_trans)
static void DestroyCSR (oski_matCSR_t *mat)
static void SetFlags (oski_index_t n_x, const oski_index_t *restrict x, flag_t value, oski_index_t n, flag_t *restrict f)
 Scatters a specified value to all elements of a dense flag vector $f$ corresponding to the structurally non-zero elements of a sparse vector $x$.
static oski_index_t CountUnique (oski_index_t n_x, const oski_index_t *restrict x, oski_index_t n, flag_t *restrict workspace)
 Counts the number of unique structurally non-zero elements in a sparse vector $x$.
static oski_index_t CountNumCommonSpVec (oski_index_t i1, oski_index_t n1, const oski_index_t *restrict ind1, oski_index_t i2, oski_index_t n2, const oski_index_t *restrict ind2, oski_index_t b, int has_unit_diag, oski_index_t n, flag_t *restrict workspace)
 Returns the number of unique elements that two sparse vectors, $x_1$ and $x_2$, have in common.
static oski_index_t FindBlockPart (oski_index_t m, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, oski_index_t b, int has_unit_diag, double threshold, oski_index_t *b_start, oski_index_t n_max, flag_t *workspace)
 Determine a partitioning of a CSR structure into groups of consecutive rows that have similar non-zero patterns.
static void MakeBlockMap (const oski_index_t *restrict I_starts, oski_index_t M, oski_index_t *restrict map)
 Computes a row-to-block-row lookup table.
static int GetPart (oski_index_t m, oski_index_t n, const oski_index_t *ptr, const oski_index_t *ind, oski_index_t b, int has_unit_diag, int is_symm, const oski_index_t *Tptr, const oski_index_t *Tind, oski_index_t b_T, int has_unit_diag_T, double threshold_r, double threshold_c, oski_index_t *p_M, oski_index_t *p_N, oski_index_t **p_brow, oski_index_t **p_bcol, flag_t *work_f)
static void CountVBRSize (oski_index_t m, oski_index_t n, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, oski_index_t b, int has_unit_diag, oski_index_t M, oski_index_t N, oski_index_t *restrict brow, oski_index_t *restrict bcol, oski_index_t *p_nb, oski_index_t *p_nnz, flag_t *restrict f_workspace, oski_index_t *restrict i_workspace)
 Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.
static void CopyToVBR (oski_index_t m, oski_index_t n, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, oski_index_t b, int has_unit_diag, oski_index_t M, oski_index_t N, oski_index_t *restrict brow, oski_index_t *restrict bcol, oski_index_t nb, oski_index_t nnz, oski_index_t *V_ptr, oski_index_t *V_ind, oski_index_t *V_valptr, oski_value_t *V_val, flag_t *restrict f_workspace, oski_index_t *restrict i_workspace)
 Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.
static int ConvertToVBR (oski_index_t m, oski_index_t n, const oski_index_t *ptr, const oski_index_t *ind, const oski_value_t *val, oski_index_t b, int has_unit_diag, int is_symm, const oski_index_t *Tptr, const oski_index_t *Tind, oski_index_t b_T, int has_unit_diag_T, double threshold_r, double threshold_c, oski_index_t *p_M, oski_index_t *p_N, oski_index_t **p_brow, oski_index_t **p_bcol, oski_index_t **p_V_ptr, oski_index_t **p_V_ind, oski_index_t **p_V_valptr, oski_value_t **p_V_val)
void * oski_CreateMatReprFromCSR (const oski_matCSR_t *mat, const oski_matcommon_t *props,...)
 Method: Instantiate from an existing CSR representation.
oski_matCSR_toski_ConvertMatReprToCSR (const void *mat, const oski_matcommon_t *props)
 Method: Convert to CSR format.
void oski_DestroyMatRepr (void *mat)
 Method: Destroy matrix type-specific representation.
void * oski_CopyMatRepr (const void *mat, const oski_matcommon_t *props)
 Method: Duplicate a matrix representation.
int oski_CreateLuaMatReprFromCSR (lua_State *L)
 Matrix-type specific method to convert from a CSR matrix, with arguments passed on the Lua stack.


Function Documentation

static void CopyToVBR oski_index_t  m,
oski_index_t  n,
const oski_index_t *restrict  ptr,
const oski_index_t *restrict  ind,
const oski_value_t *restrict  val,
oski_index_t  b,
int  has_unit_diag,
oski_index_t  M,
oski_index_t  N,
oski_index_t *restrict  brow,
oski_index_t *restrict  bcol,
oski_index_t  nb,
oski_index_t  nnz,
oski_index_t *  V_ptr,
oski_index_t *  V_ind,
oski_index_t *  V_valptr,
oski_value_t *  V_val,
flag_t *restrict  f_workspace,
oski_index_t *restrict  i_workspace
[static]
 

Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.

Parameters:
[in] m No. of rows.
[in] n No. of columns.
[in] ptr Row pointers, of size m+1.
[in] ind Column indices, of size equal to the number of stored structural non-zero entries.
[in] b Index base of CSR representation (0- or 1-based).
[in] has_unit_diag Non-zero if the CSR representation assumes an implicit unit diagonal.
[in] M No. of block rows in the VBR representation.
[in] N No. of block columns in the VBR representation.
[in] brow Starting row of each of the M block rows in the VBR representation. brow is of size M+1, and 0 = brow[0] < brow[1] < ... < brow[M] = m.
[in] bcol Starting column of each of the M block columns in the VBR representation. bcol is of size M+1, and 0 = bcol[0] < bcol[1] < ... < bcol[N] = n.
[out] nb Number of blocks needed to store the CSR matrix in VBR format.
[out] nnz Number of non-zero elements needed to store the CSR matrix in VBR format.
[in,out] f_workspace Temporary workspace, of size at least N. This workspace must be initialized to zero on entry, and will be returned as all zero on exit.
[in,out] i_workspace Additional temporary workspace, of size at least 2*n. The values on entry are not used. On exit, the values are undefined.
Returns:
The number of structurally non-zero blocks in *p_nb, and the number of structural non-zeros in *p_nnz.

static oski_index_t CountNumCommonSpVec oski_index_t  i1,
oski_index_t  n1,
const oski_index_t *restrict  ind1,
oski_index_t  i2,
oski_index_t  n2,
const oski_index_t *restrict  ind2,
oski_index_t  b,
int  has_unit_diag,
oski_index_t  n,
flag_t *restrict  workspace
[static]
 

Returns the number of unique elements that two sparse vectors, $x_1$ and $x_2$, have in common.

These vectors are assumed to be row (or column) vectors of a larger matrix, located at positions row (column) $i_1$ and $i_2$, respectively.

Parameters:
[in] i1 Index of vector $x_1$.
[in] n1 No. of elements stored with $x_1$.
[in] ind1 Non-zero indices associated with $x_1$.
[in] i2 Index of vector $x_2$.
[in] n2 No. of elements stored with $x_2$.
[in] ind2 Non-zero indices associated with $x_2$.
[in] b Index base (0- or 1-based) for i1, i2, and all values in ind1 and ind2.
[in] n A value greater than or equal to the maximum value in either ind1 or ind2. That is, for all 0 <= k1 < n1, n >= ind1[k1] and for all 0 <= k2 < n2, n >= ind2[k2].
[in,out] workspace A buffer of size 2*(n+1), all of whose elements must be zero.
Returns:
Returns the number of unique non-zero entries the two sparse vectors have in common. All entries of the workspace will also be reset to zero, provided they were all initially zero.
Note:
The algorithm below returns the number of unique elements, in the event that either ind1 or ind2 contain duplicate entries.

static oski_index_t CountUnique oski_index_t  n_x,
const oski_index_t *restrict  x,
oski_index_t  n,
flag_t *restrict  workspace
[static]
 

Counts the number of unique structurally non-zero elements in a sparse vector $x$.

Parameters:
[in] n_x Number $n_x$ of structurally non-zero elements in $x$.
[in] x Sparse vector indices of $x$.
[in] n Value >= maximum index in $x$.
[out] workspace Buffer of size at least n+1, initialized to all zeros on entry.
Returns:
The number of unique structurally non-zero elements in a sparse vector $x$. Resets any modified elements of the workspace back to 0 on return.

static void CountVBRSize oski_index_t  m,
oski_index_t  n,
const oski_index_t *restrict  ptr,
const oski_index_t *restrict  ind,
oski_index_t  b,
int  has_unit_diag,
oski_index_t  M,
oski_index_t  N,
oski_index_t *restrict  brow,
oski_index_t *restrict  bcol,
oski_index_t *  p_nb,
oski_index_t *  p_nnz,
flag_t *restrict  f_workspace,
oski_index_t *restrict  i_workspace
[static]
 

Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.

Parameters:
[in] m No. of rows.
[in] n No. of columns.
[in] ptr Row pointers, of size m+1.
[in] ind Column indices, of size equal to the number of stored structural non-zero entries.
[in] b Index base of CSR representation (0- or 1-based).
[in] has_unit_diag Non-zero if the CSR representation assumes an implicit unit diagonal.
[in] M No. of block rows in the VBR representation.
[in] N No. of block columns in the VBR representation.
[in] brow Starting row of each of the M block rows in the VBR representation. brow is of size M+1, and 0 = brow[0] < brow[1] < ... < brow[M] = m.
[in] bcol Starting column of each of the M block columns in the VBR representation. bcol is of size M+1, and 0 = bcol[0] < bcol[1] < ... < bcol[N] = n.
[out] p_nb Pointer to the number of non-zero blocks needed to store the CSR matrix in VBR format. If p_nb == NULL, then this value is not returned.
[out] p_nnz Pointer to the number of non-zero elements needed to store the CSR matrix in VBR format. If p_nnz == NULL, then this value is not returned.
[in,out] f_workspace Temporary workspace, of size at least N. This workspace must be initialized to zero on entry, and will be returned as all zero on exit.
[in,out] i_workspace Additional temporary workspace, of size at least n. The values on entry are not used. On exit, the values are undefined.
Returns:
The number of structurally non-zero blocks in *p_nb, and the number of structural non-zeros in *p_nnz.

static oski_index_t FindBlockPart oski_index_t  m,
const oski_index_t *restrict  ptr,
const oski_index_t *restrict  ind,
oski_index_t  b,
int  has_unit_diag,
double  threshold,
oski_index_t *  b_start,
oski_index_t  n_max,
flag_t workspace
[static]
 

Determine a partitioning of a CSR structure into groups of consecutive rows that have similar non-zero patterns.

This routine finds a block row partitioning, defined as a sequence of $M$ block row starting positions $0 = i_0 < i_1 < \cdots < i_{M} = m$, where the $K^{\mathrm{th}}$ block row consists of all rows starting at $i_K$ and ending at $i_{K+1}-1$. The block rows satisfy the following property.

  • Let $x_i$ be a pattern row vector corresponding to the non-zero pattern of row $i$ of the matrix, and let $k_i$ be the number of unique structural non-zeros in that row.
  • Let $0 \leq \rho \leq 1$ be a user-defined partitioning threshold.
  • (Similarity property) Then, each row $i_K \leq i < i_{K+1}$ satisfies $\frac{x_{i_K}\cdot x_{i}^T}{\max\{k_{i_K},k_i\}} \geq \rho$.

Parameters:
[in] m Number of rows, $m$.
[in] ptr Row pointers, of size $m+1$.
[in] ind Column indices, of size ptr[m].
[in] b Index base (0- or 1-based)
[in] has_unit_diag Non-zero if matrix has an implicit unit diagonal.
[in] threshold The similarity threshold, $\rho$.
[out] b_start Buffer in which to store the block row starts, of size at least $M+1$. If b_start is NULL, then these values are not returned.
[in] n_max Maximum value of any column index in ind.
[in,out] workspace A buffer of size at least 2*(n_max+1) to be used as temporary storage. The workspace must be all zero initially, and if so, will be returned in an all-zero state.
Returns:
The number of block rows $M$. If b_start is not NULL, returns the block row partitioning as well.
The algorithm is greedy, starting at row 0 and growing each block row until the similarity property no longer holds.

Note:
The array b_start may be NULL, in which case this routine just returns $M$. Thus, this routine can be called once to determine $M$, allocate a buffer b_start of size $M+1$, and then called again to determine b_start.

static void MakeBlockMap const oski_index_t *restrict  I_starts,
oski_index_t  M,
oski_index_t *restrict  map
[static]
 

Computes a row-to-block-row lookup table.

Parameters:
[in] I_starts Start of each row of a consecutive block row partitioning.
[in] M Number of block rows.
[in,out] map An array of size at least I_starts[M] to store the lookup table.
Returns:
An initialized row-to-block-row map such that map[i] == K where I_starts[K] <= i < I_starts[K+1].

static void SetFlags oski_index_t  n_x,
const oski_index_t *restrict  x,
flag_t  value,
oski_index_t  n,
flag_t *restrict  f
[static]
 

Scatters a specified value to all elements of a dense flag vector $f$ corresponding to the structurally non-zero elements of a sparse vector $x$.

More specifically, sets $f_j \leftarrow v$ for all $j = x_k$ such that $0 \leq k < n_x$.

Parameters:
[in] n_x Number $n_x$ of structurally non-zero elements in $x$.
[in] x Sparse vector indices of $x$.
[in] value Value $v$ to assign.
[in] n Maximum length of $f$.
[out] f Flag vector $f$.
Returns:
Only elements of $f$ corresponding to structurally non-zero elements of $x$ are modified.


Generated on Wed Sep 19 16:41:22 2007 for BeBOP Optimized Sparse Kernel Interface Library by  doxygen 1.4.6