From 4d4898ed89d64222863c9bb6d33c6275a578ce3a Mon Sep 17 00:00:00 2001 From: jalvesz Date: Fri, 16 Jan 2026 23:49:24 +0100 Subject: [PATCH 01/18] start working on BSR --- src/stdlib_sparse_kinds.fypp | 156 +++++++++++++++++++++ src/stdlib_sparse_spmv.fypp | 208 ++++++++++++++++++++++++++++ test/linalg/test_linalg_sparse.fypp | 44 ++++++ 3 files changed, 408 insertions(+) diff --git a/src/stdlib_sparse_kinds.fypp b/src/stdlib_sparse_kinds.fypp index bf03d2747..17c5a051e 100644 --- a/src/stdlib_sparse_kinds.fypp +++ b/src/stdlib_sparse_kinds.fypp @@ -44,6 +44,28 @@ module stdlib_sparse_kinds end type #:endfor + !! version: experimental + !! + !! BSR: Compressed sparse row or Yale format + type, public, extends(sparse_type) :: BSR_type + integer(ilp) :: block_shape(2) = (/1,1/) !! shape of the blocks + integer(ilp), allocatable :: col(:) !! matrix column pointer + integer(ilp), allocatable :: rowptr(:) !! matrix row pointer + contains + procedure :: malloc => malloc_bsr + end type + + #:for k1, t1, s1 in (KINDS_TYPES) + type, public, extends(BSR_type) :: BSR_${s1}$_type + ${t1}$, allocatable :: data(:,:,:) + contains + procedure, non_overridable :: at => at_value_bsr_${s1}$ + procedure, non_overridable :: add_value => add_bvalue_bsr_${s1}$ + procedure, non_overridable :: add_block => add_block_bsr_${s1}$ + generic :: add => add_value, add_block + end type + #:endfor + !! version: experimental !! !! CSR: Compressed sparse row or Yale format @@ -292,6 +314,55 @@ contains end select end subroutine + !! (re)Allocate matrix memory for the BSR type + subroutine malloc_bsr(self,block_shape,num_brows,num_bcols,bnnz) + class(BSR_type) :: self + integer(ilp), intent(in) :: block_shape(2) !! shape of the blocks + integer(ilp), intent(in) :: num_brows !! number of block rows + integer(ilp), intent(in) :: num_bcols !! number of block columns + integer(ilp), intent(in) :: bnnz !! number of non-zero blocks + integer(ilp), allocatable :: temp_idx(:) + integer(ilp) :: br, bc + !----------------------------------------------------- + + br = max(block_shape(1), 1) + bc = max(block_shape(2), 1) + + self%block_shape = (/br,bc/) + self%nrows = num_brows * br + self%ncols = num_bcols * bc + self%nnz = br*bc*bnnz + + if(.not.allocated(self%col)) then + allocate(temp_idx(bnnz) , source = 0 ) + else + allocate(temp_idx(bnnz) , source = self%col ) + end if + call move_alloc(from=temp_idx,to=self%col) + + if(.not.allocated(self%rowptr)) then + allocate(temp_idx(num_brows+1) , source = 0 ) + else + allocate(temp_idx(num_brows+1) , source = self%rowptr ) + end if + call move_alloc(from=temp_idx,to=self%rowptr) + + select type(self) + #:for k1, t1, s1 in (KINDS_TYPES) + type is(BSR_${s1}$_type) + block + ${t1}$, allocatable :: temp_data_${s1}$(:,:,:) + if(.not.allocated(self%data)) then + allocate(temp_data_${s1}$(br,bc,bnnz) , source = zero_${s1}$ ) + else + allocate(temp_data_${s1}$(br,bc,bnnz) , source = self%data ) + end if + call move_alloc(from=temp_data_${s1}$,to=self%data) + end block + #:endfor + end select + end subroutine + !================================================================== ! data accessors !================================================================== @@ -590,4 +661,89 @@ contains #:endfor + #:for k1, t1, s1 in (KINDS_TYPES) + pure ${t1}$ function at_value_bsr_${s1}$(self,ik,jk) result(val) + class(BSR_${s1}$_type), intent(in) :: self + integer(ilp), intent(in) :: ik, jk + integer(ilp) :: k, br, bc, ib, jb, ii, jj, ik_, jk_ + logical :: transpose + + if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then + val = ieee_value( 0._${k1}$ , ieee_quiet_nan) + return + end if + + ik_ = ik; jk_ = jk + transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk) + if(transpose) then + ik_ = jk; jk_ = ik + end if + + br = self%block_shape(1) + bc = self%block_shape(2) + + ib = (ik_ - 1) / br + 1 + jb = (jk_ - 1) / bc + 1 + ii = modulo(ik_ - 1, br) + 1 + jj = modulo(jk_ - 1, bc) + 1 + + if(.not.allocated(self%rowptr) .or. ib < 1 .or. ib >= size(self%rowptr)) then + val = zero_${s1}$ + return + end if + + do k = self%rowptr(ib), self%rowptr(ib+1)-1 + if( jb == self%col(k) ) then + val = self%data(ii,jj,k) + return + end if + end do + val = zero_${s1}$ + end function + + !> add a single block value to the BSR matrix + subroutine add_bvalue_bsr_${s1}$(self,ik,jk,val) + class(BSR_${s1}$_type), intent(inout) :: self + ${t1}$, intent(in) :: val(:,:) + integer(ilp), intent(in) :: ik, jk + integer(ilp) :: k + + if(size(val,1) /= self%block_shape(1) .or. size(val,2) /= self%block_shape(2)) then + print *, "Warning: block size mismatch in add_bvalue_bsr" + return + end if + + do k = self%rowptr(ik), self%rowptr(ik+1)-1 + if( jk == self%col(k) ) then + self%data(:,:,k) = self%data(:,:,k) + val(:,:) + return + end if + end do + + end subroutine + + !> add a blocks matrices to the BSR matrix + subroutine add_block_bsr_${s1}$(self,ik,jk,val) + class(BSR_${s1}$_type), intent(inout) :: self + ${t1}$, intent(in) :: val(:,:,:,:) + integer(ilp), intent(in) :: ik(:), jk(:) + integer(ilp) :: i, j, k + + if(size(val,1) /= self%block_shape(1) .or. size(val,2) /= self%block_shape(2)) then + print *, "Warning: block size mismatch in add_bvalue_bsr" + return + end if + + do i = 1, size(ik) + do k = self%rowptr(ik(i)), self%rowptr(ik(i)+1)-1 + do j = 1, size(jk) + if( jk(j) == self%col(k) ) then + self%data(:,:,k) = self%data(:,:,k) + val(:,:,i,j) + end if + end do + end do + end do + end subroutine + + #:endfor end module stdlib_sparse_kinds diff --git a/src/stdlib_sparse_spmv.fypp b/src/stdlib_sparse_spmv.fypp index 76c78f9e1..8e539b8f4 100644 --- a/src/stdlib_sparse_spmv.fypp +++ b/src/stdlib_sparse_spmv.fypp @@ -13,6 +13,7 @@ module stdlib_sparse_spmv use stdlib_sparse_constants use stdlib_sparse_kinds + use stdlib_linalg_blas, only: gemv, gemm implicit none private @@ -24,6 +25,7 @@ module stdlib_sparse_spmv #:for k1, t1, s1 in (KINDS_TYPES) #:for rank in RANKS module procedure :: spmv_coo_${rank}$d_${s1}$ + module procedure :: spmv_bsr_${rank}$d_${s1}$ module procedure :: spmv_csr_${rank}$d_${s1}$ module procedure :: spmv_csc_${rank}$d_${s1}$ module procedure :: spmv_ell_${rank}$d_${s1}$ @@ -233,6 +235,212 @@ contains #:endfor #:endfor + !! spmv_bsr + #:for k1, t1, s1 in (KINDS_TYPES) + #:for rank in RANKS + subroutine spmv_bsr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(BSR_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_, beta_ + character(1) :: op_ + character(1) :: trans_blas + integer(ilp) :: nbrow, bi, bj, p + integer(ilp) :: row_i_start, col_j_start, i_max_i, j_max_j + integer(ilp) :: row_j_start, col_i_start, i_max_j, j_max_i + + #:if rank == 2 + integer(ilp) :: nrhs + #:endif + #:if t1.startswith('complex') + #:if rank == 1 + ${t1}$, allocatable :: xwork_bc(:) + ${t1}$, allocatable :: ywork_br(:) + #:else + ${t1}$, allocatable :: xwork_bc(:,:) + ${t1}$, allocatable :: ywork_br(:,:) + #:endif + #:endif + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${s1}$; if(present(alpha)) alpha_ = alpha + beta_ = zero_${s1}$; if(present(beta)) beta_ = beta + if(present(beta)) then + vec_y = beta_ * vec_y + else + vec_y = zero_${s1}$ + endif + + associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, & + & nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage, & + br => matrix%block_shape(1), bc => matrix%block_shape(2) ) + + nbrow = size(rowptr) - 1 + #:if rank == 2 + nrhs = size(vec_x,dim=2) + #:endif + + select case(op_) + case(sparse_op_none) + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_i_start:), incy=1) + #:else + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & + c=vec_y(row_i_start:, :), ldc=i_max_i) + #:endif + + if (storage /= sparse_full .and. bi /= bj) then + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end if + end do + end do + + case(sparse_op_transpose) + ! storage == sparse_full here (otherwise op is mapped to 'N') + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(col_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & + c=vec_y(col_j_start:, :), ldc=j_max_j) + #:endif + end do + end do + + #:if t1.startswith('complex') + case(sparse_op_hermitian) + if (storage == sparse_full) then + trans_blas = 'C' + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv(trans_blas, m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(col_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & + c=vec_y(col_j_start:, :), ldc=j_max_j) + #:endif + end do + end do + else + ! symmetric-storage: full matrix is symmetric; compute y = alpha * A^H * x + #:if rank == 1 + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br)) + #:else + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) + #:endif + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + ! y_i += conjg(A_ij) * x_j == conjg( A_ij * conjg(x_j) ) + #:if rank == 1 + xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) + ywork_br(1:i_max_i) = zero_${s1}$ + call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) + vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & + alpha_ * conjg(ywork_br(1:i_max_i)) + #:else + xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) + ywork_br(1:i_max_i, :) = zero_${s1}$ + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) + vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & + alpha_ * conjg(ywork_br(1:i_max_i, :)) + #:endif + + if (bi /= bj) then + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + ! y_j += A_ij^H * x_i + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end if + end do + end do + end if + #:endif + end select + end associate + end subroutine + + #:endfor + #:endfor + !! spmv_csc #:for k1, t1, s1 in (KINDS_TYPES) #:for rank in RANKS diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index 21237f862..a4b938b4d 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -19,6 +19,7 @@ contains testsuite = [ & new_unittest('coo', test_coo), & new_unittest('coo2ordered', test_coo2ordered), & + new_unittest('bsr', test_bsr), & new_unittest('csr', test_csr), & new_unittest('csc', test_csc), & new_unittest('ell', test_ell), & @@ -99,6 +100,49 @@ contains end subroutine + subroutine test_bsr(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + #:for k1, t1, s1 in (KINDS_TYPES) + block + integer, parameter :: wp = ${k1}$ + type(BSR_${s1}$_type) :: BSR + real(wp) :: dense(4,4) + ${t1}$, allocatable :: vec_x(:) + ${t1}$, allocatable :: vec_y(:) + ${t1}$, allocatable :: vec_ref(:) + + dense = 0._wp + dense(1:2,1:2) = reshape(real([1,3,2,4],kind=wp),[2,2]) + dense(3:4,3:4) = reshape(real([5,7,6,8],kind=wp),[2,2]) + + call BSR%malloc([2,2],4,4,2) + BSR%rowptr = [1,2,3] + BSR%col = [1,2] + BSR%data(:,:,1) = reshape(real([1,3,2,4],kind=wp),[2,2]) + BSR%data(:,:,2) = reshape(real([5,7,6,8],kind=wp),[2,2]) + + allocate( vec_x(4) , source = 1._wp ) + allocate( vec_y(4) , source = 0._wp ) + allocate( vec_ref(4) , source = 0._wp ) + + vec_ref = matmul( dense, vec_x ) + call spmv( BSR, vec_x, vec_y ) + call check(error, all(vec_y == vec_ref) ) + if (allocated(error)) return + + ! Test in-place transpose + vec_y = 1._wp + vec_ref = matmul( transpose(dense), vec_y ) + vec_x = 0._wp + call spmv( BSR, vec_y, vec_x, op=sparse_op_transpose ) + call check(error, all(vec_x == vec_ref) ) + if (allocated(error)) return + + end block + #:endfor + end subroutine + subroutine test_csr(error) !> Error handling type(error_type), allocatable, intent(out) :: error From b220a5e26e483c5470162082108078d519d84520 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Sun, 18 Jan 2026 18:47:07 +0100 Subject: [PATCH 02/18] small cleanups --- src/stdlib_sparse_spmv.fypp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/stdlib_sparse_spmv.fypp b/src/stdlib_sparse_spmv.fypp index 8e539b8f4..40683d78e 100644 --- a/src/stdlib_sparse_spmv.fypp +++ b/src/stdlib_sparse_spmv.fypp @@ -278,7 +278,7 @@ contains & nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage, & br => matrix%block_shape(1), bc => matrix%block_shape(2) ) - nbrow = size(rowptr) - 1 + nbrow = nrows / br**2 #:if rank == 2 nrhs = size(vec_x,dim=2) #:endif @@ -353,7 +353,6 @@ contains #:if t1.startswith('complex') case(sparse_op_hermitian) if (storage == sparse_full) then - trans_blas = 'C' do bi = 1, nbrow row_i_start = (bi - 1) * br + 1 if (row_i_start > nrows) cycle @@ -365,7 +364,7 @@ contains j_max_j = min(bc, ncols - col_j_start + 1) #:if rank == 1 - call gemv(trans_blas, m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & y=vec_y(col_j_start:), incy=1) #:else From 355599228a7dc8eb5b97103576bbe8e724ceab09 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Fri, 23 Jan 2026 15:34:54 +0100 Subject: [PATCH 03/18] include blas as dependency for sparse --- src/sparse/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparse/CMakeLists.txt b/src/sparse/CMakeLists.txt index 3e00df66a..ef78021d3 100644 --- a/src/sparse/CMakeLists.txt +++ b/src/sparse/CMakeLists.txt @@ -14,4 +14,4 @@ set(sparse_f90Files configure_stdlib_target(sparse sparse_f90Files sparse_fppFiles sparse_cppFiles) -target_link_libraries(sparse PUBLIC constants sorting) +target_link_libraries(sparse PUBLIC constants sorting blas) From 205670fbf375a629c20b89466d083740ddba0f28 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sun, 15 Mar 2026 14:47:57 +0100 Subject: [PATCH 04/18] revisit logic of the spmv_bsr kernel --- src/sparse/stdlib_sparse_spmv.fypp | 323 +++++++++++++++++++---------- 1 file changed, 218 insertions(+), 105 deletions(-) diff --git a/src/sparse/stdlib_sparse_spmv.fypp b/src/sparse/stdlib_sparse_spmv.fypp index 40683d78e..e744a98d7 100644 --- a/src/sparse/stdlib_sparse_spmv.fypp +++ b/src/sparse/stdlib_sparse_spmv.fypp @@ -247,7 +247,6 @@ contains character(1), intent(in), optional :: op ${t1}$ :: alpha_, beta_ character(1) :: op_ - character(1) :: trans_blas integer(ilp) :: nbrow, bi, bj, p integer(ilp) :: row_i_start, col_j_start, i_max_i, j_max_j integer(ilp) :: row_j_start, col_i_start, i_max_j, j_max_i @@ -278,13 +277,12 @@ contains & nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage, & br => matrix%block_shape(1), bc => matrix%block_shape(2) ) - nbrow = nrows / br**2 + nbrow = nrows / br #:if rank == 2 nrhs = size(vec_x,dim=2) #:endif - select case(op_) - case(sparse_op_none) + if( storage == sparse_full .and. op_ == sparse_op_none ) then do bi = 1, nbrow row_i_start = (bi - 1) * br + 1 if (row_i_start > nrows) cycle @@ -304,30 +302,10 @@ contains b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & c=vec_y(row_i_start:, :), ldc=i_max_i) #:endif - - if (storage /= sparse_full .and. bi /= bj) then - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end if end do end do - case(sparse_op_transpose) - ! storage == sparse_full here (otherwise op is mapped to 'N') + else if( storage == sparse_full .and. op_ == sparse_op_transpose ) then do bi = 1, nbrow row_i_start = (bi - 1) * br + 1 if (row_i_start > nrows) cycle @@ -350,90 +328,225 @@ contains end do end do + else if( storage == sparse_lower .and. op_ /= sparse_op_hermitian ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_i_start:), incy=1) + #:else + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & + c=vec_y(row_i_start:, :), ldc=i_max_i) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end do + end do + + else if( storage == sparse_upper .and. op_ /= sparse_op_hermitian ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_i_start:), incy=1) + #:else + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & + c=vec_y(row_i_start:, :), ldc=i_max_i) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end do + end do + #:if t1.startswith('complex') - case(sparse_op_hermitian) - if (storage == sparse_full) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(col_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & - c=vec_y(col_j_start:, :), ldc=j_max_j) - #:endif - end do + else if( storage == sparse_full .and. op_ == sparse_op_hermitian ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(col_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & + c=vec_y(col_j_start:, :), ldc=j_max_j) + #:endif + end do + end do + + else if( storage == sparse_lower .and. op_ == sparse_op_hermitian ) then + #:if rank == 1 + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br)) + #:else + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) + #:endif + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) + ywork_br(1:i_max_i) = zero_${s1}$ + call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) + vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & + alpha_ * conjg(ywork_br(1:i_max_i)) + #:else + xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) + ywork_br(1:i_max_i, :) = zero_${s1}$ + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) + vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & + alpha_ * conjg(ywork_br(1:i_max_i, :)) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif end do - else - ! symmetric-storage: full matrix is symmetric; compute y = alpha * A^H * x - #:if rank == 1 - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br)) - #:else - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) - #:endif - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - ! y_i += conjg(A_ij) * x_j == conjg( A_ij * conjg(x_j) ) - #:if rank == 1 - xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) - ywork_br(1:i_max_i) = zero_${s1}$ - call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) - vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & - alpha_ * conjg(ywork_br(1:i_max_i)) - #:else - xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) - ywork_br(1:i_max_i, :) = zero_${s1}$ - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) - vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & - alpha_ * conjg(ywork_br(1:i_max_i, :)) - #:endif - - if (bi /= bj) then - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - ! y_j += A_ij^H * x_i - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end if - end do + end do + + else if( storage == sparse_upper .and. op_ == sparse_op_hermitian ) then + #:if rank == 1 + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br)) + #:else + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) + #:endif + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) + ywork_br(1:i_max_i) = zero_${s1}$ + call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) + vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & + alpha_ * conjg(ywork_br(1:i_max_i)) + #:else + xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) + ywork_br(1:i_max_i, :) = zero_${s1}$ + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) + vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & + alpha_ * conjg(ywork_br(1:i_max_i, :)) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif end do - end if + end do #:endif - end select + end if end associate end subroutine From 9fa989f1ed1715d52502c2adb41f3b20c87f720f Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sun, 15 Mar 2026 14:57:21 +0100 Subject: [PATCH 05/18] enhance bsr testing --- test/linalg/test_linalg_sparse.fypp | 87 ++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index 8531ceeae..54665a706 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -111,39 +111,86 @@ contains #:for k1, t1, s1 in (KINDS_TYPES) block integer, parameter :: wp = ${k1}$ - type(BSR_${s1}$_type) :: BSR - real(wp) :: dense(4,4) + real(wp), parameter :: tol = 10*epsilon(0._wp) + type(BSR_${s1}$_type) :: BSR, BSR2 + real(wp) :: dense(6,6) ${t1}$, allocatable :: vec_x(:) ${t1}$, allocatable :: vec_y(:) ${t1}$, allocatable :: vec_ref(:) + ! |1 0| |6 7| 0 0 + ! |2 1| |8 2| 0 0 + ! ----------------- + ! 0 0 |1 4| 0 0 + ! 0 0 |5 1| 0 0 + ! ----------------- + ! 0 0 |4 3| |7 2| + ! 0 0 |0 0| |0 0| dense = 0._wp - dense(1:2,1:2) = reshape(real([1,3,2,4],kind=wp),[2,2]) - dense(3:4,3:4) = reshape(real([5,7,6,8],kind=wp),[2,2]) - - call BSR%malloc([2,2],4,4,2) - BSR%rowptr = [1,2,3] - BSR%col = [1,2] - BSR%data(:,:,1) = reshape(real([1,3,2,4],kind=wp),[2,2]) - BSR%data(:,:,2) = reshape(real([5,7,6,8],kind=wp),[2,2]) + dense(1:2,1:2) = reshape(real([1,2,0,1],kind=wp),[2,2]) + dense(1:2,3:4) = reshape(real([6,8,7,2],kind=wp),[2,2]) + dense(3:4,3:4) = reshape(real([1,5,4,1],kind=wp),[2,2]) + dense(5:6,3:4) = reshape(real([4,0,3,0],kind=wp),[2,2]) + dense(5:6,5:6) = reshape(real([7,0,2,0],kind=wp),[2,2]) + + call BSR%malloc([2,2],3,3,5) + BSR%storage = sparse_full + BSR%rowptr = [1,3,4,6] + BSR%col = [1,2,2,2,3] + BSR%data(:,:,1) = reshape(real([1,2,0,1],kind=wp),[2,2]) + BSR%data(:,:,2) = reshape(real([6,8,7,2],kind=wp),[2,2]) + BSR%data(:,:,3) = reshape(real([1,5,4,1],kind=wp),[2,2]) + BSR%data(:,:,4) = reshape(real([4,0,3,0],kind=wp),[2,2]) + BSR%data(:,:,5) = reshape(real([7,0,2,0],kind=wp),[2,2]) - allocate( vec_x(4) , source = 1._wp ) - allocate( vec_y(4) , source = 0._wp ) - allocate( vec_ref(4) , source = 0._wp ) + allocate( vec_x(6) , source = 1._wp ) + allocate( vec_y(6) , source = 0._wp ) + allocate( vec_ref(6) , source = 0._wp ) vec_ref = matmul( dense, vec_x ) call spmv( BSR, vec_x, vec_y ) - call check(error, all(vec_y == vec_ref) ) + call check(error, norm2(vec_y - vec_ref) < tol , "error in BSR SpMV" ) if (allocated(error)) return ! Test in-place transpose - vec_y = 1._wp - vec_ref = matmul( transpose(dense), vec_y ) - vec_x = 0._wp - call spmv( BSR, vec_y, vec_x, op=sparse_op_transpose ) - call check(error, all(vec_x == vec_ref) ) + vec_x = 1._wp + vec_ref = matmul( transpose(dense), vec_x ) + vec_y = 0._wp + call spmv( BSR, vec_x, vec_y, op=sparse_op_transpose ) + call check(error, norm2(vec_y - vec_ref) < tol , "error in BSR SpMV transpose" ) + if (allocated(error)) return + + ! Test symmetry storage (upper) + ! |1 0| |6 7| 0 0 + ! |2 1| |8 2| 0 0 + ! ----------------- + ! |6 8| |1 4| 0 0 + ! |7 2| |5 1| 0 0 + ! ----------------- + ! 0 0 0 0 |7 2| + ! 0 0 0 0 |0 0| + dense = 0._wp + dense(1:2,1:2) = reshape(real([1,2,0,1],kind=wp),[2,2]) + dense(1:2,3:4) = reshape(real([6,8,7,2],kind=wp),[2,2]) + dense(3:4,1:2) = reshape(real([6,7,8,2],kind=wp),[2,2]) + dense(3:4,3:4) = reshape(real([1,5,4,1],kind=wp),[2,2]) + dense(5:6,5:6) = reshape(real([7,0,2,0],kind=wp),[2,2]) + + call BSR2%malloc([2,2],3,3,4) + BSR2%storage = sparse_upper + BSR2%rowptr = [1,3,4,5] + BSR2%col = [1,2,2,3] + BSR2%data(:,:,1) = reshape(real([1,2,0,1],kind=wp),[2,2]) + BSR2%data(:,:,2) = reshape(real([6,8,7,2],kind=wp),[2,2]) + BSR2%data(:,:,3) = reshape(real([1,5,4,1],kind=wp),[2,2]) + BSR2%data(:,:,4) = reshape(real([7,0,2,0],kind=wp),[2,2]) + + vec_x = 1._wp + vec_ref = matmul( dense, vec_x ) + vec_y = 0._wp + call spmv( BSR2, vec_x, vec_y ) + call check(error, norm2(vec_y - vec_ref) < tol , "error in BSR symmetric SpMV" ) if (allocated(error)) return - end block #:endfor end subroutine From d58eb0be3c25e846fca7ba6e60e44b83c0a1c1f4 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sun, 15 Mar 2026 14:57:47 +0100 Subject: [PATCH 06/18] link blas --- src/sparse/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparse/CMakeLists.txt b/src/sparse/CMakeLists.txt index 3cd110f9c..7d5955c24 100644 --- a/src/sparse/CMakeLists.txt +++ b/src/sparse/CMakeLists.txt @@ -15,4 +15,4 @@ set(sparse_f90Files configure_stdlib_target(${PROJECT_NAME}_sparse sparse_f90Files sparse_fppFiles sparse_cppFiles) -target_link_libraries(${PROJECT_NAME}_sparse PUBLIC ${PROJECT_NAME}_constants ${PROJECT_NAME}_sorting) +target_link_libraries(${PROJECT_NAME}_sparse PUBLIC ${PROJECT_NAME}_constants ${PROJECT_NAME}_sorting ${PROJECT_NAME}_blas) From 3982e1af0401466b2a027577d39aa2c1709d7634 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sun, 15 Mar 2026 17:32:38 +0100 Subject: [PATCH 07/18] split sparse_spmv into module submodules --- src/sparse/CMakeLists.txt | 6 + src/sparse/stdlib_sparse_spmv.fypp | 929 ++--------------------- src/sparse/stdlib_sparse_spmv_bsr.fypp | 332 ++++++++ src/sparse/stdlib_sparse_spmv_coo.fypp | 101 +++ src/sparse/stdlib_sparse_spmv_csc.fypp | 119 +++ src/sparse/stdlib_sparse_spmv_csr.fypp | 123 +++ src/sparse/stdlib_sparse_spmv_ell.fypp | 79 ++ src/sparse/stdlib_sparse_spmv_sellc.fypp | 193 +++++ 8 files changed, 1003 insertions(+), 879 deletions(-) create mode 100644 src/sparse/stdlib_sparse_spmv_bsr.fypp create mode 100644 src/sparse/stdlib_sparse_spmv_coo.fypp create mode 100644 src/sparse/stdlib_sparse_spmv_csc.fypp create mode 100644 src/sparse/stdlib_sparse_spmv_csr.fypp create mode 100644 src/sparse/stdlib_sparse_spmv_ell.fypp create mode 100644 src/sparse/stdlib_sparse_spmv_sellc.fypp diff --git a/src/sparse/CMakeLists.txt b/src/sparse/CMakeLists.txt index 7d5955c24..a75541943 100644 --- a/src/sparse/CMakeLists.txt +++ b/src/sparse/CMakeLists.txt @@ -4,6 +4,12 @@ set(sparse_fppFiles stdlib_sparse_kinds.fypp stdlib_sparse_operators.fypp stdlib_sparse_spmv.fypp + stdlib_sparse_spmv_bsr.fypp + stdlib_sparse_spmv_coo.fypp + stdlib_sparse_spmv_csr.fypp + stdlib_sparse_spmv_csc.fypp + stdlib_sparse_spmv_ell.fypp + stdlib_sparse_spmv_sellc.fypp ) set(sparse_cppFiles diff --git a/src/sparse/stdlib_sparse_spmv.fypp b/src/sparse/stdlib_sparse_spmv.fypp index e744a98d7..87bcf7eef 100644 --- a/src/sparse/stdlib_sparse_spmv.fypp +++ b/src/sparse/stdlib_sparse_spmv.fypp @@ -13,7 +13,6 @@ module stdlib_sparse_spmv use stdlib_sparse_constants use stdlib_sparse_kinds - use stdlib_linalg_blas, only: gemv, gemm implicit none private @@ -24,890 +23,62 @@ module stdlib_sparse_spmv interface spmv #:for k1, t1, s1 in (KINDS_TYPES) #:for rank in RANKS - module procedure :: spmv_coo_${rank}$d_${s1}$ - module procedure :: spmv_bsr_${rank}$d_${s1}$ - module procedure :: spmv_csr_${rank}$d_${s1}$ - module procedure :: spmv_csc_${rank}$d_${s1}$ - module procedure :: spmv_ell_${rank}$d_${s1}$ - #:endfor - module procedure :: spmv_sellc_${s1}$ - #:endfor - end interface - public :: spmv - -contains - - !! spmv_coo - #:for k1, t1, s1 in (KINDS_TYPES) - #:for rank in RANKS - subroutine spmv_coo_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(COO_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_ - character(1) :: op_ - integer(ilp) :: k, ik, jk - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${k1}$ - if(present(alpha)) alpha_ = alpha - if(present(beta)) then - vec_y = beta * vec_y - else - vec_y = zero_${s1}$ - endif - - associate( data => matrix%data, index => matrix%index, storage => matrix%storage, nnz => matrix%nnz ) - select case(op_) - case(sparse_op_none) - if(storage == sparse_full) then - do concurrent (k = 1:nnz) - ik = index(1,k) - jk = index(2,k) - vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk) - end do - - else - do concurrent (k = 1:nnz) - ik = index(1,k) - jk = index(2,k) - vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk) - if( ik==jk ) cycle - vec_y(${rksfx2(rank-1)}$jk) = vec_y(${rksfx2(rank-1)}$jk) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$ik) - end do - - end if - case(sparse_op_transpose) - if(storage == sparse_full) then - do concurrent (k = 1:nnz) - jk = index(1,k) - ik = index(2,k) - vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk) - end do - - else - do concurrent (k = 1:nnz) - jk = index(1,k) - ik = index(2,k) - vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk) - if( ik==jk ) cycle - vec_y(${rksfx2(rank-1)}$jk) = vec_y(${rksfx2(rank-1)}$jk) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$ik) - end do - - end if - #:if t1.startswith('complex') - case(sparse_op_hermitian) - if(storage == sparse_full) then - do concurrent (k = 1:nnz) - jk = index(1,k) - ik = index(2,k) - vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$jk) - end do - - else - do concurrent (k = 1:nnz) - jk = index(1,k) - ik = index(2,k) - vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$jk) - if( ik==jk ) cycle - vec_y(${rksfx2(rank-1)}$jk) = vec_y(${rksfx2(rank-1)}$jk) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$ik) - end do - - end if - #:endif - end select - end associate - end subroutine - - #:endfor - #:endfor - - !! spmv_csr - #:for k1, t1, s1 in (KINDS_TYPES) - #:for rank in RANKS - subroutine spmv_csr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(CSR_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_ - character(1) :: op_ - integer(ilp) :: i, j - #:if rank == 1 - ${t1}$ :: aux, aux2 - #:else - ${t1}$ :: aux(size(vec_x,dim=1)), aux2(size(vec_x,dim=1)) - #:endif - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${k1}$ - if(present(alpha)) alpha_ = alpha - if(present(beta)) then - vec_y = beta * vec_y - else - vec_y = zero_${s1}$ - endif - - associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, & - & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) - - if( storage == sparse_full .and. op_==sparse_op_none ) then - do i = 1, nrows - aux = zero_${k1}$ - do j = rowptr(i), rowptr(i+1)-1 - aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j)) - end do - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux - end do - - else if( storage == sparse_full .and. op_==sparse_op_transpose ) then - do i = 1, nrows - aux = alpha_ * vec_x(${rksfx2(rank-1)}$i) - do j = rowptr(i), rowptr(i+1)-1 - vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux - end do - end do - - else if( storage == sparse_lower .and. op_/=sparse_op_hermitian )then - do i = 1 , nrows - aux = zero_${s1}$ - aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) - do j = rowptr(i), rowptr(i+1)-2 - aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j)) - vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux2 - end do - aux = alpha_ * aux + data(j) * aux2 - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + aux - end do - - else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then - do i = 1 , nrows - aux = vec_x(${rksfx2(rank-1)}$i) * data(rowptr(i)) - aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) - do j = rowptr(i)+1, rowptr(i+1)-1 - aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j)) - vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux2 - end do - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux - end do - - #:if t1.startswith('complex') - else if( storage == sparse_full .and. op_==sparse_op_hermitian) then - do i = 1, nrows - aux = alpha_ * vec_x(${rksfx2(rank-1)}$i) - do j = rowptr(i), rowptr(i+1)-1 - vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux - end do - end do - - else if( storage == sparse_lower .and. op_==sparse_op_hermitian )then - do i = 1 , nrows - aux = zero_${s1}$ - aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) - do j = rowptr(i), rowptr(i+1)-2 - aux = aux + conjg(data(j)) * vec_x(${rksfx2(rank-1)}$col(j)) - vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux2 - end do - aux = alpha_ * aux + conjg(data(j)) * aux2 - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + aux - end do - - else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then - do i = 1 , nrows - aux = vec_x(${rksfx2(rank-1)}$i) * conjg(data(rowptr(i))) - aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) - do j = rowptr(i)+1, rowptr(i+1)-1 - aux = aux + conjg(data(j)) * vec_x(${rksfx2(rank-1)}$col(j)) - vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux2 - end do - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux - end do - #:endif - end if - end associate - end subroutine - - #:endfor - #:endfor - - !! spmv_bsr - #:for k1, t1, s1 in (KINDS_TYPES) - #:for rank in RANKS - subroutine spmv_bsr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(BSR_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_, beta_ - character(1) :: op_ - integer(ilp) :: nbrow, bi, bj, p - integer(ilp) :: row_i_start, col_j_start, i_max_i, j_max_j - integer(ilp) :: row_j_start, col_i_start, i_max_j, j_max_i - - #:if rank == 2 - integer(ilp) :: nrhs - #:endif - #:if t1.startswith('complex') - #:if rank == 1 - ${t1}$, allocatable :: xwork_bc(:) - ${t1}$, allocatable :: ywork_br(:) - #:else - ${t1}$, allocatable :: xwork_bc(:,:) - ${t1}$, allocatable :: ywork_br(:,:) - #:endif - #:endif - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${s1}$; if(present(alpha)) alpha_ = alpha - beta_ = zero_${s1}$; if(present(beta)) beta_ = beta - if(present(beta)) then - vec_y = beta_ * vec_y - else - vec_y = zero_${s1}$ - endif - - associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, & - & nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage, & - br => matrix%block_shape(1), bc => matrix%block_shape(2) ) - - nbrow = nrows / br - #:if rank == 2 - nrhs = size(vec_x,dim=2) - #:endif - - if( storage == sparse_full .and. op_ == sparse_op_none ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_i_start:), incy=1) - #:else - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & - c=vec_y(row_i_start:, :), ldc=i_max_i) - #:endif - end do - end do - - else if( storage == sparse_full .and. op_ == sparse_op_transpose ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(col_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & - c=vec_y(col_j_start:, :), ldc=j_max_j) - #:endif - end do - end do - - else if( storage == sparse_lower .and. op_ /= sparse_op_hermitian ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_i_start:), incy=1) - #:else - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & - c=vec_y(row_i_start:, :), ldc=i_max_i) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - - else if( storage == sparse_upper .and. op_ /= sparse_op_hermitian ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_i_start:), incy=1) - #:else - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & - c=vec_y(row_i_start:, :), ldc=i_max_i) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - - #:if t1.startswith('complex') - else if( storage == sparse_full .and. op_ == sparse_op_hermitian ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(col_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & - c=vec_y(col_j_start:, :), ldc=j_max_j) - #:endif - end do - end do - - else if( storage == sparse_lower .and. op_ == sparse_op_hermitian ) then - #:if rank == 1 - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br)) - #:else - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) - #:endif - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) - ywork_br(1:i_max_i) = zero_${s1}$ - call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) - vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & - alpha_ * conjg(ywork_br(1:i_max_i)) - #:else - xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) - ywork_br(1:i_max_i, :) = zero_${s1}$ - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) - vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & - alpha_ * conjg(ywork_br(1:i_max_i, :)) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - - else if( storage == sparse_upper .and. op_ == sparse_op_hermitian ) then - #:if rank == 1 - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br)) - #:else - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) - #:endif - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) - ywork_br(1:i_max_i) = zero_${s1}$ - call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) - vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & - alpha_ * conjg(ywork_br(1:i_max_i)) - #:else - xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) - ywork_br(1:i_max_i, :) = zero_${s1}$ - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) - vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & - alpha_ * conjg(ywork_br(1:i_max_i, :)) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - #:endif - end if - end associate - end subroutine - - #:endfor - #:endfor - - !! spmv_csc - #:for k1, t1, s1 in (KINDS_TYPES) - #:for rank in RANKS - subroutine spmv_csc_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(CSC_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_ - character(1) :: op_ - integer(ilp) :: i, j - #:if rank == 1 - ${t1}$ :: aux - #:else - ${t1}$ :: aux(size(vec_x,dim=1)) - #:endif - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${k1}$ - if(present(alpha)) alpha_ = alpha - if(present(beta)) then - vec_y = beta * vec_y - else - vec_y = zero_${s1}$ - endif - - associate( data => matrix%data, colptr => matrix%colptr, row => matrix%row, & - & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) - if( storage == sparse_full .and. op_==sparse_op_none ) then - do concurrent(j=1:ncols) - aux = alpha_ * vec_x(${rksfx2(rank-1)}$j) - do i = colptr(j), colptr(j+1)-1 - vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + data(i) * aux - end do - end do - - else if( storage == sparse_full .and. op_==sparse_op_transpose ) then - do concurrent(j=1:ncols) - aux = zero_${k1}$ - do i = colptr(j), colptr(j+1)-1 - aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i)) - end do - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux - end do - - else if( storage == sparse_lower .and. op_/=sparse_op_hermitian )then - do j = 1 , ncols - aux = vec_x(${rksfx2(rank-1)}$j) * data(colptr(j)) - do i = colptr(j)+1, colptr(j+1)-1 - aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i)) - vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$j) - end do - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux - end do - - else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then - do j = 1 , ncols - aux = zero_${s1}$ - do i = colptr(j), colptr(i+1)-2 - aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$row(i)) - end do - aux = aux + data(colptr(j)) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux - end do - - #:if t1.startswith('complex') - else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then - do concurrent(j=1:ncols) - aux = zero_${k1}$ - do i = colptr(j), colptr(j+1)-1 - aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) - end do - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux - end do - - else if( storage == sparse_lower .and. op_==sparse_op_hermitian )then - do j = 1 , ncols - aux = vec_x(${rksfx2(rank-1)}$j) * conjg(data(colptr(j))) - do i = colptr(j)+1, colptr(j+1)-1 - aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) - vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) - end do - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux - end do - - else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then - do j = 1 , ncols - aux = zero_${s1}$ - do i = colptr(j), colptr(i+1)-2 - aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) - end do - aux = aux + conjg(data(colptr(j))) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux - end do - #:endif - end if - end associate - end subroutine - - #:endfor - #:endfor - - !! spmv_ell - #:for k1, t1, s1 in (KINDS_TYPES) - #:for rank in RANKS - subroutine spmv_ell_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(ELL_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_ - character(1) :: op_ - integer(ilp) :: i, j, k - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${k1}$ - if(present(alpha)) alpha_ = alpha - if(present(beta)) then - vec_y = beta * vec_y - else - vec_y = zero_${s1}$ - endif - associate( data => matrix%data, index => matrix%index, MNZ_P_ROW => matrix%K, & - & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) - if( storage == sparse_full .and. op_==sparse_op_none ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) - end do - else if( storage == sparse_full .and. op_==sparse_op_transpose ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) - end do - else if( storage /= sparse_full .and. op_/=sparse_op_hermitian ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) then - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) - if(i==j) cycle - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) - end if - end do - #:if t1.startswith('complex') - else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) - end do - else if( storage /= sparse_full .and. op_==sparse_op_hermitian ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) then - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$j) - if(i==j) cycle - vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) - end if - end do - #:endif - end if - end associate - end subroutine - - #:endfor - #:endfor - - !! spmv_sellc - #:set CHUNKS = [4,8,16] - #:for k1, t1, s1 in (KINDS_TYPES) - subroutine spmv_sellc_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - !! This algorithm was gracefully provided by Ivan Privec and adapted by Jose Alves - type(SELLC_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x(:) - ${t1}$, intent(inout) :: vec_y(:) - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_ - character(1) :: op_ - integer(ilp) :: i, nz, rowidx, num_chunks, rm - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${s1}$ - if(present(alpha)) alpha_ = alpha - if(present(beta)) then - vec_y = beta * vec_y - else - vec_y = zero_${s1}$ - endif - - associate( data => matrix%data, ia => matrix%rowptr , ja => matrix%col, cs => matrix%chunk_size, & - & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) - - if( .not.any( ${CHUNKS}$ == cs ) ) then - print *, "error: sellc chunk size not supported." - return - end if - - num_chunks = nrows / cs - rm = nrows - num_chunks * cs - if( storage == sparse_full .and. op_==sparse_op_none ) then - - select case(cs) - #:for chunk in CHUNKS - case(${chunk}$) - do i = 1, num_chunks - nz = ia(i+1) - ia(i) - rowidx = (i - 1)*${chunk}$ + 1 - call chunk_kernel_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:)) - end do - #:endfor - end select - - ! remainder - if(rm>0)then - i = num_chunks + 1 - nz = ia(i+1) - ia(i) - rowidx = (i - 1)*cs + 1 - call chunk_kernel_rm(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:)) - end if - - else if( storage == sparse_full .and. op_==sparse_op_transpose ) then - - select case(cs) - #:for chunk in CHUNKS - case(${chunk}$) - do i = 1, num_chunks - nz = ia(i+1) - ia(i) - rowidx = (i - 1)*${chunk}$ + 1 - call chunk_kernel_trans_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) - end do - #:endfor - end select - - ! remainder - if(rm>0)then - i = num_chunks + 1 - nz = ia(i+1) - ia(i) - rowidx = (i - 1)*cs + 1 - call chunk_kernel_rm_trans(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) - end if - - #:if t1.startswith('complex') - else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then + module subroutine spmv_coo_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(COO_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + end subroutine - select case(cs) - #:for chunk in CHUNKS - case(${chunk}$) - do i = 1, num_chunks - nz = ia(i+1) - ia(i) - rowidx = (i - 1)*${chunk}$ + 1 - call chunk_kernel_herm_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) - end do - #:endfor - end select - - ! remainder - if(rm>0)then - i = num_chunks + 1 - nz = ia(i+1) - ia(i) - rowidx = (i - 1)*cs + 1 - call chunk_kernel_rm_herm(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) - end if - #:endif - else - print *, "error: sellc format for spmv operation not yet supported." - return - end if - end associate + module subroutine spmv_bsr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(BSR_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + end subroutine - contains - #:for chunk in CHUNKS - pure subroutine chunk_kernel_${chunk}$(n,a,col,x,y) - integer, value :: n - ${t1}$, intent(in) :: a(${chunk}$,n), x(:) - integer(ilp), intent(in) :: col(${chunk}$,n) - ${t1}$, intent(inout) :: y(${chunk}$) - integer :: j - do j = 1, n - y(:) = y(:) + alpha_ * a(:,j) * x(col(:,j)) - end do + module subroutine spmv_csr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(CSR_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op end subroutine - pure subroutine chunk_kernel_trans_${chunk}$(n,a,col,x,y) - integer, value :: n - ${t1}$, intent(in) :: a(${chunk}$,n), x(${chunk}$) - integer(ilp), intent(in) :: col(${chunk}$,n) - ${t1}$, intent(inout) :: y(:) - integer :: j, k - do j = 1, n - do k = 1, ${chunk}$ - y(col(k,j)) = y(col(k,j)) + alpha_ * a(k,j) * x(k) - end do - end do + + module subroutine spmv_csc_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(CSC_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op end subroutine - #:if t1.startswith('complex') - pure subroutine chunk_kernel_herm_${chunk}$(n,a,col,x,y) - integer, value :: n - ${t1}$, intent(in) :: a(${chunk}$,n), x(${chunk}$) - integer(ilp), intent(in) :: col(${chunk}$,n) - ${t1}$, intent(inout) :: y(:) - integer :: j, k - do j = 1, n - do k = 1, ${chunk}$ - y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(a(k,j)) * x(k) - end do - end do + + module subroutine spmv_ell_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(ELL_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op end subroutine - #:endif #:endfor - - pure subroutine chunk_kernel_rm(n,cs,r,a,col,x,y) - integer, value :: n, cs, r - ${t1}$, intent(in) :: a(cs,n), x(:) - integer(ilp), intent(in) :: col(cs,n) - ${t1}$, intent(inout) :: y(r) - integer :: j - do j = 1, n - y(1:r) = y(1:r) + alpha_ * a(1:r,j) * x(col(1:r,j)) - end do - end subroutine - pure subroutine chunk_kernel_rm_trans(n,cs,r,a,col,x,y) - integer, value :: n, cs, r - ${t1}$, intent(in) :: a(cs,n), x(r) - integer(ilp), intent(in) :: col(cs,n) - ${t1}$, intent(inout) :: y(:) - integer :: j, k - do j = 1, n - do k = 1, r - y(col(k,j)) = y(col(k,j)) + alpha_ * a(k,j) * x(k) - end do - end do - end subroutine - #:if t1.startswith('complex') - pure subroutine chunk_kernel_rm_herm(n,cs,r,a,col,x,y) - integer, value :: n, cs, r - ${t1}$, intent(in) :: a(cs,n), x(r) - integer(ilp), intent(in) :: col(cs,n) - ${t1}$, intent(inout) :: y(:) - integer :: j, k - do j = 1, n - do k = 1, r - y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(a(k,j)) * x(k) - end do - end do - end subroutine - #:endif - end subroutine - - #:endfor + module subroutine spmv_sellc_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(SELLC_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x(:) + ${t1}$, intent(inout) :: vec_y(:) + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + end subroutine + #:endfor + end interface + public :: spmv end module diff --git a/src/sparse/stdlib_sparse_spmv_bsr.fypp b/src/sparse/stdlib_sparse_spmv_bsr.fypp new file mode 100644 index 000000000..97e7d4eef --- /dev/null +++ b/src/sparse/stdlib_sparse_spmv_bsr.fypp @@ -0,0 +1,332 @@ +#:include "common.fypp" +#:set RANKS = range(1, 2+1) +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES +#! define ranks without parentheses +#:def rksfx2(rank) +#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}# +#:enddef +submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_bsr + use stdlib_linalg_blas, only: gemv, gemm +contains + + !! spmv_bsr + #:for k1, t1, s1 in (KINDS_TYPES) + #:for rank in RANKS + module subroutine spmv_bsr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(BSR_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_, beta_ + character(1) :: op_ + integer(ilp) :: nbrow, bi, bj, p + integer(ilp) :: row_i_start, col_j_start, i_max_i, j_max_j + integer(ilp) :: row_j_start, col_i_start, i_max_j, j_max_i + + #:if rank == 2 + integer(ilp) :: nrhs + #:endif + #:if t1.startswith('complex') + #:if rank == 1 + ${t1}$, allocatable :: xwork_bc(:) + ${t1}$, allocatable :: ywork_br(:) + #:else + ${t1}$, allocatable :: xwork_bc(:,:) + ${t1}$, allocatable :: ywork_br(:,:) + #:endif + #:endif + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${s1}$; if(present(alpha)) alpha_ = alpha + beta_ = zero_${s1}$; if(present(beta)) beta_ = beta + if(present(beta)) then + vec_y = beta_ * vec_y + else + vec_y = zero_${s1}$ + endif + + associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, & + & nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage, & + br => matrix%block_shape(1), bc => matrix%block_shape(2) ) + + nbrow = nrows / br + #:if rank == 2 + nrhs = size(vec_x,dim=2) + #:endif + + if( storage == sparse_full .and. op_ == sparse_op_none ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_i_start:), incy=1) + #:else + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & + c=vec_y(row_i_start:, :), ldc=i_max_i) + #:endif + end do + end do + + else if( storage == sparse_full .and. op_ == sparse_op_transpose ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(col_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & + c=vec_y(col_j_start:, :), ldc=j_max_j) + #:endif + end do + end do + + else if( storage == sparse_lower .and. op_ /= sparse_op_hermitian ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_i_start:), incy=1) + #:else + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & + c=vec_y(row_i_start:, :), ldc=i_max_i) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end do + end do + + else if( storage == sparse_upper .and. op_ /= sparse_op_hermitian ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_i_start:), incy=1) + #:else + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & + c=vec_y(row_i_start:, :), ldc=i_max_i) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end do + end do + + #:if t1.startswith('complex') + else if( storage == sparse_full .and. op_ == sparse_op_hermitian ) then + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(col_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & + c=vec_y(col_j_start:, :), ldc=j_max_j) + #:endif + end do + end do + + else if( storage == sparse_lower .and. op_ == sparse_op_hermitian ) then + #:if rank == 1 + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br)) + #:else + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) + #:endif + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) + ywork_br(1:i_max_i) = zero_${s1}$ + call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) + vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & + alpha_ * conjg(ywork_br(1:i_max_i)) + #:else + xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) + ywork_br(1:i_max_i, :) = zero_${s1}$ + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) + vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & + alpha_ * conjg(ywork_br(1:i_max_i, :)) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end do + end do + + else if( storage == sparse_upper .and. op_ == sparse_op_hermitian ) then + #:if rank == 1 + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br)) + #:else + if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) + if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) + #:endif + do bi = 1, nbrow + row_i_start = (bi - 1) * br + 1 + if (row_i_start > nrows) cycle + i_max_i = min(br, nrows - row_i_start + 1) + do p = rowptr(bi), rowptr(bi+1)-1 + bj = col(p) + col_j_start = (bj - 1) * bc + 1 + if (col_j_start > ncols) cycle + j_max_j = min(bc, ncols - col_j_start + 1) + + #:if rank == 1 + xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) + ywork_br(1:i_max_i) = zero_${s1}$ + call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) + vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & + alpha_ * conjg(ywork_br(1:i_max_i)) + #:else + xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) + ywork_br(1:i_max_i, :) = zero_${s1}$ + call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & + b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) + vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & + alpha_ * conjg(ywork_br(1:i_max_i, :)) + #:endif + + if (bi == bj) cycle + + row_j_start = (bj - 1) * br + 1 + if (row_j_start > nrows) cycle + i_max_j = min(br, nrows - row_j_start + 1) + col_i_start = (bi - 1) * bc + 1 + if (col_i_start > ncols) cycle + j_max_i = min(bc, ncols - col_i_start + 1) + + #:if rank == 1 + call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & + x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & + y=vec_y(row_j_start:), incy=1) + #:else + call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & + b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & + c=vec_y(row_j_start:, :), ldc=i_max_j) + #:endif + end do + end do + #:endif + end if + end associate + end subroutine + + #:endfor + #:endfor + +end submodule stdlib_sparse_spmv_bsr \ No newline at end of file diff --git a/src/sparse/stdlib_sparse_spmv_coo.fypp b/src/sparse/stdlib_sparse_spmv_coo.fypp new file mode 100644 index 000000000..4b766c4b7 --- /dev/null +++ b/src/sparse/stdlib_sparse_spmv_coo.fypp @@ -0,0 +1,101 @@ +#:include "common.fypp" +#:set RANKS = range(1, 2+1) +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES +#! define ranks without parentheses +#:def rksfx2(rank) +#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}# +#:enddef +submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_coo +contains + + !! spmv_coo + #:for k1, t1, s1 in (KINDS_TYPES) + #:for rank in RANKS + module subroutine spmv_coo_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(COO_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_ + character(1) :: op_ + integer(ilp) :: col_index, entry_index, row_index + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${k1}$ + if(present(alpha)) alpha_ = alpha + if(present(beta)) then + vec_y = beta * vec_y + else + vec_y = zero_${s1}$ + endif + + associate( data => matrix%data, index => matrix%index, storage => matrix%storage, nnz => matrix%nnz ) + select case(op_) + case(sparse_op_none) + if(storage == sparse_full) then + do concurrent (entry_index = 1:nnz) + row_index = index(1,entry_index) + col_index = index(2,entry_index) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + end do + + else + do concurrent (entry_index = 1:nnz) + row_index = index(1,entry_index) + col_index = index(2,entry_index) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + if( row_index==col_index ) cycle + vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$row_index) + end do + + end if + case(sparse_op_transpose) + if(storage == sparse_full) then + do concurrent (entry_index = 1:nnz) + col_index = index(1,entry_index) + row_index = index(2,entry_index) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + end do + + else + do concurrent (entry_index = 1:nnz) + col_index = index(1,entry_index) + row_index = index(2,entry_index) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + if( row_index==col_index ) cycle + vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$row_index) + end do + + end if + #:if t1.startswith('complex') + case(sparse_op_hermitian) + if(storage == sparse_full) then + do concurrent (entry_index = 1:nnz) + col_index = index(1,entry_index) + row_index = index(2,entry_index) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*conjg(data(entry_index)) * vec_x(${rksfx2(rank-1)}$col_index) + end do + + else + do concurrent (entry_index = 1:nnz) + col_index = index(1,entry_index) + row_index = index(2,entry_index) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*conjg(data(entry_index)) * vec_x(${rksfx2(rank-1)}$col_index) + if( row_index==col_index ) cycle + vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*conjg(data(entry_index)) * vec_x(${rksfx2(rank-1)}$row_index) + end do + + end if + #:endif + end select + end associate + end subroutine + + #:endfor + #:endfor + +end submodule stdlib_sparse_spmv_coo \ No newline at end of file diff --git a/src/sparse/stdlib_sparse_spmv_csc.fypp b/src/sparse/stdlib_sparse_spmv_csc.fypp new file mode 100644 index 000000000..9341eb574 --- /dev/null +++ b/src/sparse/stdlib_sparse_spmv_csc.fypp @@ -0,0 +1,119 @@ +#:include "common.fypp" +#:set RANKS = range(1, 2+1) +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES +#! define ranks without parentheses +#:def rksfx2(rank) +#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}# +#:enddef +submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_csc +contains + + !! spmv_csc + #:for k1, t1, s1 in (KINDS_TYPES) + #:for rank in RANKS + module subroutine spmv_csc_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(CSC_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_ + character(1) :: op_ + integer(ilp) :: i, j + #:if rank == 1 + ${t1}$ :: aux + #:else + ${t1}$ :: aux(size(vec_x,dim=1)) + #:endif + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${k1}$ + if(present(alpha)) alpha_ = alpha + if(present(beta)) then + vec_y = beta * vec_y + else + vec_y = zero_${s1}$ + endif + + associate( data => matrix%data, colptr => matrix%colptr, row => matrix%row, & + & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) + if( storage == sparse_full .and. op_==sparse_op_none ) then + do concurrent(j=1:ncols) + aux = alpha_ * vec_x(${rksfx2(rank-1)}$j) + do i = colptr(j), colptr(j+1)-1 + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + data(i) * aux + end do + end do + + else if( storage == sparse_full .and. op_==sparse_op_transpose ) then + do concurrent(j=1:ncols) + aux = zero_${k1}$ + do i = colptr(j), colptr(j+1)-1 + aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i)) + end do + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux + end do + + else if( storage == sparse_lower .and. op_/=sparse_op_hermitian )then + do j = 1 , ncols + aux = vec_x(${rksfx2(rank-1)}$j) * data(colptr(j)) + do i = colptr(j)+1, colptr(j+1)-1 + aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i)) + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$j) + end do + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux + end do + + else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then + do j = 1 , ncols + aux = zero_${s1}$ + do i = colptr(j), colptr(i+1)-2 + aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$j) + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$row(i)) + end do + aux = aux + data(colptr(j)) * vec_x(${rksfx2(rank-1)}$j) + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux + end do + + #:if t1.startswith('complex') + else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then + do concurrent(j=1:ncols) + aux = zero_${k1}$ + do i = colptr(j), colptr(j+1)-1 + aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) + end do + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux + end do + + else if( storage == sparse_lower .and. op_==sparse_op_hermitian )then + do j = 1 , ncols + aux = vec_x(${rksfx2(rank-1)}$j) * conjg(data(colptr(j))) + do i = colptr(j)+1, colptr(j+1)-1 + aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) + end do + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux + end do + + else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then + do j = 1 , ncols + aux = zero_${s1}$ + do i = colptr(j), colptr(i+1)-2 + aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) + end do + aux = aux + conjg(data(colptr(j))) * vec_x(${rksfx2(rank-1)}$j) + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux + end do + #:endif + end if + end associate + end subroutine + + #:endfor + #:endfor + +end submodule stdlib_sparse_spmv_csc \ No newline at end of file diff --git a/src/sparse/stdlib_sparse_spmv_csr.fypp b/src/sparse/stdlib_sparse_spmv_csr.fypp new file mode 100644 index 000000000..929449969 --- /dev/null +++ b/src/sparse/stdlib_sparse_spmv_csr.fypp @@ -0,0 +1,123 @@ +#:include "common.fypp" +#:set RANKS = range(1, 2+1) +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES +#! define ranks without parentheses +#:def rksfx2(rank) +#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}# +#:enddef +submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_csr +contains + + !! spmv_csr + #:for k1, t1, s1 in (KINDS_TYPES) + #:for rank in RANKS + module subroutine spmv_csr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(CSR_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_ + character(1) :: op_ + integer(ilp) :: i, j + #:if rank == 1 + ${t1}$ :: aux, aux2 + #:else + ${t1}$ :: aux(size(vec_x,dim=1)), aux2(size(vec_x,dim=1)) + #:endif + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${k1}$ + if(present(alpha)) alpha_ = alpha + if(present(beta)) then + vec_y = beta * vec_y + else + vec_y = zero_${s1}$ + endif + + associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, & + & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) + + if( storage == sparse_full .and. op_==sparse_op_none ) then + do i = 1, nrows + aux = zero_${k1}$ + do j = rowptr(i), rowptr(i+1)-1 + aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j)) + end do + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux + end do + + else if( storage == sparse_full .and. op_==sparse_op_transpose ) then + do i = 1, nrows + aux = alpha_ * vec_x(${rksfx2(rank-1)}$i) + do j = rowptr(i), rowptr(i+1)-1 + vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux + end do + end do + + else if( storage == sparse_lower .and. op_/=sparse_op_hermitian )then + do i = 1 , nrows + aux = zero_${s1}$ + aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) + do j = rowptr(i), rowptr(i+1)-2 + aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j)) + vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux2 + end do + aux = alpha_ * aux + data(j) * aux2 + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + aux + end do + + else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then + do i = 1 , nrows + aux = vec_x(${rksfx2(rank-1)}$i) * data(rowptr(i)) + aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) + do j = rowptr(i)+1, rowptr(i+1)-1 + aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j)) + vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux2 + end do + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux + end do + + #:if t1.startswith('complex') + else if( storage == sparse_full .and. op_==sparse_op_hermitian) then + do i = 1, nrows + aux = alpha_ * vec_x(${rksfx2(rank-1)}$i) + do j = rowptr(i), rowptr(i+1)-1 + vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux + end do + end do + + else if( storage == sparse_lower .and. op_==sparse_op_hermitian )then + do i = 1 , nrows + aux = zero_${s1}$ + aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) + do j = rowptr(i), rowptr(i+1)-2 + aux = aux + conjg(data(j)) * vec_x(${rksfx2(rank-1)}$col(j)) + vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux2 + end do + aux = alpha_ * aux + conjg(data(j)) * aux2 + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + aux + end do + + else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then + do i = 1 , nrows + aux = vec_x(${rksfx2(rank-1)}$i) * conjg(data(rowptr(i))) + aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i) + do j = rowptr(i)+1, rowptr(i+1)-1 + aux = aux + conjg(data(j)) * vec_x(${rksfx2(rank-1)}$col(j)) + vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux2 + end do + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux + end do + #:endif + end if + end associate + end subroutine + + #:endfor + #:endfor + +end submodule stdlib_sparse_spmv_csr \ No newline at end of file diff --git a/src/sparse/stdlib_sparse_spmv_ell.fypp b/src/sparse/stdlib_sparse_spmv_ell.fypp new file mode 100644 index 000000000..36102e850 --- /dev/null +++ b/src/sparse/stdlib_sparse_spmv_ell.fypp @@ -0,0 +1,79 @@ +#:include "common.fypp" +#:set RANKS = range(1, 2+1) +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES +#! define ranks without parentheses +#:def rksfx2(rank) +#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}# +#:enddef +submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_ell +contains + + !! spmv_ell + #:for k1, t1, s1 in (KINDS_TYPES) + #:for rank in RANKS + module subroutine spmv_ell_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + type(ELL_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ + ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_ + character(1) :: op_ + integer(ilp) :: i, j, k + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${k1}$ + if(present(alpha)) alpha_ = alpha + if(present(beta)) then + vec_y = beta * vec_y + else + vec_y = zero_${s1}$ + endif + associate( data => matrix%data, index => matrix%index, MNZ_P_ROW => matrix%K, & + & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) + if( storage == sparse_full .and. op_==sparse_op_none ) then + do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) + j = index(i,k) + if(j>0) vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) + end do + else if( storage == sparse_full .and. op_==sparse_op_transpose ) then + do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) + j = index(i,k) + if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) + end do + else if( storage /= sparse_full .and. op_/=sparse_op_hermitian ) then + do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) + j = index(i,k) + if(j>0) then + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) + if(i==j) cycle + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) + end if + end do + #:if t1.startswith('complex') + else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then + do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) + j = index(i,k) + if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) + end do + else if( storage /= sparse_full .and. op_==sparse_op_hermitian ) then + do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) + j = index(i,k) + if(j>0) then + vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$j) + if(i==j) cycle + vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) + end if + end do + #:endif + end if + end associate + end subroutine + + #:endfor + #:endfor + +end submodule stdlib_sparse_spmv_ell \ No newline at end of file diff --git a/src/sparse/stdlib_sparse_spmv_sellc.fypp b/src/sparse/stdlib_sparse_spmv_sellc.fypp new file mode 100644 index 000000000..52205debd --- /dev/null +++ b/src/sparse/stdlib_sparse_spmv_sellc.fypp @@ -0,0 +1,193 @@ +#:include "common.fypp" +#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) +#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) +#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES +#:set CHUNKS = [4,8,16] +submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_sellc +contains + + !! spmv_sellc + #:for k1, t1, s1 in (KINDS_TYPES) + module subroutine spmv_sellc_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) + !! This algorithm was gracefully provided by Ivan Privec and adapted by Jose Alves + type(SELLC_${s1}$_type), intent(in) :: matrix + ${t1}$, intent(in) :: vec_x(:) + ${t1}$, intent(inout) :: vec_y(:) + ${t1}$, intent(in), optional :: alpha + ${t1}$, intent(in), optional :: beta + character(1), intent(in), optional :: op + ${t1}$ :: alpha_ + character(1) :: op_ + integer(ilp) :: i, nz, rowidx, num_chunks, rm + + op_ = sparse_op_none; if(present(op)) op_ = op + alpha_ = one_${s1}$ + if(present(alpha)) alpha_ = alpha + if(present(beta)) then + vec_y = beta * vec_y + else + vec_y = zero_${s1}$ + endif + + associate( data => matrix%data, ia => matrix%rowptr , ja => matrix%col, cs => matrix%chunk_size, & + & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) + + if( .not.any( ${CHUNKS}$ == cs ) ) then + print *, "error: sellc chunk size not supported." + return + end if + + num_chunks = nrows / cs + rm = nrows - num_chunks * cs + if( storage == sparse_full .and. op_==sparse_op_none ) then + + select case(cs) + #:for chunk in CHUNKS + case(${chunk}$) + do i = 1, num_chunks + nz = ia(i+1) - ia(i) + rowidx = (i - 1)*${chunk}$ + 1 + call chunk_kernel_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:)) + end do + #:endfor + end select + + ! remainder + if(rm>0)then + i = num_chunks + 1 + nz = ia(i+1) - ia(i) + rowidx = (i - 1)*cs + 1 + call chunk_kernel_rm(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:)) + end if + + else if( storage == sparse_full .and. op_==sparse_op_transpose ) then + + select case(cs) + #:for chunk in CHUNKS + case(${chunk}$) + do i = 1, num_chunks + nz = ia(i+1) - ia(i) + rowidx = (i - 1)*${chunk}$ + 1 + call chunk_kernel_trans_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) + end do + #:endfor + end select + + ! remainder + if(rm>0)then + i = num_chunks + 1 + nz = ia(i+1) - ia(i) + rowidx = (i - 1)*cs + 1 + call chunk_kernel_rm_trans(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) + end if + + #:if t1.startswith('complex') + else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then + + select case(cs) + #:for chunk in CHUNKS + case(${chunk}$) + do i = 1, num_chunks + nz = ia(i+1) - ia(i) + rowidx = (i - 1)*${chunk}$ + 1 + call chunk_kernel_herm_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) + end do + #:endfor + end select + + ! remainder + if(rm>0)then + i = num_chunks + 1 + nz = ia(i+1) - ia(i) + rowidx = (i - 1)*cs + 1 + call chunk_kernel_rm_herm(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y) + end if + #:endif + else + print *, "error: sellc format for spmv operation not yet supported." + return + end if + end associate + + contains + #:for chunk in CHUNKS + pure subroutine chunk_kernel_${chunk}$(n,a,col,x,y) + integer, value :: n + ${t1}$, intent(in) :: a(${chunk}$,n), x(:) + integer(ilp), intent(in) :: col(${chunk}$,n) + ${t1}$, intent(inout) :: y(${chunk}$) + integer :: j + do j = 1, n + y(:) = y(:) + alpha_ * a(:,j) * x(col(:,j)) + end do + end subroutine + pure subroutine chunk_kernel_trans_${chunk}$(n,a,col,x,y) + integer, value :: n + ${t1}$, intent(in) :: a(${chunk}$,n), x(${chunk}$) + integer(ilp), intent(in) :: col(${chunk}$,n) + ${t1}$, intent(inout) :: y(:) + integer :: j, k + do j = 1, n + do k = 1, ${chunk}$ + y(col(k,j)) = y(col(k,j)) + alpha_ * a(k,j) * x(k) + end do + end do + end subroutine + #:if t1.startswith('complex') + pure subroutine chunk_kernel_herm_${chunk}$(n,a,col,x,y) + integer, value :: n + ${t1}$, intent(in) :: a(${chunk}$,n), x(${chunk}$) + integer(ilp), intent(in) :: col(${chunk}$,n) + ${t1}$, intent(inout) :: y(:) + integer :: j, k + do j = 1, n + do k = 1, ${chunk}$ + y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(a(k,j)) * x(k) + end do + end do + end subroutine + #:endif + #:endfor + + pure subroutine chunk_kernel_rm(n,cs,r,a,col,x,y) + integer, value :: n, cs, r + ${t1}$, intent(in) :: a(cs,n), x(:) + integer(ilp), intent(in) :: col(cs,n) + ${t1}$, intent(inout) :: y(r) + integer :: j + do j = 1, n + y(1:r) = y(1:r) + alpha_ * a(1:r,j) * x(col(1:r,j)) + end do + end subroutine + pure subroutine chunk_kernel_rm_trans(n,cs,r,a,col,x,y) + integer, value :: n, cs, r + ${t1}$, intent(in) :: a(cs,n), x(r) + integer(ilp), intent(in) :: col(cs,n) + ${t1}$, intent(inout) :: y(:) + integer :: j, k + do j = 1, n + do k = 1, r + y(col(k,j)) = y(col(k,j)) + alpha_ * a(k,j) * x(k) + end do + end do + end subroutine + #:if t1.startswith('complex') + pure subroutine chunk_kernel_rm_herm(n,cs,r,a,col,x,y) + integer, value :: n, cs, r + ${t1}$, intent(in) :: a(cs,n), x(r) + integer(ilp), intent(in) :: col(cs,n) + ${t1}$, intent(inout) :: y(:) + integer :: j, k + do j = 1, n + do k = 1, r + y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(a(k,j)) * x(k) + end do + end do + end subroutine + #:endif + + end subroutine + + #:endfor + +end submodule stdlib_sparse_spmv_sellc \ No newline at end of file From b1eb77f0ec64a8e5230a759b88cfdecadf55776b Mon Sep 17 00:00:00 2001 From: jalvesz Date: Sun, 15 Mar 2026 18:20:22 +0100 Subject: [PATCH 08/18] bsr add blocks --- doc/specs/stdlib_sparse.md | 13 +++++++++++-- src/sparse/stdlib_sparse_kinds.fypp | 27 ++++++++++----------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/doc/specs/stdlib_sparse.md b/doc/specs/stdlib_sparse.md index 4d0b6df5b..bb7769428 100644 --- a/doc/specs/stdlib_sparse.md +++ b/doc/specs/stdlib_sparse.md @@ -146,16 +146,25 @@ Type-bound procedures to enable adding data in a sparse matrix. ### Syntax -`call matrix%add(i,j,v)` or +* Add single value +`call matrix%add(i,j,v)` + +* Add a block of values `call matrix%add(i(:),j(:),v(:,:))` +* BSR format: add a single block +`call bsr_matrix%add(i,j,v(:,:))` + +* BSR format: add a block of blocks +`call bsr_matrix%add(i(:),j(:),v(:,:,:,:))` + ### Arguments `i`: Shall be an integer value or rank-1 array. It is an `intent(in)` argument. `j`: Shall be an integer value or rank-1 array. It is an `intent(in)` argument. -`v`: Shall be a `real` or `complex` value or rank-2 array. The type shall be in accordance to the declared sparse matrix object. It is an `intent(in)` argument. +`v`: Shall be a `real` or `complex` value or rank-2 array. For the `BSR` format type, the unit input is a single matrix block (rank-2 array), alternatively, a block of blocks can be added unsing a rank-4 array. The type shall be in accordance to the declared sparse matrix object. It is an `intent(in)` argument. ## `at`- sparse matrix data accessors diff --git a/src/sparse/stdlib_sparse_kinds.fypp b/src/sparse/stdlib_sparse_kinds.fypp index 8bc0a9ec1..f8b16bc2d 100644 --- a/src/sparse/stdlib_sparse_kinds.fypp +++ b/src/sparse/stdlib_sparse_kinds.fypp @@ -725,7 +725,7 @@ contains class(BSR_${s1}$_type), intent(in) :: self integer(ilp), intent(in) :: ik, jk integer(ilp) :: k, br, bc, ib, jb, ii, jj, ik_, jk_ - logical :: transpose + logical(c_bool) :: transpose if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then val = ieee_value( 0._${k1}$ , ieee_quiet_nan) @@ -766,12 +766,7 @@ contains ${t1}$, intent(in) :: val(:,:) integer(ilp), intent(in) :: ik, jk integer(ilp) :: k - - if(size(val,1) /= self%block_shape(1) .or. size(val,2) /= self%block_shape(2)) then - print *, "Warning: block size mismatch in add_bvalue_bsr" - return - end if - + ! naive implementation do k = self%rowptr(ik), self%rowptr(ik+1)-1 if( jk == self%col(k) ) then self%data(:,:,k) = self%data(:,:,k) + val(:,:) @@ -784,19 +779,17 @@ contains !> add a blocks matrices to the BSR matrix subroutine add_block_bsr_${s1}$(self,ik,jk,val) class(BSR_${s1}$_type), intent(inout) :: self - ${t1}$, intent(in) :: val(:,:,:,:) + ${t1}$, intent(in) :: val(:,:,:,:) !> 4D array to hold multiple blocks integer(ilp), intent(in) :: ik(:), jk(:) - integer(ilp) :: i, j, k - - if(size(val,1) /= self%block_shape(1) .or. size(val,2) /= self%block_shape(2)) then - print *, "Warning: block size mismatch in add_bvalue_bsr" - return - end if - + integer(ilp) :: i, j, k, brow, bcol + ! naive implementation do i = 1, size(ik) - do k = self%rowptr(ik(i)), self%rowptr(ik(i)+1)-1 + brow = ik(i) + do k = self%rowptr(brow), self%rowptr(brow+1)-1 do j = 1, size(jk) - if( jk(j) == self%col(k) ) then + bcol = jk(j) + if( skip(self%storage,brow,bcol) ) cycle + if( bcol == self%col(k) ) then self%data(:,:,k) = self%data(:,:,k) + val(:,:,i,j) end if end do From be7930a63530fa3b644ef07955eefa121b576512 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Fri, 27 Mar 2026 09:30:28 +0100 Subject: [PATCH 09/18] cleanup unrelated changes --- doc/specs/stdlib_sparse.md | 8 +- src/sparse/CMakeLists.txt | 3 +- src/sparse/stdlib_sparse_kinds.fypp | 150 ----------- src/sparse/stdlib_sparse_spmv.fypp | 9 - src/sparse/stdlib_sparse_spmv_bsr.fypp | 332 ------------------------- test/linalg/test_linalg_sparse.fypp | 91 ------- 6 files changed, 2 insertions(+), 591 deletions(-) delete mode 100644 src/sparse/stdlib_sparse_spmv_bsr.fypp diff --git a/doc/specs/stdlib_sparse.md b/doc/specs/stdlib_sparse.md index 85c818f68..f31138180 100644 --- a/doc/specs/stdlib_sparse.md +++ b/doc/specs/stdlib_sparse.md @@ -152,19 +152,13 @@ Type-bound procedures to enable adding data in a sparse matrix. * Add a block of values `call matrix%add(i(:),j(:),v(:,:))` -* BSR format: add a single block -`call bsr_matrix%add(i,j,v(:,:))` - -* BSR format: add a block of blocks -`call bsr_matrix%add(i(:),j(:),v(:,:,:,:))` - ### Arguments `i`: Shall be an integer value or rank-1 array. It is an `intent(in)` argument. `j`: Shall be an integer value or rank-1 array. It is an `intent(in)` argument. -`v`: Shall be a `real` or `complex` value or rank-2 array. For the `BSR` format type, the unit input is a single matrix block (rank-2 array), alternatively, a block of blocks can be added unsing a rank-4 array. The type shall be in accordance to the declared sparse matrix object. It is an `intent(in)` argument. +`v`: Shall be a `real` or `complex` value or rank-2 array. The type shall be in accordance to the declared sparse matrix object. It is an `intent(in)` argument. ## `at`- sparse matrix data accessors diff --git a/src/sparse/CMakeLists.txt b/src/sparse/CMakeLists.txt index a75541943..785c15bda 100644 --- a/src/sparse/CMakeLists.txt +++ b/src/sparse/CMakeLists.txt @@ -4,7 +4,6 @@ set(sparse_fppFiles stdlib_sparse_kinds.fypp stdlib_sparse_operators.fypp stdlib_sparse_spmv.fypp - stdlib_sparse_spmv_bsr.fypp stdlib_sparse_spmv_coo.fypp stdlib_sparse_spmv_csr.fypp stdlib_sparse_spmv_csc.fypp @@ -21,4 +20,4 @@ set(sparse_f90Files configure_stdlib_target(${PROJECT_NAME}_sparse sparse_f90Files sparse_fppFiles sparse_cppFiles) -target_link_libraries(${PROJECT_NAME}_sparse PUBLIC ${PROJECT_NAME}_constants ${PROJECT_NAME}_sorting ${PROJECT_NAME}_blas) +target_link_libraries(${PROJECT_NAME}_sparse PUBLIC ${PROJECT_NAME}_constants ${PROJECT_NAME}_sorting) diff --git a/src/sparse/stdlib_sparse_kinds.fypp b/src/sparse/stdlib_sparse_kinds.fypp index f8b16bc2d..5c6a706ab 100644 --- a/src/sparse/stdlib_sparse_kinds.fypp +++ b/src/sparse/stdlib_sparse_kinds.fypp @@ -50,28 +50,6 @@ module stdlib_sparse_kinds end type #:endfor - !! version: experimental - !! - !! BSR: Compressed sparse row or Yale format - type, public, extends(sparse_type) :: BSR_type - integer(ilp) :: block_shape(2) = (/1,1/) !! shape of the blocks - integer(ilp), allocatable :: col(:) !! matrix column pointer - integer(ilp), allocatable :: rowptr(:) !! matrix row pointer - contains - procedure :: malloc => malloc_bsr - end type - - #:for k1, t1, s1 in (KINDS_TYPES) - type, public, extends(BSR_type) :: BSR_${s1}$_type - ${t1}$, allocatable :: data(:,:,:) - contains - procedure, non_overridable :: at => at_value_bsr_${s1}$ - procedure, non_overridable :: add_value => add_bvalue_bsr_${s1}$ - procedure, non_overridable :: add_block => add_block_bsr_${s1}$ - generic :: add => add_value, add_block - end type - #:endfor - !! version: experimental !! !! CSR: Compressed sparse row or Yale format @@ -348,55 +326,6 @@ contains end select end subroutine - !! (re)Allocate matrix memory for the BSR type - subroutine malloc_bsr(self,block_shape,num_brows,num_bcols,bnnz) - class(BSR_type) :: self - integer(ilp), intent(in) :: block_shape(2) !! shape of the blocks - integer(ilp), intent(in) :: num_brows !! number of block rows - integer(ilp), intent(in) :: num_bcols !! number of block columns - integer(ilp), intent(in) :: bnnz !! number of non-zero blocks - integer(ilp), allocatable :: temp_idx(:) - integer(ilp) :: br, bc - !----------------------------------------------------- - - br = max(block_shape(1), 1) - bc = max(block_shape(2), 1) - - self%block_shape = (/br,bc/) - self%nrows = num_brows * br - self%ncols = num_bcols * bc - self%nnz = br*bc*bnnz - - if(.not.allocated(self%col)) then - allocate(temp_idx(bnnz) , source = 0 ) - else - allocate(temp_idx(bnnz) , source = self%col ) - end if - call move_alloc(from=temp_idx,to=self%col) - - if(.not.allocated(self%rowptr)) then - allocate(temp_idx(num_brows+1) , source = 0 ) - else - allocate(temp_idx(num_brows+1) , source = self%rowptr ) - end if - call move_alloc(from=temp_idx,to=self%rowptr) - - select type(self) - #:for k1, t1, s1 in (KINDS_TYPES) - type is(BSR_${s1}$_type) - block - ${t1}$, allocatable :: temp_data_${s1}$(:,:,:) - if(.not.allocated(self%data)) then - allocate(temp_data_${s1}$(br,bc,bnnz) , source = zero_${s1}$ ) - else - allocate(temp_data_${s1}$(br,bc,bnnz) , source = self%data ) - end if - call move_alloc(from=temp_data_${s1}$,to=self%data) - end block - #:endfor - end select - end subroutine - !================================================================== ! data accessors !================================================================== @@ -719,83 +648,4 @@ contains end subroutine #:endfor - - #:for k1, t1, s1 in (KINDS_TYPES) - pure ${t1}$ function at_value_bsr_${s1}$(self,ik,jk) result(val) - class(BSR_${s1}$_type), intent(in) :: self - integer(ilp), intent(in) :: ik, jk - integer(ilp) :: k, br, bc, ib, jb, ii, jj, ik_, jk_ - logical(c_bool) :: transpose - - if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then - val = ieee_value( 0._${k1}$ , ieee_quiet_nan) - return - end if - - ik_ = ik; jk_ = jk - transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk) - if(transpose) then - ik_ = jk; jk_ = ik - end if - - br = self%block_shape(1) - bc = self%block_shape(2) - - ib = (ik_ - 1) / br + 1 - jb = (jk_ - 1) / bc + 1 - ii = modulo(ik_ - 1, br) + 1 - jj = modulo(jk_ - 1, bc) + 1 - - if(.not.allocated(self%rowptr) .or. ib < 1 .or. ib >= size(self%rowptr)) then - val = zero_${s1}$ - return - end if - - do k = self%rowptr(ib), self%rowptr(ib+1)-1 - if( jb == self%col(k) ) then - val = self%data(ii,jj,k) - return - end if - end do - val = zero_${s1}$ - end function - - !> add a single block value to the BSR matrix - subroutine add_bvalue_bsr_${s1}$(self,ik,jk,val) - class(BSR_${s1}$_type), intent(inout) :: self - ${t1}$, intent(in) :: val(:,:) - integer(ilp), intent(in) :: ik, jk - integer(ilp) :: k - ! naive implementation - do k = self%rowptr(ik), self%rowptr(ik+1)-1 - if( jk == self%col(k) ) then - self%data(:,:,k) = self%data(:,:,k) + val(:,:) - return - end if - end do - - end subroutine - - !> add a blocks matrices to the BSR matrix - subroutine add_block_bsr_${s1}$(self,ik,jk,val) - class(BSR_${s1}$_type), intent(inout) :: self - ${t1}$, intent(in) :: val(:,:,:,:) !> 4D array to hold multiple blocks - integer(ilp), intent(in) :: ik(:), jk(:) - integer(ilp) :: i, j, k, brow, bcol - ! naive implementation - do i = 1, size(ik) - brow = ik(i) - do k = self%rowptr(brow), self%rowptr(brow+1)-1 - do j = 1, size(jk) - bcol = jk(j) - if( skip(self%storage,brow,bcol) ) cycle - if( bcol == self%col(k) ) then - self%data(:,:,k) = self%data(:,:,k) + val(:,:,i,j) - end if - end do - end do - end do - end subroutine - - #:endfor end module stdlib_sparse_kinds diff --git a/src/sparse/stdlib_sparse_spmv.fypp b/src/sparse/stdlib_sparse_spmv.fypp index 87bcf7eef..5524ef7c0 100644 --- a/src/sparse/stdlib_sparse_spmv.fypp +++ b/src/sparse/stdlib_sparse_spmv.fypp @@ -32,15 +32,6 @@ module stdlib_sparse_spmv character(1), intent(in), optional :: op end subroutine - module subroutine spmv_bsr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(BSR_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - end subroutine - module subroutine spmv_csr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) type(CSR_${s1}$_type), intent(in) :: matrix ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ diff --git a/src/sparse/stdlib_sparse_spmv_bsr.fypp b/src/sparse/stdlib_sparse_spmv_bsr.fypp deleted file mode 100644 index 97e7d4eef..000000000 --- a/src/sparse/stdlib_sparse_spmv_bsr.fypp +++ /dev/null @@ -1,332 +0,0 @@ -#:include "common.fypp" -#:set RANKS = range(1, 2+1) -#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX)) -#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX)) -#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES -#! define ranks without parentheses -#:def rksfx2(rank) -#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}# -#:enddef -submodule (stdlib_sparse_spmv) stdlib_sparse_spmv_bsr - use stdlib_linalg_blas, only: gemv, gemm -contains - - !! spmv_bsr - #:for k1, t1, s1 in (KINDS_TYPES) - #:for rank in RANKS - module subroutine spmv_bsr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op) - type(BSR_${s1}$_type), intent(in) :: matrix - ${t1}$, intent(in) :: vec_x${ranksuffix(rank)}$ - ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$ - ${t1}$, intent(in), optional :: alpha - ${t1}$, intent(in), optional :: beta - character(1), intent(in), optional :: op - ${t1}$ :: alpha_, beta_ - character(1) :: op_ - integer(ilp) :: nbrow, bi, bj, p - integer(ilp) :: row_i_start, col_j_start, i_max_i, j_max_j - integer(ilp) :: row_j_start, col_i_start, i_max_j, j_max_i - - #:if rank == 2 - integer(ilp) :: nrhs - #:endif - #:if t1.startswith('complex') - #:if rank == 1 - ${t1}$, allocatable :: xwork_bc(:) - ${t1}$, allocatable :: ywork_br(:) - #:else - ${t1}$, allocatable :: xwork_bc(:,:) - ${t1}$, allocatable :: ywork_br(:,:) - #:endif - #:endif - - op_ = sparse_op_none; if(present(op)) op_ = op - alpha_ = one_${s1}$; if(present(alpha)) alpha_ = alpha - beta_ = zero_${s1}$; if(present(beta)) beta_ = beta - if(present(beta)) then - vec_y = beta_ * vec_y - else - vec_y = zero_${s1}$ - endif - - associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, & - & nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage, & - br => matrix%block_shape(1), bc => matrix%block_shape(2) ) - - nbrow = nrows / br - #:if rank == 2 - nrhs = size(vec_x,dim=2) - #:endif - - if( storage == sparse_full .and. op_ == sparse_op_none ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_i_start:), incy=1) - #:else - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & - c=vec_y(row_i_start:, :), ldc=i_max_i) - #:endif - end do - end do - - else if( storage == sparse_full .and. op_ == sparse_op_transpose ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(col_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & - c=vec_y(col_j_start:, :), ldc=j_max_j) - #:endif - end do - end do - - else if( storage == sparse_lower .and. op_ /= sparse_op_hermitian ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_i_start:), incy=1) - #:else - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & - c=vec_y(row_i_start:, :), ldc=i_max_i) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - - else if( storage == sparse_upper .and. op_ /= sparse_op_hermitian ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('N', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_j_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_i_start:), incy=1) - #:else - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_j_start:, :), ldb=j_max_j, beta=one_${s1}$, & - c=vec_y(row_i_start:, :), ldc=i_max_i) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('T', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('T','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - - #:if t1.startswith('complex') - else if( storage == sparse_full .and. op_ == sparse_op_hermitian ) then - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(row_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(col_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(row_i_start:, :), ldb=i_max_i, beta=one_${s1}$, & - c=vec_y(col_j_start:, :), ldc=j_max_j) - #:endif - end do - end do - - else if( storage == sparse_lower .and. op_ == sparse_op_hermitian ) then - #:if rank == 1 - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br)) - #:else - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) - #:endif - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) - ywork_br(1:i_max_i) = zero_${s1}$ - call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) - vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & - alpha_ * conjg(ywork_br(1:i_max_i)) - #:else - xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) - ywork_br(1:i_max_i, :) = zero_${s1}$ - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) - vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & - alpha_ * conjg(ywork_br(1:i_max_i, :)) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - - else if( storage == sparse_upper .and. op_ == sparse_op_hermitian ) then - #:if rank == 1 - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br)) - #:else - if(.not.allocated(xwork_bc)) allocate(xwork_bc(bc,nrhs)) - if(.not.allocated(ywork_br)) allocate(ywork_br(br,nrhs)) - #:endif - do bi = 1, nbrow - row_i_start = (bi - 1) * br + 1 - if (row_i_start > nrows) cycle - i_max_i = min(br, nrows - row_i_start + 1) - do p = rowptr(bi), rowptr(bi+1)-1 - bj = col(p) - col_j_start = (bj - 1) * bc + 1 - if (col_j_start > ncols) cycle - j_max_j = min(bc, ncols - col_j_start + 1) - - #:if rank == 1 - xwork_bc(1:j_max_j) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1)) - ywork_br(1:i_max_i) = zero_${s1}$ - call gemv('N', m=i_max_i, n=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - x=xwork_bc, incx=1, beta=zero_${s1}$, y=ywork_br, incy=1) - vec_y(row_i_start:row_i_start+i_max_i-1) = vec_y(row_i_start:row_i_start+i_max_i-1) + & - alpha_ * conjg(ywork_br(1:i_max_i)) - #:else - xwork_bc(1:j_max_j, :) = conjg(vec_x(col_j_start:col_j_start+j_max_j-1, :)) - ywork_br(1:i_max_i, :) = zero_${s1}$ - call gemm('N','N', m=i_max_i, n=nrhs, k=j_max_j, alpha=one_${s1}$, a=data(1:,1:,p), lda=br, & - b=xwork_bc, ldb=bc, beta=zero_${s1}$, c=ywork_br, ldc=br) - vec_y(row_i_start:row_i_start+i_max_i-1, :) = vec_y(row_i_start:row_i_start+i_max_i-1, :) + & - alpha_ * conjg(ywork_br(1:i_max_i, :)) - #:endif - - if (bi == bj) cycle - - row_j_start = (bj - 1) * br + 1 - if (row_j_start > nrows) cycle - i_max_j = min(br, nrows - row_j_start + 1) - col_i_start = (bi - 1) * bc + 1 - if (col_i_start > ncols) cycle - j_max_i = min(bc, ncols - col_i_start + 1) - - #:if rank == 1 - call gemv('C', m=i_max_i, n=j_max_j, alpha=alpha_, a=data(1:,1:,p), lda=br, & - x=vec_x(col_i_start:), incx=1, beta=one_${s1}$, & - y=vec_y(row_j_start:), incy=1) - #:else - call gemm('C','N', m=j_max_j, n=nrhs, k=i_max_i, alpha=alpha_, a=data(1:,1:,p), lda=br, & - b=vec_x(col_i_start:, :), ldb=j_max_i, beta=one_${s1}$, & - c=vec_y(row_j_start:, :), ldc=i_max_j) - #:endif - end do - end do - #:endif - end if - end associate - end subroutine - - #:endfor - #:endfor - -end submodule stdlib_sparse_spmv_bsr \ No newline at end of file diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index 54665a706..429c39310 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -22,7 +22,6 @@ contains testsuite = [ & new_unittest('coo', test_coo), & new_unittest('coo2ordered', test_coo2ordered), & - new_unittest('bsr', test_bsr), & new_unittest('csr', test_csr), & new_unittest('csc', test_csc), & new_unittest('ell', test_ell), & @@ -105,96 +104,6 @@ contains end subroutine - subroutine test_bsr(error) - !> Error handling - type(error_type), allocatable, intent(out) :: error - #:for k1, t1, s1 in (KINDS_TYPES) - block - integer, parameter :: wp = ${k1}$ - real(wp), parameter :: tol = 10*epsilon(0._wp) - type(BSR_${s1}$_type) :: BSR, BSR2 - real(wp) :: dense(6,6) - ${t1}$, allocatable :: vec_x(:) - ${t1}$, allocatable :: vec_y(:) - ${t1}$, allocatable :: vec_ref(:) - - ! |1 0| |6 7| 0 0 - ! |2 1| |8 2| 0 0 - ! ----------------- - ! 0 0 |1 4| 0 0 - ! 0 0 |5 1| 0 0 - ! ----------------- - ! 0 0 |4 3| |7 2| - ! 0 0 |0 0| |0 0| - dense = 0._wp - dense(1:2,1:2) = reshape(real([1,2,0,1],kind=wp),[2,2]) - dense(1:2,3:4) = reshape(real([6,8,7,2],kind=wp),[2,2]) - dense(3:4,3:4) = reshape(real([1,5,4,1],kind=wp),[2,2]) - dense(5:6,3:4) = reshape(real([4,0,3,0],kind=wp),[2,2]) - dense(5:6,5:6) = reshape(real([7,0,2,0],kind=wp),[2,2]) - - call BSR%malloc([2,2],3,3,5) - BSR%storage = sparse_full - BSR%rowptr = [1,3,4,6] - BSR%col = [1,2,2,2,3] - BSR%data(:,:,1) = reshape(real([1,2,0,1],kind=wp),[2,2]) - BSR%data(:,:,2) = reshape(real([6,8,7,2],kind=wp),[2,2]) - BSR%data(:,:,3) = reshape(real([1,5,4,1],kind=wp),[2,2]) - BSR%data(:,:,4) = reshape(real([4,0,3,0],kind=wp),[2,2]) - BSR%data(:,:,5) = reshape(real([7,0,2,0],kind=wp),[2,2]) - - allocate( vec_x(6) , source = 1._wp ) - allocate( vec_y(6) , source = 0._wp ) - allocate( vec_ref(6) , source = 0._wp ) - - vec_ref = matmul( dense, vec_x ) - call spmv( BSR, vec_x, vec_y ) - call check(error, norm2(vec_y - vec_ref) < tol , "error in BSR SpMV" ) - if (allocated(error)) return - - ! Test in-place transpose - vec_x = 1._wp - vec_ref = matmul( transpose(dense), vec_x ) - vec_y = 0._wp - call spmv( BSR, vec_x, vec_y, op=sparse_op_transpose ) - call check(error, norm2(vec_y - vec_ref) < tol , "error in BSR SpMV transpose" ) - if (allocated(error)) return - - ! Test symmetry storage (upper) - ! |1 0| |6 7| 0 0 - ! |2 1| |8 2| 0 0 - ! ----------------- - ! |6 8| |1 4| 0 0 - ! |7 2| |5 1| 0 0 - ! ----------------- - ! 0 0 0 0 |7 2| - ! 0 0 0 0 |0 0| - dense = 0._wp - dense(1:2,1:2) = reshape(real([1,2,0,1],kind=wp),[2,2]) - dense(1:2,3:4) = reshape(real([6,8,7,2],kind=wp),[2,2]) - dense(3:4,1:2) = reshape(real([6,7,8,2],kind=wp),[2,2]) - dense(3:4,3:4) = reshape(real([1,5,4,1],kind=wp),[2,2]) - dense(5:6,5:6) = reshape(real([7,0,2,0],kind=wp),[2,2]) - - call BSR2%malloc([2,2],3,3,4) - BSR2%storage = sparse_upper - BSR2%rowptr = [1,3,4,5] - BSR2%col = [1,2,2,3] - BSR2%data(:,:,1) = reshape(real([1,2,0,1],kind=wp),[2,2]) - BSR2%data(:,:,2) = reshape(real([6,8,7,2],kind=wp),[2,2]) - BSR2%data(:,:,3) = reshape(real([1,5,4,1],kind=wp),[2,2]) - BSR2%data(:,:,4) = reshape(real([7,0,2,0],kind=wp),[2,2]) - - vec_x = 1._wp - vec_ref = matmul( dense, vec_x ) - vec_y = 0._wp - call spmv( BSR2, vec_x, vec_y ) - call check(error, norm2(vec_y - vec_ref) < tol , "error in BSR symmetric SpMV" ) - if (allocated(error)) return - end block - #:endfor - end subroutine - subroutine test_csr(error) !> Error handling type(error_type), allocatable, intent(out) :: error From 6c6efbbd4d485c03ddf3d1007096ea5edfca427a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:54:34 +0200 Subject: [PATCH 10/18] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/sparse/stdlib_sparse_spmv_csc.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sparse/stdlib_sparse_spmv_csc.fypp b/src/sparse/stdlib_sparse_spmv_csc.fypp index 9341eb574..3d3f2a28a 100644 --- a/src/sparse/stdlib_sparse_spmv_csc.fypp +++ b/src/sparse/stdlib_sparse_spmv_csc.fypp @@ -70,9 +70,9 @@ contains else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then do j = 1 , ncols aux = zero_${s1}$ - do i = colptr(j), colptr(i+1)-2 + do i = colptr(j), colptr(j+1)-2 aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$row(i)) + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$row(i)) end do aux = aux + data(colptr(j)) * vec_x(${rksfx2(rank-1)}$j) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux From ce3ae88052e3865e82adcc806fefbbc4e1c611a1 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Fri, 17 Apr 2026 09:12:43 +0200 Subject: [PATCH 11/18] remove residual change --- src/sparse/stdlib_sparse_kinds.fypp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sparse/stdlib_sparse_kinds.fypp b/src/sparse/stdlib_sparse_kinds.fypp index 5c6a706ab..3cbbe9bff 100644 --- a/src/sparse/stdlib_sparse_kinds.fypp +++ b/src/sparse/stdlib_sparse_kinds.fypp @@ -648,4 +648,5 @@ contains end subroutine #:endfor + end module stdlib_sparse_kinds From 76df552eaebfcb21682c8fb2ac4f631bf83ec090 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Fri, 17 Apr 2026 09:13:39 +0200 Subject: [PATCH 12/18] revert to plain do loops --- src/sparse/stdlib_sparse_spmv_coo.fypp | 56 +++++++++++++------------- src/sparse/stdlib_sparse_spmv_ell.fypp | 42 +++++++++++-------- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/src/sparse/stdlib_sparse_spmv_coo.fypp b/src/sparse/stdlib_sparse_spmv_coo.fypp index 4b766c4b7..7aba6e9d8 100644 --- a/src/sparse/stdlib_sparse_spmv_coo.fypp +++ b/src/sparse/stdlib_sparse_spmv_coo.fypp @@ -22,7 +22,7 @@ contains character(1), intent(in), optional :: op ${t1}$ :: alpha_ character(1) :: op_ - integer(ilp) :: col_index, entry_index, row_index + integer(ilp) :: col_index, k, row_index op_ = sparse_op_none; if(present(op)) op_ = op alpha_ = one_${k1}$ @@ -37,56 +37,56 @@ contains select case(op_) case(sparse_op_none) if(storage == sparse_full) then - do concurrent (entry_index = 1:nnz) - row_index = index(1,entry_index) - col_index = index(2,entry_index) - vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + do k = 1, nnz + row_index = index(1,k) + col_index = index(2,k) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$col_index) end do else - do concurrent (entry_index = 1:nnz) - row_index = index(1,entry_index) - col_index = index(2,entry_index) - vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + do k = 1, nnz + row_index = index(1,k) + col_index = index(2,k) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$col_index) if( row_index==col_index ) cycle - vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$row_index) + vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$row_index) end do end if case(sparse_op_transpose) if(storage == sparse_full) then - do concurrent (entry_index = 1:nnz) - col_index = index(1,entry_index) - row_index = index(2,entry_index) - vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + do k = 1, nnz + col_index = index(1,k) + row_index = index(2,k) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$col_index) end do else - do concurrent (entry_index = 1:nnz) - col_index = index(1,entry_index) - row_index = index(2,entry_index) - vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$col_index) + do k = 1, nnz + col_index = index(1,k) + row_index = index(2,k) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$col_index) if( row_index==col_index ) cycle - vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*data(entry_index) * vec_x(${rksfx2(rank-1)}$row_index) + vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$row_index) end do end if #:if t1.startswith('complex') case(sparse_op_hermitian) if(storage == sparse_full) then - do concurrent (entry_index = 1:nnz) - col_index = index(1,entry_index) - row_index = index(2,entry_index) - vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*conjg(data(entry_index)) * vec_x(${rksfx2(rank-1)}$col_index) + do k = 1, nnz + col_index = index(1,k) + row_index = index(2,k) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$col_index) end do else - do concurrent (entry_index = 1:nnz) - col_index = index(1,entry_index) - row_index = index(2,entry_index) - vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*conjg(data(entry_index)) * vec_x(${rksfx2(rank-1)}$col_index) + do k = 1, nnz + col_index = index(1,k) + row_index = index(2,k) + vec_y(${rksfx2(rank-1)}$row_index) = vec_y(${rksfx2(rank-1)}$row_index) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$col_index) if( row_index==col_index ) cycle - vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*conjg(data(entry_index)) * vec_x(${rksfx2(rank-1)}$row_index) + vec_y(${rksfx2(rank-1)}$col_index) = vec_y(${rksfx2(rank-1)}$col_index) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$row_index) end do end if diff --git a/src/sparse/stdlib_sparse_spmv_ell.fypp b/src/sparse/stdlib_sparse_spmv_ell.fypp index 36102e850..243d1cbd4 100644 --- a/src/sparse/stdlib_sparse_spmv_ell.fypp +++ b/src/sparse/stdlib_sparse_spmv_ell.fypp @@ -35,38 +35,46 @@ contains associate( data => matrix%data, index => matrix%index, MNZ_P_ROW => matrix%K, & & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage ) if( storage == sparse_full .and. op_==sparse_op_none ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) + do i = 1, nrows + do k = 1, MNZ_P_ROW + j = index(i,k) + if(j>0) vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) + end do end do else if( storage == sparse_full .and. op_==sparse_op_transpose ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) + do i = 1, nrows + do k = 1, MNZ_P_ROW + j = index(i,k) + if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) + end do end do else if( storage /= sparse_full .and. op_/=sparse_op_hermitian ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) then + do i = 1, nrows + do k = 1, MNZ_P_ROW + j = index(i,k) + if(j<=0) cycle vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j) if(i==j) cycle vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i) - end if + end do end do #:if t1.startswith('complex') else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) + do i = 1, nrows + do k = 1, MNZ_P_ROW + j = index(i,k) + if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) + end do end do else if( storage /= sparse_full .and. op_==sparse_op_hermitian ) then - do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW) - j = index(i,k) - if(j>0) then + do i = 1, nrows + do k = 1, MNZ_P_ROW + j = index(i,k) + if(j<=0) cycle vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$j) if(i==j) cycle vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i) - end if + end do end do #:endif end if From 992692063bf5a21c932e7016ca20c38c2f11bce9 Mon Sep 17 00:00:00 2001 From: Jose Alves Date: Fri, 17 Apr 2026 09:14:09 +0200 Subject: [PATCH 13/18] fix indexing issue --- src/sparse/stdlib_sparse_spmv_csc.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sparse/stdlib_sparse_spmv_csc.fypp b/src/sparse/stdlib_sparse_spmv_csc.fypp index 3d3f2a28a..667e05103 100644 --- a/src/sparse/stdlib_sparse_spmv_csc.fypp +++ b/src/sparse/stdlib_sparse_spmv_csc.fypp @@ -101,9 +101,9 @@ contains else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then do j = 1 , ncols aux = zero_${s1}$ - do i = colptr(j), colptr(i+1)-2 + do i = colptr(j), colptr(j+1)-2 aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) end do aux = aux + conjg(data(colptr(j))) * vec_x(${rksfx2(rank-1)}$j) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux From 7e4b19dde1434c71eb8d5c4caa5081103d0d9c2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Mon, 20 Apr 2026 07:53:33 +0200 Subject: [PATCH 14/18] Remove extra blank line in stdlib_sparse_kinds.fypp --- src/sparse/stdlib_sparse_kinds.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparse/stdlib_sparse_kinds.fypp b/src/sparse/stdlib_sparse_kinds.fypp index 3cbbe9bff..b5141bad7 100644 --- a/src/sparse/stdlib_sparse_kinds.fypp +++ b/src/sparse/stdlib_sparse_kinds.fypp @@ -648,5 +648,5 @@ contains end subroutine #:endfor - + end module stdlib_sparse_kinds From 303678e4783f6e5d419db04a39915f7fd2df8d16 Mon Sep 17 00:00:00 2001 From: jalvesz Date: Mon, 20 Apr 2026 23:15:15 +0200 Subject: [PATCH 15/18] add fixes to CSC format --- src/sparse/stdlib_sparse_spmv_csc.fypp | 10 +- test/linalg/test_linalg_sparse.fypp | 138 +++++++++++++++++++------ 2 files changed, 114 insertions(+), 34 deletions(-) diff --git a/src/sparse/stdlib_sparse_spmv_csc.fypp b/src/sparse/stdlib_sparse_spmv_csc.fypp index 667e05103..83b4ed41d 100644 --- a/src/sparse/stdlib_sparse_spmv_csc.fypp +++ b/src/sparse/stdlib_sparse_spmv_csc.fypp @@ -71,10 +71,10 @@ contains do j = 1 , ncols aux = zero_${s1}$ do i = colptr(j), colptr(j+1)-2 - aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$row(i)) + aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i)) + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$j) end do - aux = aux + data(colptr(j)) * vec_x(${rksfx2(rank-1)}$j) + aux = aux + data(colptr(j+1)-1) * vec_x(${rksfx2(rank-1)}$j) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux end do @@ -103,9 +103,9 @@ contains aux = zero_${s1}$ do i = colptr(j), colptr(j+1)-2 aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) - vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i)) + vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j) end do - aux = aux + conjg(data(colptr(j))) * vec_x(${rksfx2(rank-1)}$j) + aux = aux + conjg(data(colptr(j+1)-1)) * vec_x(${rksfx2(rank-1)}$j) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux end do #:endif diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index 3186eb70c..4fc3432ce 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -256,45 +256,125 @@ contains #:for k1, t1, s1 in (KINDS_TYPES) block integer, parameter :: wp = ${k1}$ - type(COO_${s1}$_type) :: COO - type(CSR_${s1}$_type) :: CSR - type(ELL_${s1}$_type) :: ELL - ${t1}$, allocatable :: dense(:,:) - ${t1}$, allocatable :: vec_x(:) - ${t1}$, allocatable :: vec_y1(:), vec_y2(:), vec_y3(:), vec_y4(:) + type(COO_${s1}$_type) :: COO_low, COO_upper + type(CSR_${s1}$_type) :: CSR_low, CSR_upper + type(CSC_${s1}$_type) :: CSC_low, CSC_upper + type(ELL_${s1}$_type) :: ELL_low, ELL_upper + ${t1}$, allocatable :: dense(:,:), dense_low(:,:), dense_upper(:,:) + ${t1}$, allocatable :: vec_x(:), vec_y(:), vec_y_ref(:) + integer :: i, j + + allocate( vec_x(4), source = real([1,2,3,4], kind=wp) ) + allocate( vec_y(4), source = 0._wp ) + allocate( vec_y_ref(4), source = 0._wp ) + + allocate( dense(4,4), source = & + reshape(real([1,2,0,0, & + 2,3,4,0, & + 0,4,5,6, & + 0,0,6,7], kind=wp), [4,4]) ) + allocate( dense_low(4,4), source = dense ) + allocate( dense_upper(4,4), source = dense ) + + do i = 1, 4 + do j = i + 1, 4 + dense_low(i,j) = 0._wp + dense_upper(j,i) = 0._wp + end do + end do - allocate( vec_x(4) , source = 1._wp ) - allocate( vec_y1(4) , source = 0._wp ) - allocate( vec_y2(4) , source = 0._wp ) - allocate( vec_y3(4) , source = 0._wp ) - allocate( vec_y4(4) , source = 0._wp ) + vec_y_ref = matmul(dense, vec_x) - allocate( dense(4,4) , source = & - reshape(real([1,0,0,0, & - 2,1,0,0, & - 0,2,1,0,& - 0,0,2,1],kind=wp),[4,4]) ) + call dense2coo(dense_low, COO_low) + COO_low%storage = sparse_lower + call coo2csr(COO_low, CSR_low) + call coo2csc(COO_low, CSC_low) + call csr2ell(CSR_low, ELL_low) - call dense2coo( dense , COO ) - COO%storage = sparse_upper - call coo2csr(COO, CSR) - call csr2ell(CSR, ELL) + call dense2coo(dense_upper, COO_upper) + COO_upper%storage = sparse_upper + call coo2csr(COO_upper, CSR_upper) + call coo2csc(COO_upper, CSC_upper) + call csr2ell(CSR_upper, ELL_upper) - dense(2,1) = 2._wp; dense(3,2) = 2._wp; dense(4,3) = 2._wp - vec_y1 = matmul( dense, vec_x ) - call check(error, all(vec_y1 == [3,5,5,3]) ) + vec_y = 0._wp + call spmv(COO_low, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in COO lower ${s1}$ spmv") if (allocated(error)) return - call spmv( COO , vec_x, vec_y2 ) - call check(error, all(vec_y1 == vec_y2) ) + vec_y = 0._wp + call spmv(COO_low, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in COO lower ${s1}$ transpose spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(COO_upper, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in COO upper ${s1}$ spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(COO_upper, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in COO upper ${s1}$ transpose spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSR_low, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in CSR lower ${s1}$ spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSR_low, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in CSR lower ${s1}$ transpose spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSR_upper, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in CSR upper ${s1}$ spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSR_upper, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in CSR upper ${s1}$ transpose spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSC_low, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in CSC lower ${s1}$ spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSC_low, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in CSC lower ${s1}$ transpose spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSC_upper, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in CSC upper ${s1}$ spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(CSC_upper, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in CSC upper ${s1}$ transpose spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(ELL_low, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in ELL lower ${s1}$ spmv") + if (allocated(error)) return + + vec_y = 0._wp + call spmv(ELL_low, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in ELL lower ${s1}$ transpose spmv") if (allocated(error)) return - call spmv( CSR , vec_x, vec_y3 ) - call check(error, all(vec_y1 == vec_y3) ) + vec_y = 0._wp + call spmv(ELL_upper, vec_x, vec_y) + call check(error, all(vec_y == vec_y_ref), "error in ELL upper ${s1}$ spmv") if (allocated(error)) return - call spmv( ELL , vec_x, vec_y4 ) - call check(error, all(vec_y1 == vec_y4) ) + vec_y = 0._wp + call spmv(ELL_upper, vec_x, vec_y, op=sparse_op_transpose) + call check(error, all(vec_y == vec_y_ref), "error in ELL upper ${s1}$ transpose spmv") if (allocated(error)) return end block #:endfor From 6cea497c95ec3ed4bf76fc6cc9bf24ba9738dccd Mon Sep 17 00:00:00 2001 From: jalvesz Date: Tue, 28 Apr 2026 19:57:08 +0200 Subject: [PATCH 16/18] make reference arrays in test static Co-authored-by: Copilot --- test/linalg/test_linalg_sparse.fypp | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index 4fc3432ce..b4b097d54 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -260,21 +260,20 @@ contains type(CSR_${s1}$_type) :: CSR_low, CSR_upper type(CSC_${s1}$_type) :: CSC_low, CSC_upper type(ELL_${s1}$_type) :: ELL_low, ELL_upper - ${t1}$, allocatable :: dense(:,:), dense_low(:,:), dense_upper(:,:) - ${t1}$, allocatable :: vec_x(:), vec_y(:), vec_y_ref(:) + ${t1}$, parameter :: dense(4,4) = reshape(real([1,2,0,0, & + 2,3,4,0, & + 0,4,5,6, & + 0,0,6,7], kind=wp), [4,4]) + ${t1}$, parameter :: vec_x(4) = real([1,2,3,4], kind=wp) + ${t1}$, parameter :: vec_y_ref(4) = matmul(dense, vec_x) + ${t1}$ :: dense_low(4,4), dense_upper(4,4) + ${t1}$ :: vec_y(4) integer :: i, j - allocate( vec_x(4), source = real([1,2,3,4], kind=wp) ) - allocate( vec_y(4), source = 0._wp ) - allocate( vec_y_ref(4), source = 0._wp ) + vec_y = 0._wp - allocate( dense(4,4), source = & - reshape(real([1,2,0,0, & - 2,3,4,0, & - 0,4,5,6, & - 0,0,6,7], kind=wp), [4,4]) ) - allocate( dense_low(4,4), source = dense ) - allocate( dense_upper(4,4), source = dense ) + dense_low = dense + dense_upper = dense do i = 1, 4 do j = i + 1, 4 @@ -283,8 +282,6 @@ contains end do end do - vec_y_ref = matmul(dense, vec_x) - call dense2coo(dense_low, COO_low) COO_low%storage = sparse_lower call coo2csr(COO_low, CSR_low) From 77cf78d38a1e8f6a7053dc85bf48508b4def129c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Tue, 28 Apr 2026 22:19:56 +0200 Subject: [PATCH 17/18] Update test/linalg/test_linalg_sparse.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_sparse.fypp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index b4b097d54..df13fac1d 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -260,14 +260,15 @@ contains type(CSR_${s1}$_type) :: CSR_low, CSR_upper type(CSC_${s1}$_type) :: CSC_low, CSC_upper type(ELL_${s1}$_type) :: ELL_low, ELL_upper - ${t1}$, parameter :: dense(4,4) = reshape(real([1,2,0,0, & + integer, parameter :: ndim = 4 + ${t1}$, parameter :: dense(ndim,ndim) = reshape(real([1,2,0,0, & 2,3,4,0, & 0,4,5,6, & - 0,0,6,7], kind=wp), [4,4]) - ${t1}$, parameter :: vec_x(4) = real([1,2,3,4], kind=wp) - ${t1}$, parameter :: vec_y_ref(4) = matmul(dense, vec_x) - ${t1}$ :: dense_low(4,4), dense_upper(4,4) - ${t1}$ :: vec_y(4) + 0,0,6,7], kind=wp), [ndim,ndim]) + ${t1}$, parameter :: vec_x(ndim) = real([1,2,3,4], kind=wp) + ${t1}$, parameter :: vec_y_ref(ndim) = matmul(dense, vec_x) + ${t1}$ :: dense_low(ndim,ndim), dense_upper(ndim,ndim) + ${t1}$ :: vec_y(ndim) integer :: i, j vec_y = 0._wp From ac45093984855006b9929576e59724c0aad6cf48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Alves?= <102541118+jalvesz@users.noreply.github.com> Date: Tue, 28 Apr 2026 22:20:14 +0200 Subject: [PATCH 18/18] Update test/linalg/test_linalg_sparse.fypp Co-authored-by: Jeremie Vandenplas --- test/linalg/test_linalg_sparse.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/linalg/test_linalg_sparse.fypp b/test/linalg/test_linalg_sparse.fypp index df13fac1d..0ebe235b5 100644 --- a/test/linalg/test_linalg_sparse.fypp +++ b/test/linalg/test_linalg_sparse.fypp @@ -276,8 +276,8 @@ contains dense_low = dense dense_upper = dense - do i = 1, 4 - do j = i + 1, 4 + do i = 1, ndim + do j = i + 1, ndim dense_low(i,j) = 0._wp dense_upper(j,i) = 0._wp end do