Skip to content

Commit b384158

Browse files
chore: update minified library versions
1 parent 3d482d0 commit b384158

File tree

9 files changed

+33
-11
lines changed

9 files changed

+33
-11
lines changed

cp-algo/min/linalg/matrix.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#ifndef CP_ALGO_LINALG_MATRIX_HPP
22
#define CP_ALGO_LINALG_MATRIX_HPP
3+
#pragma GCC push_options
4+
#pragma GCC target("avx2")
35
#include "../random/rng.hpp"
46
#include "../math/common.hpp"
57
#include "vector.hpp"
@@ -9,4 +11,5 @@
911
#include <vector>
1012
#include <array>
1113
namespace cp_algo::linalg{enum gauss_mode{normal,reverse};template<typename base_t,class _vec_t=std::conditional_t<math::modint_type<base_t>,modint_vec<base_t>,vec<base_t>>>struct matrix:big_vector<_vec_t>{using vec_t=_vec_t;using base=base_t;using Base=big_vector<vec_t>;using Base::Base;matrix(size_t n):Base(n,vec_t(n)){}matrix(size_t n,size_t m):Base(n,vec_t(m)){}matrix(Base const&t):Base(t){}matrix(Base&&t):Base(std::move(t)){}template<std::ranges::input_range R>matrix(R&&r):Base(std::ranges::to<Base>(std::forward<R>(r))){}size_t n()const{return size(*this);}size_t m()const{return n()?size(row(0)):0;}void resize(size_t n,size_t m){Base::resize(n);for(auto&it:*this){it.resize(m);}}auto&row(size_t i){return(*this)[i];}auto const&row(size_t i)const{return(*this)[i];}auto elements(){return*this|std::views::join;}auto elements()const{return*this|std::views::join;}matrix operator-()const{return*this|std::views::transform([](auto x){return vec_t(-x);});}matrix&operator+=(matrix const&t){for(auto[a,b]:std::views::zip(elements(),t.elements())){a+=b;}return*this;}matrix&operator-=(matrix const&t){for(auto[a,b]:std::views::zip(elements(),t.elements())){a-=b;}return*this;}matrix operator+(matrix const&t)const{return matrix(*this)+=t;}matrix operator-(matrix const&t)const{return matrix(*this)-=t;}matrix&operator*=(base t){for(auto&it:*this)it*=t;return*this;}matrix operator*(base t)const{return matrix(*this)*=t;}matrix&operator/=(base t){return*this*=base(1)/t;}matrix operator/(base t)const{return matrix(*this)/=t;}matrix&operator*=(matrix const&t){return*this=*this*t;}void read_transposed(){for(size_t j=0;j<m();j++){for(size_t i=0;i<n();i++){std::cin>>(*this)[i][j];}}}void read(){for(auto&it:*this){it.read();}}void print()const{for(auto const&it:*this){it.print();}}static matrix block_diagonal(big_vector<matrix>const&blocks){size_t n=0;for(auto&it:blocks){assert(it.n()==it.m());n+=it.n();}matrix res(n);n=0;for(auto&it:blocks){for(size_t i=0;i<it.n();i++){std::ranges::copy(it[i],begin(res[n+i])+n);}n+=it.n();}return res;}static matrix random(size_t n,size_t m){matrix res(n,m);std::ranges::generate(res,std::bind(vec_t::random,m));return res;}static matrix random(size_t n){return random(n,n);}static matrix eye(size_t n){matrix res(n);for(size_t i=0;i<n;i++){res[i][i]=1;}return res;}matrix operator|(matrix const&b)const{assert(n()==b.n());matrix res(n(),m()+b.m());for(size_t i=0;i<n();i++){res[i]=row(i)|b[i];}return res;}void assign_submatrix(auto viewx,auto viewy,matrix const&t){for(auto[a,b]:std::views::zip(*this|viewx,t)){std::ranges::copy(b,begin(a|viewy));}}auto submatrix(auto viewx,auto viewy)const{return*this|viewx|std::views::transform([viewy](auto const&y){return y|viewy;});}matrix T()const{matrix res(m(),n());for(size_t i=0;i<n();i++){for(size_t j=0;j<m();j++){res[j][i]=row(i)[j];}}return res;}matrix operator*(matrix const&b)const{assert(m()==b.n());matrix res(n(),b.m());for(size_t i=0;i<n();i++){for(size_t j=0;j<m();j++){res[i].add_scaled(b[j],row(i)[j]);}}return res.normalize();}vec_t apply(vec_t const&x)const{return(matrix(1,x)**this)[0];}matrix pow(uint64_t k)const{assert(n()==m());return bpow(*this,k,eye(n()));}matrix&normalize(){for(auto&it:*this){it.normalize();}return*this;}template<gauss_mode mode=normal>void eliminate(size_t i,size_t k){auto kinv=base(1)/row(i).normalize()[k];for(size_t j=(mode==normal)*i;j<n();j++){if(j!=i){row(j).add_scaled(row(i),-row(j).normalize(k)*kinv);}}}template<gauss_mode mode=normal>void eliminate(size_t i){row(i).normalize();for(size_t j=(mode==normal)*i;j<n();j++){if(j!=i){row(j).reduce_by(row(i));}}}template<gauss_mode mode=normal>matrix&gauss(){for(size_t i=0;i<n();i++){eliminate<mode>(i);}return normalize();}template<gauss_mode mode=normal>auto echelonize(size_t lim){return gauss<mode>().sort_classify(lim);}template<gauss_mode mode=normal>auto echelonize(){return echelonize<mode>(m());}size_t rank()const{if(n()>m()){return T().rank();}return size(matrix(*this).echelonize()[0]);}base det()const{assert(n()==m());matrix b=*this;b.echelonize();base res=1;for(size_t i=0;i<n();i++){res*=b[i][i];}return res;}std::pair<base,matrix>inv()const{assert(n()==m());matrix b=*this|eye(n());if(size(b.echelonize<reverse>(n())[0])<n()){return{0,{}};}base det=1;for(size_t i=0;i<n();i++){det*=b[i][i];b[i]*=base(1)/b[i][i];}return{det,b.submatrix(std::views::all,std::views::drop(n()))};}auto kernel()const{auto A=*this;auto[pivots,free]=A.template echelonize<reverse>();matrix sols(size(free),m());for(size_t j=0;j<size(pivots);j++){base scale=base(1)/A[j][pivots[j]];for(size_t i=0;i<size(free);i++){sols[i][pivots[j]]=A[j][free[i]]*scale;}}for(size_t i=0;i<size(free);i++){sols[i][free[i]]=-1;}return sols;}std::optional<std::array<matrix,2>>solve(matrix t)const{matrix sols=(*this|t).kernel();if(sols.n()<t.m()||matrix(sols.submatrix(std::views::drop(sols.n()-t.m()),std::views::drop(m())))!=-eye(t.m())){return std::nullopt;}else{return std::array{matrix(sols.submatrix(std::views::drop(sols.n()-t.m()),std::views::take(m()))),matrix(sols.submatrix(std::views::take(sols.n()-t.m()),std::views::take(m())))};}}auto sort_classify(size_t lim){size_t rk=0;big_vector<size_t>free,pivots;for(size_t j=0;j<lim;j++){for(size_t i=rk+1;i<n()&&row(rk)[j]==base(0);i++){if(row(i)[j]!=base(0)){std::swap(row(i),row(rk));row(rk)=-row(rk);}}if(rk<n()&&row(rk)[j]!=base(0)){pivots.push_back(j);rk++;}else{free.push_back(j);}}return std::array{pivots,free};}};template<typename base_t>auto operator*(base_t t,matrix<base_t>const&A){return A*t;}}
14+
#pragma GCC pop_options
1215
#endif

cp-algo/min/linalg/vector.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#ifndef CP_ALGO_LINALG_VECTOR_HPP
22
#define CP_ALGO_LINALG_VECTOR_HPP
3+
#pragma GCC push_options
4+
#pragma GCC target("avx2")
35
#include "../random/rng.hpp"
46
#include "../number_theory/modint.hpp"
57
#include "../util/big_alloc.hpp"
@@ -19,4 +21,5 @@ ai+=u64x4(_mm256_mul_epu32(__m256i(scaler),__m256i(bi)));
1921
ai+=scaler*bi;
2022
#endif
2123
}for(;i<n;i++){(*this)[i].add_unsafe(b[i].getr_direct()*scale.getr());}if(++counter==4){for(auto&it:*this){it.pseudonormalize();}counter=0;}}}Base const&normalize()override{for(auto&it:*this){it.normalize();}return*this;}base normalize(size_t i)override{return(*this)[i].normalize();}private:size_t counter=0;};}
24+
#pragma GCC pop_options
2225
#endif

cp-algo/min/math/cvector.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
#ifndef CP_ALGO_MATH_CVECTOR_HPP
22
#define CP_ALGO_MATH_CVECTOR_HPP
3+
#pragma GCC push_options
4+
#pragma GCC target("avx2")
35
#include "../util/simd.hpp"
46
#include "../util/complex.hpp"
57
#include "../util/checkpoint.hpp"
68
#include "../util/big_alloc.hpp"
79
#include <ranges>
810
#include <bit>
9-
namespace stdx=std::experimental;namespace cp_algo::math::fft{static constexpr size_t flen=4;using ftype=double;using vftype=dx4;using point=complex<ftype>;using vpoint=complex<vftype>;static constexpr vftype vz={};simd_target vpoint vi(vpoint const&r){return{-imag(r),real(r)};}struct cvector{big_vector<vpoint>r;cvector(size_t n){n=std::max(flen,std::bit_ceil(n));r.resize(n/flen);checkpoint("cvector create");}vpoint&at(size_t k){return r[k/flen];}vpoint at(size_t k)const{return r[k/flen];}template<class pt=point>simd_inline void set(size_t k,pt const&t){if constexpr(std::is_same_v<pt,point>){real(r[k/flen])[k%flen]=real(t);imag(r[k/flen])[k%flen]=imag(t);}else{at(k)=t;}}template<class pt=point>simd_inline pt get(size_t k)const{if constexpr(std::is_same_v<pt,point>){return{real(r[k/flen])[k%flen],imag(r[k/flen])[k%flen]};}else{return at(k);}}size_t size()const{return flen*r.size();}static constexpr size_t eval_arg(size_t n){if(n<pre_evals){return eval_args[n];}else{return eval_arg(n/2)|(n&1)<<(std::bit_width(n)-1);}}static constexpr point eval_point(size_t n){if(n%2){return-eval_point(n-1);}else if(n%4){return eval_point(n-2)*point(0,1);}else if(n/4<pre_evals){return evalp[n/4];}else{return polar<ftype>(1.,std::numbers::pi/(ftype)std::bit_floor(n)*(ftype)eval_arg(n));}}static constexpr std::array<point,32>roots=[](){std::array<point,32>res;for(size_t i=2;i<32;i++){res[i]=polar<ftype>(1.,std::numbers::pi/(1ull<<(i-2)));}return res;}();static constexpr point root(size_t n){return roots[std::bit_width(n)];}template<int step>simd_target static void exec_on_eval(size_t n,size_t k,auto&&callback){callback(k,root(4*step*n)*eval_point(step*k));}template<int step>simd_target static void exec_on_evals(size_t n,auto&&callback){point factor=root(4*step*n);for(size_t i=0;i<n;i++){callback(i,factor*eval_point(step*i));}}simd_target static void do_dot_iter(point rt,vpoint&Bv,vpoint const&Av,vpoint&res){res+=Av*Bv;real(Bv)=rotate_right(real(Bv));imag(Bv)=rotate_right(imag(Bv));auto x=real(Bv)[0],y=imag(Bv)[0];real(Bv)[0]=x*real(rt)-y*imag(rt);imag(Bv)[0]=x*imag(rt)+y*real(rt);}simd_target void dot(cvector const&t){size_t n=this->size();exec_on_evals<1>(n/flen,[&](size_t k,point rt){k*=flen;auto[Ax,Ay]=at(k);auto Bv=t.at(k);vpoint res=vz;for(size_t i=0;i<flen;i++){vpoint Av=vpoint(vz+Ax[i],vz+Ay[i]);do_dot_iter(rt,Bv,Av,res);}set(k,res);});checkpoint("dot");}template<bool partial=true>simd_target void ifft(){size_t n=size();if constexpr(!partial){point pi(0,1);exec_on_evals<4>(n/4,[&](size_t k,point rt){k*=4;point v1=conj(rt);point v2=v1*v1;point v3=v1*v2;auto A=get(k);auto B=get(k+1);auto C=get(k+2);auto D=get(k+3);set(k,(A+B)+(C+D));set(k+2,((A+B)-(C+D))*v2);set(k+1,((A-B)-pi*(C-D))*v1);set(k+3,((A-B)+pi*(C-D))*v3);});}bool parity=std::countr_zero(n)%2;if(parity){exec_on_evals<2>(n/(2*flen),[&](size_t k,point rt){k*=2*flen;vpoint cvrt={vz+real(rt),vz-imag(rt)};auto B=at(k)-at(k+flen);at(k)+=at(k+flen);at(k+flen)=B*cvrt;});}for(size_t leaf=3*flen;leaf<n;leaf+=4*flen){size_t level=std::countr_one(leaf+3);for(size_t lvl=4+parity;lvl<=level;lvl+=2){size_t i=(1<<lvl)/4;exec_on_eval<4>(n>>lvl,leaf>>lvl,[&](size_t k,point rt){k<<=lvl;vpoint v1={vz+real(rt),vz-imag(rt)};vpoint v2=v1*v1;vpoint v3=v1*v2;for(size_t j=k;j<k+i;j+=flen){auto A=at(j);auto B=at(j+i);auto C=at(j+2*i);auto D=at(j+3*i);at(j)=((A+B)+(C+D));at(j+2*i)=((A+B)-(C+D))*v2;at(j+i)=((A-B)-vi(C-D))*v1;at(j+3*i)=((A-B)+vi(C-D))*v3;}});}}checkpoint("ifft");for(size_t k=0;k<n;k+=flen){if constexpr(partial){set(k,get<vpoint>(k)/=vz+ftype(n/flen));}else{set(k,get<vpoint>(k)/=vz+ftype(n));}}}template<bool partial=true>simd_target void fft(){size_t n=size();bool parity=std::countr_zero(n)%2;for(size_t leaf=0;leaf<n;leaf+=4*flen){size_t level=std::countr_zero(n+leaf);level-=level%2!=parity;for(size_t lvl=level;lvl>=4;lvl-=2){size_t i=(1<<lvl)/4;exec_on_eval<4>(n>>lvl,leaf>>lvl,[&](size_t k,point rt){k<<=lvl;vpoint v1={vz+real(rt),vz+imag(rt)};vpoint v2=v1*v1;vpoint v3=v1*v2;for(size_t j=k;j<k+i;j+=flen){auto A=at(j);auto B=at(j+i)*v1;auto C=at(j+2*i)*v2;auto D=at(j+3*i)*v3;at(j)=(A+C)+(B+D);at(j+i)=(A+C)-(B+D);at(j+2*i)=(A-C)+vi(B-D);at(j+3*i)=(A-C)-vi(B-D);}});}}if(parity){exec_on_evals<2>(n/(2*flen),[&](size_t k,point rt){k*=2*flen;vpoint vrt={vz+real(rt),vz+imag(rt)};auto t=at(k+flen)*vrt;at(k+flen)=at(k)-t;at(k)+=t;});}if constexpr(!partial){point pi(0,1);exec_on_evals<4>(n/4,[&](size_t k,point rt){k*=4;point v1=rt;point v2=v1*v1;point v3=v1*v2;auto A=get(k);auto B=get(k+1)*v1;auto C=get(k+2)*v2;auto D=get(k+3)*v3;set(k,(A+C)+(B+D));set(k+1,(A+C)-(B+D));set(k+2,(A-C)+pi*(B-D));set(k+3,(A-C)-pi*(B-D));});}checkpoint("fft");}static constexpr size_t pre_evals=1<<16;static const std::array<size_t,pre_evals>eval_args;static const std::array<point,pre_evals>evalp;};const std::array<size_t,cvector::pre_evals>cvector::eval_args=[](){std::array<size_t,pre_evals>res={};for(size_t i=1;i<pre_evals;i++){res[i]=res[i>>1]|(i&1)<<(std::bit_width(i)-1);}return res;}();const std::array<point,cvector::pre_evals>cvector::evalp=[](){std::array<point,pre_evals>res={};res[0]=1;for(size_t n=1;n<pre_evals;n++){res[n]=polar<ftype>(1.,std::numbers::pi*ftype(eval_args[n])/ftype(4*std::bit_floor(n)));}return res;}();}
11+
namespace stdx=std::experimental;namespace cp_algo::math::fft{static constexpr size_t flen=4;using ftype=double;using vftype=dx4;using point=complex<ftype>;using vpoint=complex<vftype>;static constexpr vftype vz={};vpoint vi(vpoint const&r){return{-imag(r),real(r)};}struct cvector{big_vector<vpoint>r;cvector(size_t n){n=std::max(flen,std::bit_ceil(n));r.resize(n/flen);checkpoint("cvector create");}vpoint&at(size_t k){return r[k/flen];}vpoint at(size_t k)const{return r[k/flen];}template<class pt=point>inline void set(size_t k,pt const&t){if constexpr(std::is_same_v<pt,point>){real(r[k/flen])[k%flen]=real(t);imag(r[k/flen])[k%flen]=imag(t);}else{at(k)=t;}}template<class pt=point>inline pt get(size_t k)const{if constexpr(std::is_same_v<pt,point>){return{real(r[k/flen])[k%flen],imag(r[k/flen])[k%flen]};}else{return at(k);}}size_t size()const{return flen*r.size();}static constexpr size_t eval_arg(size_t n){if(n<pre_evals){return eval_args[n];}else{return eval_arg(n/2)|(n&1)<<(std::bit_width(n)-1);}}static constexpr point eval_point(size_t n){if(n%2){return-eval_point(n-1);}else if(n%4){return eval_point(n-2)*point(0,1);}else if(n/4<pre_evals){return evalp[n/4];}else{return polar<ftype>(1.,std::numbers::pi/(ftype)std::bit_floor(n)*(ftype)eval_arg(n));}}static constexpr std::array<point,32>roots=[](){std::array<point,32>res;for(size_t i=2;i<32;i++){res[i]=polar<ftype>(1.,std::numbers::pi/(1ull<<(i-2)));}return res;}();static constexpr point root(size_t n){return roots[std::bit_width(n)];}template<int step>static void exec_on_eval(size_t n,size_t k,auto&&callback){callback(k,root(4*step*n)*eval_point(step*k));}template<int step>static void exec_on_evals(size_t n,auto&&callback){point factor=root(4*step*n);for(size_t i=0;i<n;i++){callback(i,factor*eval_point(step*i));}}static void do_dot_iter(point rt,vpoint&Bv,vpoint const&Av,vpoint&res){res+=Av*Bv;real(Bv)=rotate_right(real(Bv));imag(Bv)=rotate_right(imag(Bv));auto x=real(Bv)[0],y=imag(Bv)[0];real(Bv)[0]=x*real(rt)-y*imag(rt);imag(Bv)[0]=x*imag(rt)+y*real(rt);}void dot(cvector const&t){size_t n=this->size();exec_on_evals<1>(n/flen,[&](size_t k,point rt)__attribute__((always_inline)){k*=flen;auto[Ax,Ay]=at(k);auto Bv=t.at(k);vpoint res=vz;for(size_t i=0;i<flen;i++){vpoint Av=vpoint(vz+Ax[i],vz+Ay[i]);do_dot_iter(rt,Bv,Av,res);}set(k,res);});checkpoint("dot");}template<bool partial=true>void ifft(){size_t n=size();if constexpr(!partial){point pi(0,1);exec_on_evals<4>(n/4,[&](size_t k,point rt)__attribute__((always_inline)){k*=4;point v1=conj(rt);point v2=v1*v1;point v3=v1*v2;auto A=get(k);auto B=get(k+1);auto C=get(k+2);auto D=get(k+3);set(k,(A+B)+(C+D));set(k+2,((A+B)-(C+D))*v2);set(k+1,((A-B)-pi*(C-D))*v1);set(k+3,((A-B)+pi*(C-D))*v3);});}bool parity=std::countr_zero(n)%2;if(parity){exec_on_evals<2>(n/(2*flen),[&](size_t k,point rt)__attribute__((always_inline)){k*=2*flen;vpoint cvrt={vz+real(rt),vz-imag(rt)};auto B=at(k)-at(k+flen);at(k)+=at(k+flen);at(k+flen)=B*cvrt;});}for(size_t leaf=3*flen;leaf<n;leaf+=4*flen){size_t level=std::countr_one(leaf+3);for(size_t lvl=4+parity;lvl<=level;lvl+=2){size_t i=(1<<lvl)/4;exec_on_eval<4>(n>>lvl,leaf>>lvl,[&](size_t k,point rt)__attribute__((always_inline)){k<<=lvl;vpoint v1={vz+real(rt),vz-imag(rt)};vpoint v2=v1*v1;vpoint v3=v1*v2;for(size_t j=k;j<k+i;j+=flen){auto A=at(j);auto B=at(j+i);auto C=at(j+2*i);auto D=at(j+3*i);at(j)=((A+B)+(C+D));at(j+2*i)=((A+B)-(C+D))*v2;at(j+i)=((A-B)-vi(C-D))*v1;at(j+3*i)=((A-B)+vi(C-D))*v3;}});}}checkpoint("ifft");for(size_t k=0;k<n;k+=flen){if constexpr(partial){set(k,get<vpoint>(k)/=vz+ftype(n/flen));}else{set(k,get<vpoint>(k)/=vz+ftype(n));}}}template<bool partial=true>void fft(){size_t n=size();bool parity=std::countr_zero(n)%2;for(size_t leaf=0;leaf<n;leaf+=4*flen){size_t level=std::countr_zero(n+leaf);level-=level%2!=parity;for(size_t lvl=level;lvl>=4;lvl-=2){size_t i=(1<<lvl)/4;exec_on_eval<4>(n>>lvl,leaf>>lvl,[&](size_t k,point rt)__attribute__((always_inline)){k<<=lvl;vpoint v1={vz+real(rt),vz+imag(rt)};vpoint v2=v1*v1;vpoint v3=v1*v2;for(size_t j=k;j<k+i;j+=flen){auto A=at(j);auto B=at(j+i)*v1;auto C=at(j+2*i)*v2;auto D=at(j+3*i)*v3;at(j)=(A+C)+(B+D);at(j+i)=(A+C)-(B+D);at(j+2*i)=(A-C)+vi(B-D);at(j+3*i)=(A-C)-vi(B-D);}});}}if(parity){exec_on_evals<2>(n/(2*flen),[&](size_t k,point rt)__attribute__((always_inline)){k*=2*flen;vpoint vrt={vz+real(rt),vz+imag(rt)};auto t=at(k+flen)*vrt;at(k+flen)=at(k)-t;at(k)+=t;});}if constexpr(!partial){point pi(0,1);exec_on_evals<4>(n/4,[&](size_t k,point rt)__attribute__((always_inline)){k*=4;point v1=rt;point v2=v1*v1;point v3=v1*v2;auto A=get(k);auto B=get(k+1)*v1;auto C=get(k+2)*v2;auto D=get(k+3)*v3;set(k,(A+C)+(B+D));set(k+1,(A+C)-(B+D));set(k+2,(A-C)+pi*(B-D));set(k+3,(A-C)-pi*(B-D));});}checkpoint("fft");}static constexpr size_t pre_evals=1<<16;static const std::array<size_t,pre_evals>eval_args;static const std::array<point,pre_evals>evalp;};const std::array<size_t,cvector::pre_evals>cvector::eval_args=[](){std::array<size_t,pre_evals>res={};for(size_t i=1;i<pre_evals;i++){res[i]=res[i>>1]|(i&1)<<(std::bit_width(i)-1);}return res;}();const std::array<point,cvector::pre_evals>cvector::evalp=[](){std::array<point,pre_evals>res={};res[0]=1;for(size_t n=1;n<pre_evals;n++){res[n]=polar<ftype>(1.,std::numbers::pi*ftype(eval_args[n])/ftype(4*std::bit_floor(n)));}return res;}();}
12+
#pragma GCC pop_options
1013
#endif

0 commit comments

Comments
 (0)