// Iterator (index) classes inline int inRange(const int val, const int count, const int base, const int stride) { const int offset = val - base ; const int pos = offset / stride ; return pos >= 0 && pos < count && pos * stride == offset ; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // // // class `LocBlocksIndex': enumerate local blocks of range. // // // // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // class LocBlocksIndex : public Location, public Block { public : LocBlocksIndex(Range _x) : x(_x), simple(_x.format() != DIST_BLOCK_CYCLIC) { x.crds(&d_count, &d_bas, &d_str) ; dim = x.dim() ; } void beginLocBlk() { crd = dim.crd() ; looping = 0 ; if(simple) { // Normal case if(inRange(crd, d_count, d_bas, d_str)) { x.block(this, crd) ; sub = sub_bas ; looping = 1 ; } } else { // Block-Cyclic case // Here the triplet range (d_count, d_bas, d_str) is in terms of // ``virtual processors'', ie, relative to the kernel cyclic // range rather than the actual process dimension. // // `d_str' may be +1 or -1. P = dim.size() ; // Compute local range of `blk' subscript, which // identifies individual blocks. Kernel subscript // is invariantly: // // ker = crd + P * blk // // (where `ker' is in the `d'-triplet range). if(d_str > 0) { blk = (d_bas - crd + P - 1) / P ; blk_count = (d_bas + d_count - crd + P - 1) / P - blk ; ker = crd + P * blk ; ker_stp = P ; } else { blk = (d_bas - crd + P) / P - 1 ; blk_count = blk - (d_bas - d_count - crd + P) / P + 1 ; ker = crd + P * blk ; ker_stp = -P ; } blk_l = 0 ; if(blk_l < blk_count) { x.block(this, ker) ; sub = sub_bas ; looping = 1 ; } } } int test() {return looping ; } void nextLocBlk() { if(simple) // Normal case looping = 0 ; else { // Block-Cyclic case blk += d_str ; ker += ker_stp ; blk_l++ ; if(blk_l < blk_count) { x.block(this, ker) ; sub = sub_bas ; } else looping = 0 ; } } protected : Range x ; int simple ; // Flag set TRUE if distribution is *not* block-cyclic. int d_count, d_bas, d_str ; int looping ; // For block cyclic ranges: int P ; int blk_count, blk_l, ker, ker_stp ; private : LocBlocksIndex(const LocBlocksIndex&) {} ; // No copying allowed. } ; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // // // class `LocBlocksIndex2': Like `LocBlocksIndex', but range // // restricted by an interval. // // // // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // class LocBlocksIndex2 : public Location, public Block { public : LocBlocksIndex2(Range _x, const int _i_count, const int _i_bas) : x(_x), i_count(_i_count), i_bas(_i_bas), simple(_x.format() != DIST_BLOCK_CYCLIC) { x.crds(&d_count, &d_bas, &d_str, i_count, i_bas) ; dim = x.dim() ; } void beginLocBlk() { crd = dim.crd() ; looping = 0 ; if(simple) { // Normal case if(inRange(crd, d_count, d_bas, d_str)) { x.block(this, crd, i_count, i_bas) ; sub = sub_bas ; looping = 1 ; } } else { // Block-Cyclic case // Here the triplet range (d_count, d_bas, d_str) is in terms of // ``virtual processors'', ie, relative to the kernel cyclic // range rather than the actual process dimension. // // `d_str' may be +1 or -1. P = dim.size() ; // Compute local range of `blk' subscript, which // identifies individual blocks. Kernel subscript // is invariantly: // // ker = crd + P * blk // // (where `ker' is in the `d'-triplet range). if(d_str > 0) { blk = (d_bas - crd + P - 1) / P ; blk_count = (d_bas + d_count - crd + P - 1) / P - blk ; ker = crd + P * blk ; ker_stp = P ; } else { blk = (d_bas - crd + P) / P - 1 ; blk_count = blk - (d_bas - d_count - crd + P) / P + 1 ; ker = crd + P * blk ; ker_stp = -P ; } blk_l = 0 ; if(blk_l < blk_count) { x.block(this, ker, i_count, i_bas) ; sub = sub_bas ; looping = 1 ; } } } int test() {return looping ; } void nextLocBlk() { if(simple) // Normal case looping = 0 ; else { // Block-Cyclic case blk += d_str ; ker += ker_stp ; blk_l++ ; if(blk_l < blk_count) { x.block(this, ker, i_count, i_bas) ; sub = sub_bas ; } else looping = 0 ; } } public: int i_count, i_bas ; protected : Range x ; int simple ; // Flag set TRUE if distribution is *not* block-cyclic. int d_count, d_bas, d_str ; int looping ; // For block cyclic ranges: int P ; int blk_count, blk_l, ker, ker_stp ; private : LocBlocksIndex2(const LocBlocksIndex2&) {} ; // No copying allowed. } ; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // // // class `LocBlocksIndex3': Like `LocBlocksIndex', but range // // restricted by a triplet. // // // // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // class LocBlocksIndex3 : public Location, public Block { public : LocBlocksIndex3(Range _x, const int _i_count, const int _i_bas, const int _i_str) : x(_x), i_count(_i_count), i_bas(_i_bas), i_str(_i_str), simple(_x.format() != DIST_BLOCK_CYCLIC) { x.crds(&d_count, &d_bas, &d_str, i_count, i_bas, i_str) ; dim = x.dim() ; } void beginLocBlk() { crd = dim.crd() ; looping = 0 ; if(simple) { // Normal case if(inRange(crd, d_count, d_bas, d_str)) { x.block(this, crd, i_count, i_bas, i_str) ; sub = sub_bas ; looping = 1 ; } } else { // Block-Cyclic case // Here the triplet range (d_count, d_bas, d_str) is in terms of // ``virtual processors'', ie, relative to the kernel cyclic // range rather than the actual process dimension. // // `d_str' may be +1 or -1. P = dim.size() ; // Compute local range of `blk' subscript, which // identifies individual blocks. Kernel subscript // is invariantly: // // ker = crd + P * blk // // (where `ker' is in the `d'-triplet range). if(d_str > 0) { blk = (d_bas - crd + P - 1) / P ; blk_count = (d_bas + d_count - crd + P - 1) / P - blk ; ker = crd + P * blk ; ker_stp = P ; } else { blk = (d_bas - crd + P) / P - 1 ; blk_count = blk - (d_bas - d_count - crd + P) / P + 1 ; ker = crd + P * blk ; ker_stp = -P ; } blk_l = 0 ; if(blk_l < blk_count) { x.block(this, ker, i_count, i_bas, i_str) ; sub = sub_bas ; looping = 1 ; } } } int test() {return looping ; } void nextLocBlk() { if(simple) // Normal case looping = 0 ; else { // Block-Cyclic case blk += d_str ; ker += ker_stp ; blk_l++ ; if(blk_l < blk_count) { x.block(this, ker, i_count, i_bas, i_str) ; sub = sub_bas ; } else looping = 0 ; } } public : int i_count, i_bas, i_str ; protected : Range x ; int simple ; // Flag set TRUE if distribution is *not* block-cyclic. int d_count, d_bas, d_str ; int looping ; // For block cyclic ranges: int P ; int blk_count, blk_l, ker, ker_stp ; private : LocBlocksIndex3(const LocBlocksIndex3&) {} ; // No copying allowed. } ; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // // // class `AllBlocksIndex': enumerate all blocks of range. // // // // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // class AllBlocksIndex : public LocBlocksIndex { public : AllBlocksIndex(Range _x) : LocBlocksIndex(_x) {} // g++ v2.7.2 needs this ~AllBlocksIndex() {} void beginAllBlk() { looping = 0 ; if(simple) { // Normal case crd = d_bas ; d_l = 0 ; if(d_l < d_count) { x.block(this, crd) ; sub = sub_bas ; looping = 1 ; } } else { // Block-Cylic case // The triplet (d_count, d_bas, d_str) defines range of // subscripts in kernel cyclic range. P = dim.size() ; d_l = 0 ; ker = d_bas ; crd = ker % P ; blk = ker / P ; d_l = 0 ; if(d_l < d_count) { x.block(this, ker) ; sub = sub_bas ; looping = 1 ; } } } void nextAllBlk() { if(simple) { // Normal case d_l++ ; crd += d_str ; if(d_l < d_count) { x.block(this, crd) ; sub = sub_bas ; } else looping = 0 ; } else { // Block-Cyclic case d_l++ ; ker += d_str ; crd = ker % P ; blk = ker / P ; if(d_l < d_count) { x.block(this, ker) ; sub = sub_bas ; } else looping = 0 ; } } private : int d_l ; } ; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // // // class `AllBlocksIndex2': Like `AllBlocksIndex', but range // // restricted by an interval. // // // // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // class AllBlocksIndex2 : public LocBlocksIndex2 { public : AllBlocksIndex2(Range _x, const int _i_count, const int _i_bas) : LocBlocksIndex2(_x, _i_count, _i_bas) {} void beginAllBlk() { looping = 0 ; if(simple) { // Normal case crd = d_bas ; d_l = 0 ; if(d_l < d_count) { x.block(this, crd, i_count, i_bas) ; sub = sub_bas ; looping = 1 ; } } else { // Block-Cylic case // The triplet (d_count, d_bas, d_str) defines range of // subscripts in kernel cyclic range. P = dim.size() ; d_l = 0 ; ker = d_bas ; crd = ker % P ; blk = ker / P ; d_l = 0 ; if(d_l < d_count) { x.block(this, ker, i_count, i_bas) ; sub = sub_bas ; looping = 1 ; } } } void nextAllBlk() { if(simple) { // Normal case d_l++ ; crd += d_str ; if(d_l < d_count) { x.block(this, crd, i_count, i_bas) ; sub = sub_bas ; } else looping = 0 ; } else { // Block-Cyclic case d_l++ ; ker += d_str ; crd = ker % P ; blk = ker / P ; if(d_l < d_count) { x.block(this, ker, i_count, i_bas) ; sub = sub_bas ; } else looping = 0 ; } } private : int d_l ; } ; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // // // class `AllBlocksIndex3': Like `AllBlocksIndex', but range // // restricted by a triplet. // // // // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // class AllBlocksIndex3 : public LocBlocksIndex3 { public : AllBlocksIndex3(Range _x, const int _i_count, const int _i_bas, const int _i_str) : LocBlocksIndex3(_x, _i_count, _i_bas, _i_str) {} void beginAllBlk() { looping = 0 ; if(simple) { // Normal case crd = d_bas ; d_l = 0 ; if(d_l < d_count) { x.block(this, crd, i_count, i_bas, i_str) ; sub = sub_bas ; looping = 1 ; } } else { // Block-Cylic case // The triplet (d_count, d_bas, d_str) defines range of // subscripts in kernel cyclic range. P = dim.size() ; d_l = 0 ; ker = d_bas ; crd = ker % P ; blk = ker / P ; d_l = 0 ; if(d_l < d_count) { x.block(this, ker, i_count, i_bas, i_str) ; sub = sub_bas ; looping = 1 ; } } } void nextAllBlk() { if(simple) { // Normal case d_l++ ; crd += d_str ; if(d_l < d_count) { x.block(this, crd, i_count, i_bas, i_str) ; sub = sub_bas ; } else looping = 0 ; } else { // Block-Cyclic case d_l++ ; ker += d_str ; crd = ker % P ; blk = ker / P ; if(d_l < d_count) { x.block(this, ker, i_count, i_bas, i_str) ; sub = sub_bas ; } else looping = 0 ; } } private : int d_l ; } ;