Initially,
-
a = [0, 1, 2, 3, 4]
-
b = [0, 10, 20, 30, 40]
-
c = [-1, -1, -1, -1, -1]
|
FORALL ( i = 2:4 )
-
a(i) = a(i-1) + a(i+1)
-
c(i) = b(i) * a(i+1)
|
END FORALL
|
Afterwards,
-
a = [0, 2, 4, 6, 4]
-
b = [0, 10, 20, 30, 40]
-
c = [-1, 40, 120, 120, -1]
|
Initially,
-
a = [0, 1, 2, 3, 4]
-
b = [10, 20, 30, 40, 50]
-
c = [-1, -1, -1, -1, -1]
|
DO i = 2, 4
-
a(i) = a(i-1) + a(i+1)
-
c(i) = b(i) * a(i+1)
|
END DO
|
Afterwards,
-
a = [0, 2, 5, 9, 4]
-
b = [10, 20, 30, 40, 50]
-
c = [-1, 20, 60, 120, -1]
|
k0 = 0
|
FORALL ( i=1:n, j=1:m )
-
x(i,j) = CMPLX((i-1)*1.0/(n-1),(j-1)*1.0/(m-1))
-
k(i,j) = 0
-
xtmp(i,j) = -x(i,j)
-
mask(i,j) = .TRUE.
|
END FORALL
|
DO WHILE (ANY(mask(1:n,1:m)) .AND. k0<1000)
-
FORALL ( i=1:n, j=1:m, mask(i,j))
-
xtmp(i,j) = xtmp(i,j)*xtmp(i,j)-x(i,j)
-
k(i,j) = k(i,j) + 1
-
mask(i,j) = ABS(xtmp(i,j))<2.0
-
END FORALL
-
k0 = k0 + 1
|
END DO
|
Initially,
-
a = [0, 2, 4, 6, 1, 3, 5, 7]
-
b = [6, 5, 4, 3, 2, 3, 4, 5]
-
c = [-1,-1,-1,-1,-1,-1,-1,-1]
|
!HPF$ INDEPENDENT
|
DO j = 1, 3
-
a(j) = a(b(j))
-
c(a(j)) = a(j)*b(a(j))
|
END DO
|
Afterwards,
-
a = [3, 1, 6, 6, 1, 3, 5, 7]
-
b = [6, 5, 4, 3, 2, 3, 4, 5]
-
c = [6,-1,12,-1,-1,18,-1,-1]
|
Initially,
-
a = [0, 2, 4, 6, 1, 3, 5, 7]
-
b = [6, 5, 4, 3, 2, 3, 4, 5]
-
c = [-1,-1,-1,-1,-1,-1,-1,-1]
|
!HPF$ INDEPENDENT
|
FORALL ( j = 1:3 )
-
a(j) = a(b(j))
-
c(a(j)) = a(j)*b(a(j))
|
END FORALL
|
Afterwards,
-
a = [3, 1, 6, 6, 1, 3, 5, 7]
-
b = [6, 5, 4, 3, 2, 3, 4, 5]
-
c = [6,-1,12,-1,-1,18,-1,-1]
|
Always true
-
!HPF$ INDEPENDENT
-
FORALL (i=2:n-1) a(i)=b(i-1)+b(i)+b(i+1)
-
!HPF$ INDEPENDENT, NEW(j)
-
DO k = 2, m-1, 2
-
!HPF$ INDEPENDENT, NEW(vl,vr)
-
DO j = 2, n-1, 2
-
vr = x(j,k) - x(j-1,k)
-
vl = x(j+1,k) - x(j,k)
-
x(j,k) = x(j,k) + 0.5*(vr-vl)
-
END DO
-
END DO
|
Some compilers will catch these on their own; some won't
|
Extended intrinsics: MAXLOC, MINLOC
|
One elemental intrinsic: ILEN
|
System inquiry intrinsics: NUMBER_OF_PROCESSORS
|
New reduction functions: IAND
|
Combining-scatter functions: SUM_SCATTER
|
Prefix reduction functions: SUM_PREFIX
|
Sorting functions: GRADE_UP
|
Bit manipulation functions: POPCNT
|
Data distribution inquiry subroutines: HPF_DISTRIBUTION
|
Accumulations through indirection arrays
-
x = SUM_SCATTER( flux, x, nbr(1,1:n) )
-
x = SUM_SCATTER( -flux, x, nbr(2,1:n) )
-
! Equivalent to the following
-
DO i = 1, n
-
x(nbr(1,i)) = x(nbr(1,i)) + flux(i)
-
x(nbr(2,i)) = x(nbr(2,i)) - flux(i)
-
END DO
|
Manipulating array-based sparse structures
-
inum(1:n) = MAX( iend(1:n)-ibeg(1:n)+1, 0 )
-
ibeg_new(1:n) = SUM_PREFIX(inum(1:n)) + 1
-
iend_new(1:n) = ibeg_new(1:n)+inum(1:n)-1
-
! Moving the data left as exercise for reader
|
!
|
! 3D FFT subroutine used by the PESSL implementation.
|
!
-
subroutine fft (n1, n2, n3, isign, scale, x, y)
-
use blacs
-
use types
-
implicit none
|
!
|
! Arguments
|
!
-
integer, intent(in) :: n1, n2, n3, isign
-
real(R8), intent(in) :: scale
|
!
-
complex(R8), dimension(:,:,:), intent(in) :: x
|
!hpf$ template gridx(n1,n2,n3)
|
!hpf$ distribute(*,*,block) :: gridx
|
!hpf$ align(:,:,:) with *gridx :: x
|
!
-
complex(R8), dimension(:,:,:), intent(out) :: y
|
!hpf$ template gridy(n3,n2,n1)
|
!hpf$ distribute(*,*,block) :: gridy
|
!hpf$ align(:,:,:) with *gridy :: y
|
!
|
! Interfaces
|
!
-
interface
-
extrinsic (f77_local) subroutine &
-
& pdcft3 (x, y, n1, n2, n3, isign, scale, ictxt, ip)
-
use types
-
implicit none
-
integer, intent(in) :: n1, n2, n3, isign, ictxt
-
real(R8), intent(in) :: scale
-
integer, dimension(:), intent(in) :: ip
-
complex(R8), dimension(:,:,:), intent(in) :: x
|
!hpf$ template tempx(n1,n2,n3)
|
!hpf$ distribute(*,*,block) :: tempx
|
!hpf$ align(:,:,:) with *tempx :: x
-
complex(R8), dimension(:,:,:), intent(out) :: y
|
!hpf$ template tempy(n3,n2,n1)
|
!hpf$ distribute(*,*,block) :: tempy
|
!hpf$ align(:,:,:) with *tempy :: y
|
!
-
ip = 0
-
call pdcft3 (x, y, n1, n2, n3, isign, scale, ictxt, ip)
|
!
|
!
-
call blacs_get (0, 0, val)
-
ictxt = val(1)
|
!
|
! Calculate the USERMAP for the BLACS grid, and set up the grid. The
|
! processor ID assignment is done by the same algorithm used by PGHPF,
|
! and may be different for other HPF implementions.
|
!
-
nprow = size (usermap, 1)
-
npcol = size (usermap, 2)
-
do j = 1, npcol
-
do i = 1, nprow
-
usermap(i,j) = (j - 1) * nprow + (i - 1)
-
end do
-
end do
-
call blacs_gridmap (ictxt, usermap, nprow, nprow, npcol)
|
!
-
ip = 0
-
call pdcft3 (x, y, n1, n2, n3, isign, scale, ictxt, ip)
|
!
-
call blacs_gridexit (ictxt)
|
!
|