HPF 2D Jacobi Kernel
Generated Code
C Main program of the compiler-generated SPMD code for the HPF 2D
C Jacobi kernel.  Arrays a and b are aligned element-for-element with
C the n x n template t (n = 1024), which is distributed (block, block)
C onto the 4 x 8 processor arrangement p (see the CHPF$ directives).
program jacobi
integer n, m
parameter (n = 1024)
CHPF$ processors p (4, 8)
CHPF$ template t (n, n)
CHPF$ align a (i, j) with t(i, j)
CHPF$ align b (i, j) with t(i, j)
CHPF$ distribute t(block, block) onto p
C --------------------------------------------------------------
C declarations for heap-based runtime dynamic storage allocation
C --------------------------------------------------------------
C hpf$heap$integer and hpf$heap$double are integer and real*8 views
C of the same common-block word hpf$heap (via equivalence); the code
C below indexes them with offsets obtained from the runtime.
common /hpf$heap$common/ hpf$heap
dimension hpf$heap$integer(0:0)
equivalence (hpf$heap$integer(0), hpf$heap)
dimension hpf$heap$double(0:0)
equivalence (hpf$heap$double(0), hpf$heap)
C The MPI constants used in this program (MPI_COMM_WORLD,
C MPI_STATUS_SIZE, MPI_DOUBLE_PRECISION) are defined by the MPI
C Fortran include file; the directive previously had no file name.
#include "mpif.h"
integer status(MPI_STATUS_SIZE)
C -----------------------------------------------
C declarations for compiler-generated temporaries
C -----------------------------------------------
C Naming used below: counter$b$NN / send$buf$b$NN / recv$buf$b$NN
C belong to communication event NN for array b; x$desc, x$align,
C x$dist, x$tmpl, x$proc hold run-time descriptor handles for x.
external hpf_arrayrtd_get_lextent, hpf_arrayrtd_get_gextent
save a, b
logical p$wrap
integer counter$b$24, send$buf$b$24$index, i1, i2, p$q1, p$q2
integer counter$b$25, send$buf$b$25$index, counter$b$26
integer send$buf$b$26$index, counter$b$27
integer send$buf$b$27$index, recv$buf$b$24$index
integer recv$buf$b$25$index, recv$buf$b$26$index
integer recv$buf$b$27$index, a$coord$0, a$coord$1
integer hpf_arrayrtd_get_lextent, hpf_arrayrtd_get_gextent
integer a$local$extent$0, a$global$extent$0, b$coord$0
integer b$coord$1, b$local$extent$0, b$global$extent$0
integer p$myid1, p$myid2, hpf$heap$integer, p$cmap, p$dims
integer sendproc, recvproc, myid, ierr, request, j, i
integer*4 a$data, a$coord, b$data, b$coord, p$coord
real*8 hpf$heap$double, lnltmp1, hpf_nonlocal_lookupd
real*8 lnltmp2, lnltmp3, lnltmp4, lnltmp5, lnltmp6, lnltmp7
real*8 lnltmp8, lnltmp9, lnltmp10, lnltmp11, lnltmp12
real*8 lnltmp13, lnltmp14, lnltmp15, lnltmp16, lnltmp17
real*8 lnltmp18, lnltmp19, lnltmp20, lnltmp21, lnltmp22
real*8 lnltmp23, lnltmp24, a, b, hpf$heap
integer*4 hash$nonlocals, send$buf$b$24, send$buf$b$25
integer*4 send$buf$b$26, send$buf$b$27, recv$buf$b$24
integer*4 recv$buf$b$25, recv$buf$b$26, recv$buf$b$27, a$align
integer*4 a$dist, a$tmpl, a$proc, a$align$new, a$dist$new
integer*4 a$tmpl$new, a$proc$new, a$desc, b$align, b$dist
integer*4 b$tmpl, b$proc, b$align$new, b$dist$new, b$tmpl$new
integer*4 b$proc$new, b$desc, t$template, t$proc, t$dist
integer*4 p$processors
C a and b are declared with a single element and are always
C addressed as a(a$data + offset) / b(b$data + offset); presumably
C a$data / b$data locate runtime-allocated storage relative to these
C anchors -- TODO confirm against hpf_array_remap.
dimension a(0:0), b(0:0), p$dims(1:2), p$wrap(1:2)
C
C
C -----------------------------
C runtime system initialization
C -----------------------------
C Start MPI and store this process's rank in MPI_COMM_WORLD in myid.
C The ierr value returned by these calls is not examined.
call mpi_init(ierr)
call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)
C -----------------------------------------------
C initializations for run-time descriptor indices
C -----------------------------------------------
C Every handle below starts at 0; presumably 0 means "no descriptor
C yet" to the HPF runtime -- TODO confirm.
C NOTE(review): a$tmpl, a$proc, b$tmpl and b$proc are not assigned
C here although they are later passed to hpf_array_remap; confirm
C the runtime ignores them when the old align/dist handles are 0.
a$align$new = 0
a$align = 0
b$align$new = 0
b$align = 0
t$dist = 0
a$dist$new = 0
a$dist = 0
b$dist$new = 0
b$dist = 0
a$desc = 0
b$desc = 0
C
C
C -------------------------------------
C building array descriptor for array a
C -------------------------------------
C Dimensions are numbered from 0; each is given the range 1..1024.
call hpf_arrayrtd_alloc(2, 2, a$desc)
call hpf_arrayrtd_setdim(a$desc, 0, 1, 1024)
call hpf_arrayrtd_setdim(a$desc, 1, 1, 1024)
C
C -------------------------------------
C building array descriptor for array b
C -------------------------------------
call hpf_arrayrtd_alloc(2, 2, b$desc)
call hpf_arrayrtd_setdim(b$desc, 0, 1, 1024)
call hpf_arrayrtd_setdim(b$desc, 1, 1, 1024)
C
C ------------------------------------
C building tmpl descriptor: template t
C ------------------------------------
call hpf_tmplrtd_alloc(2, t$template)
call hpf_tmplrtd_setdim(t$template, 0, 1, 1024)
call hpf_tmplrtd_setdim(t$template, 1, 1, 1024)
C
C ---------------------------------------
C building procs descriptor: processors p
C ---------------------------------------
C Processor arrangement p(4, 8): range 1..4 in dimension 0 and
C 1..8 in dimension 1.
call hpf_procrtd_alloc(2, p$processors)
call hpf_procrtd_setdim(p$processors, 0, 1, 4)
call hpf_procrtd_setdim(p$processors, 1, 1, 8)
C
C ----------------------------------------------------
C initialize processor topology for processors array p
C ----------------------------------------------------
C Non-periodic 4 x 8 Cartesian communicator, created without rank
C reordering (the .false. argument), so the MPI_COMM_WORLD rank myid
C is also used as the rank in p$cmap below.
C NOTE(review): presumably the job is launched on exactly 4*8 = 32
C processes; other process counts are not handled here -- confirm.
p$dims(1) = 4
p$wrap(1) = .false.
p$dims(2) = 8
p$wrap(2) = .false.
call mpi_cart_create(MPI_COMM_WORLD, 2, p$dims, p$wrap, .false.,
* p$cmap, ierr)
C p$coord is used as an index into hpf$heap$integer where the two
C Cartesian coordinates of this process are stored; they are copied
C into p$myid1 (0..3) and p$myid2 (0..7).
call hpf_procrtd_get_coords(hpf$heap, p$processors, p$coord)
call mpi_cart_coords(p$cmap, myid, 2, hpf$heap$integer(p$coord),
* ierr)
p$myid1 = hpf$heap$integer(p$coord + 0)
p$myid2 = hpf$heap$integer(p$coord + 1)
call hpf_procrtd_set_chandle(p$processors, p$cmap)
C
C -----------------------------------------------
C building dist descriptor: distribute template t
C -----------------------------------------------
C 256 = 1024 / 4 and 128 = 1024 / 8 are the block sizes in template
C dimensions 0 and 1.  Presumably the -1 selects BLOCK distribution
C and the last argument names the processor dimension -- TODO
C confirm against the runtime's hpf_distrtd_setdim.
call hpf_distrtd_alloc(2, t$dist)
call hpf_distrtd_setdim(t$dist, 0, -1, 256, 0)
call hpf_distrtd_setdim(t$dist, 1, -1, 128, 1)
t$proc = p$processors
C
C --------------------------------------------
C building dist descriptor: distribute array a
C --------------------------------------------
call hpf_distrtd_clone(t$dist, a$dist$new)
a$proc$new = t$proc
C
C --------------------------------------------
C building dist descriptor: distribute array b
C --------------------------------------------
call hpf_distrtd_clone(t$dist, b$dist$new)
b$proc$new = t$proc
C
C -----------------------------------------
C building align descriptor: align b with t
C -----------------------------------------
C Corresponds to the directive "align b (i, j) with t(i, j)".
call hpf_alignrtd_alloc(2, 2, b$align$new)
call hpf_alignrtd_setdim_src(b$align$new, 0, 0)
call hpf_alignrtd_setdim_src(b$align$new, 1, 1)
call hpf_alignrtd_setdim_tmpl(b$align$new, 0, 0, 1, 0, 0)
call hpf_alignrtd_setdim_tmpl(b$align$new, 1, 1, 1, 0, 0)
C
C ------------------------------------------------------------
C establish template and processor association: align b with t
C ------------------------------------------------------------
C b$proc$new was already set to t$proc above; the repeated
C assignment stores the same value.
b$tmpl$new = t$template
b$proc$new = t$proc
C
C -----------------------------------------
C building align descriptor: align a with t
C -----------------------------------------
C Corresponds to the directive "align a (i, j) with t(i, j)".
call hpf_alignrtd_alloc(2, 2, a$align$new)
call hpf_alignrtd_setdim_src(a$align$new, 0, 0)
call hpf_alignrtd_setdim_src(a$align$new, 1, 1)
call hpf_alignrtd_setdim_tmpl(a$align$new, 0, 0, 1, 0, 0)
call hpf_alignrtd_setdim_tmpl(a$align$new, 1, 1, 1, 0, 0)
C
C ------------------------------------------------------------
C establish template and processor association: align a with t
C ------------------------------------------------------------
C a$proc$new was already set to t$proc above; the repeated
C assignment stores the same value.
a$tmpl$new = t$template
a$proc$new = t$proc
C
C ------------------------------------------------------------------------
C allocate or redistribute array a; compute array-indexed processor coords
C ------------------------------------------------------------------------
C The call receives the old (a$align, a$dist, a$tmpl, a$proc) and new
C (a$...$new) mapping handles plus the array descriptor; a$data is
C the last argument and is used afterwards as the base index in
C every a(a$data + ...) reference.
call hpf_array_remap(a, a$align, a$dist, a$tmpl, a$proc, a$align
*$new, a$dist$new, a$tmpl$new, a$proc$new, a$desc, a$data)
C
C -----------------------------------------------------------------------
C map array-indexed processor coordinates to partitioned array dimensions
C -----------------------------------------------------------------------
C a$coord indexes hpf$heap$integer; the two values read there are
C used below as block coordinates (a$coord$0 * 256, a$coord$1 * 128).
call hpf_arrayrtd_get_coords(hpf$heap, a$desc, a$coord)
a$coord$0 = hpf$heap$integer(a$coord + 0)
a$coord$1 = hpf$heap$integer(a$coord + 1)
C
C -----------------------------------------------------------------
C initializing scalar extent vars used for linearization of array a
C -----------------------------------------------------------------
C a$local$extent$0 is the stride applied to the second-dimension
C offset in the local address computations.
a$local$extent$0 = hpf_arrayrtd_get_lextent(a$desc, 0)
a$global$extent$0 = hpf_arrayrtd_get_gextent(a$desc, 0)
C
C ------------------------------------------------------------------------
C allocate or redistribute array b; compute array-indexed processor coords
C ------------------------------------------------------------------------
call hpf_array_remap(b, b$align, b$dist, b$tmpl, b$proc, b$align
*$new, b$dist$new, b$tmpl$new, b$proc$new, b$desc, b$data)
C
C -----------------------------------------------------------------------
C map array-indexed processor coordinates to partitioned array dimensions
C -----------------------------------------------------------------------
call hpf_arrayrtd_get_coords(hpf$heap, b$desc, b$coord)
b$coord$0 = hpf$heap$integer(b$coord + 0)
b$coord$1 = hpf$heap$integer(b$coord + 1)
C
C -----------------------------------------------------------------
C initializing scalar extent vars used for linearization of array b
C -----------------------------------------------------------------
C b$global$extent$0 is the multiplier of the second index in the
C keys (i + j * b$global$extent$0) used with hash$nonlocals.
b$local$extent$0 = hpf_arrayrtd_get_lextent(b$desc, 0)
b$global$extent$0 = hpf_arrayrtd_get_gextent(b$desc, 0)
C
C Allocate the table that will hold the received non-local elements
C of b (filled by hpf_nonlocal_insertd, read by
C hpf_nonlocal_lookupd).
call hpf_nonlocals_alloc(hash$nonlocals)
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Send phase, message tag 1.  Every other processor (p$q1, p$q2) is
C visited.  Data is selected only for partners with the same second
C coordinate (p$q2 = p$myid2): the i1 range is this processor's block
C [256*p$myid1 + 1, 256*p$myid1 + 256] intersected with
C [256*p$q1, 256*p$q1 + 255], i.e. the partner's block shifted by -1
C (its b(i-1, j) operands).  The intersection is non-empty only for
C p$q1 = p$myid1 + 1, which receives the last owned i1 line.
C NOTE(review): all four send phases run before the first mpi_recv;
C this presumably relies on MPI buffering these small standard-mode
C sends -- confirm for the target MPI implementation.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements to send = (i1 count) * (i2 count), or 0.
counter$b$24 = 0
if (p$q2 .eq. p$myid2 .and. max(256 * p$myid1 + 1, 256 * p
*$q1) .le. min(256 * p$myid1 + 256, 256 * p$q1 + 255, 1022) .and. m
*ax(128 * p$myid2 + 1, 2) .le. min(128 * p$myid2 + 128, 1023)) then
counter$b$24 = counter$b$24 + (min(256 * p$myid1 + 256,
*256 * p$q1 + 255, 1022) - max(256 * p$myid1 + 1, 256 * p$q1) + 1)
** (min(128 * p$myid2 + 128, 1023) - max(128 * p$myid2 + 1, 2) + 1)
endif
C Buffer of counter$b$24 8-byte elements, addressed through
C hpf$heap$double starting at send$buf$b$24$index.
call hpf_buffer_alloc(counter$b$24 * 8, send$buf$b$24)
call hpf_ptr_to_index(hpf$heap, send$buf$b$24, 8, send$buf
*$b$24$index)
C --< Pack Loop For Send For Nonlocal Read >--
C The counter is reused as the running position in the buffer.
counter$b$24 = 0
C
C Loop section ---[ max(((128 * p$myid2) + 1), 2) <= i2 <= min(((128
C * p$myid2) + 128), 1023), max(((256 * p$myid1) + 1), (256 * p$q1)) <= i1 <= mi
Cn(((256 * p$myid1) + 256), ((256 * p$q1) + 255), 1022) ]---
C
if (p$q2 .eq. p$myid2) then
do i1 = max(256 * p$myid1 + 1, 256 * p$q1), min(256 * p$
*myid1 + 256, 256 * p$q1 + 255, 1022)
do i2 = max(128 * p$myid2 + 1, 2), min(128 * p$myid2 +
* 128, 1023)
hpf$heap$double(send$buf$b$24$index + counter$b$24)
*= b(b$data + i1 - (b$coord$0 * 256 + 1) + (i2 - (b$coord$1 * 128 +
* 1)) * b$local$extent$0)
counter$b$24 = counter$b$24 + 1
enddo
enddo
endif
C Destination rank in p$cmap is p$q2 + 8 * p$q1.  mpi_send takes
C (buf, count, datatype, dest, tag, comm, ierror); the generated
C call carried an extra request argument, which is removed here.
if (counter$b$24 .gt. 0) then
call mpi_send(hpf$heap$double(send$buf$b$24$index), coun
*ter$b$24, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 1, p$cmap, ierr)
endif
call hpf_buffer_free(send$buf$b$24)
endif
enddo
enddo
continue
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Send phase, message tag 2.  Data is selected only for partners
C with p$q2 = p$myid2: the i1 range is this processor's block
C intersected with [256*p$q1 + 2, 256*p$q1 + 257], i.e. the partner's
C block shifted by +1 (its b(i+1, j) operands).  The intersection is
C non-empty only for p$q1 = p$myid1 - 1, which receives the first
C owned i1 line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements to send = (i1 count) * (i2 count), or 0.
counter$b$25 = 0
if (p$q2 .eq. p$myid2 .and. max(64 * p$q1 + 3, 256 * p$myi
*d1 + 1, 256 * p$q1 + 2) .le. min(256 * p$myid1 + 256, 256 * p$q1 +
* 257) .and. max(128 * p$myid2 + 1, 2) .le. min(128 * p$myid2 + 128
*, 1023)) then
counter$b$25 = counter$b$25 + (min(256 * p$myid1 + 256,
*256 * p$q1 + 257) - max(64 * p$q1 + 3, 256 * p$myid1 + 1, 256 * p$
*q1 + 2) + 1) * (min(128 * p$myid2 + 128, 1023) - max(128 * p$myid2
* + 1, 2) + 1)
endif
C Buffer of counter$b$25 8-byte elements, addressed through
C hpf$heap$double starting at send$buf$b$25$index.
call hpf_buffer_alloc(counter$b$25 * 8, send$buf$b$25)
call hpf_ptr_to_index(hpf$heap, send$buf$b$25, 8, send$buf
*$b$25$index)
C --< Pack Loop For Send For Nonlocal Read >--
C The counter is reused as the running position in the buffer.
counter$b$25 = 0
C
C Loop section ---[ max(((128 * p$myid2) + 1), 2) <= i2 <= min(((128
C * p$myid2) + 128), 1023), max(((64 * p$q1) + 3), ((256 * p$myid1) + 1), ((256
C* p$q1) + 2)) <= i1 <= min(((256 * p$myid1) + 256), ((256 * p$q1) + 257)) ]---
C
if (p$q2 .eq. p$myid2) then
do i1 = max(64 * p$q1 + 3, 256 * p$myid1 + 1, 256 * p$q1
* + 2), min(256 * p$myid1 + 256, 256 * p$q1 + 257)
do i2 = max(128 * p$myid2 + 1, 2), min(128 * p$myid2 +
* 128, 1023)
hpf$heap$double(send$buf$b$25$index + counter$b$25)
*= b(b$data + i1 - (b$coord$0 * 256 + 1) + (i2 - (b$coord$1 * 128 +
* 1)) * b$local$extent$0)
counter$b$25 = counter$b$25 + 1
enddo
enddo
endif
C Destination rank in p$cmap is p$q2 + 8 * p$q1.  mpi_send takes
C (buf, count, datatype, dest, tag, comm, ierror); the generated
C call carried an extra request argument, which is removed here.
if (counter$b$25 .gt. 0) then
call mpi_send(hpf$heap$double(send$buf$b$25$index), coun
*ter$b$25, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 2, p$cmap, ierr)
endif
call hpf_buffer_free(send$buf$b$25)
endif
enddo
enddo
continue
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Send phase, message tag 3.  Data is selected only for partners
C with p$q1 = p$myid1 and p$myid2 <= p$q2 <= p$myid2 + 1: the i2
C range is this processor's block [128*p$myid2 + 1, 128*p$myid2 + 128]
C intersected with [128*p$q2, 128*p$q2 + 127], i.e. the partner's
C block shifted by -1 (its b(i, j-1) operands).  Since the processor
C itself is skipped, only p$q2 = p$myid2 + 1 receives data: the last
C owned i2 line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements to send = (i1 count) * (i2 count), or 0.
counter$b$26 = 0
if (p$q1 .eq. p$myid1 .and. p$myid2 .le. p$q2 .and. p$q2 .
*le. 1 + p$myid2 .and. max(256 * p$myid1 + 1, 2) .le. min(256 * p$m
*yid1 + 256, 1023) .and. max(128 * p$q2, 128 * p$myid2 + 1) .le. mi
*n(128 * p$q2 + 127, 128 * p$myid2 + 128, 1022)) then
counter$b$26 = counter$b$26 + (min(256 * p$myid1 + 256,
*1023) - max(256 * p$myid1 + 1, 2) + 1) * (min(128 * p$q2 + 127, 12
*8 * p$myid2 + 128, 1022) - max(128 * p$q2, 128 * p$myid2 + 1) + 1)
endif
C Buffer of counter$b$26 8-byte elements, addressed through
C hpf$heap$double starting at send$buf$b$26$index.
call hpf_buffer_alloc(counter$b$26 * 8, send$buf$b$26)
call hpf_ptr_to_index(hpf$heap, send$buf$b$26, 8, send$buf
*$b$26$index)
C --< Pack Loop For Send For Nonlocal Read >--
C The counter is reused as the running position in the buffer.
counter$b$26 = 0
C
C Loop section ---[ max((128 * p$q2), ((128 * p$myid2) + 1)) <= i2 <
C= min(((128 * p$q2) + 127), ((128 * p$myid2) + 128), 1022), max(((256 * p$myid1
C) + 1), 2) <= i1 <= min(((256 * p$myid1) + 256), 1023) ]---
C
if (p$q1 .eq. p$myid1 .and. p$myid2 .le. p$q2 .and. p$q2 .
*le. 1 + p$myid2) then
do i1 = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 2
*56, 1023)
do i2 = max(128 * p$q2, 128 * p$myid2 + 1), min(128 *
*p$q2 + 127, 128 * p$myid2 + 128, 1022)
hpf$heap$double(send$buf$b$26$index + counter$b$26)
*= b(b$data + i1 - (b$coord$0 * 256 + 1) + (i2 - (b$coord$1 * 128 +
* 1)) * b$local$extent$0)
counter$b$26 = counter$b$26 + 1
enddo
enddo
endif
C Destination rank in p$cmap is p$q2 + 8 * p$q1.  mpi_send takes
C (buf, count, datatype, dest, tag, comm, ierror); the generated
C call carried an extra request argument, which is removed here.
if (counter$b$26 .gt. 0) then
call mpi_send(hpf$heap$double(send$buf$b$26$index), coun
*ter$b$26, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 3, p$cmap, ierr)
endif
call hpf_buffer_free(send$buf$b$26)
endif
enddo
enddo
continue
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Send phase, message tag 4.  Data is selected only for partners
C with p$q1 = p$myid1 and p$myid2 - 1 <= p$q2 <= p$myid2: the i2
C range is this processor's block intersected with
C [128*p$q2 + 2, 128*p$q2 + 129], i.e. the partner's block shifted by
C +1 (its b(i, j+1) operands).  Since the processor itself is
C skipped, only p$q2 = p$myid2 - 1 receives data: the first owned i2
C line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements to send = (i1 count) * (i2 count), or 0.
counter$b$27 = 0
if (p$q1 .eq. p$myid1 .and. p$myid2 .le. 1 + p$q2 .and. p$
*q2 .le. p$myid2 .and. max(256 * p$myid1 + 1, 2) .le. min(256 * p$m
*yid1 + 256, 1023) .and. max(128 * p$q2 + 2, 128 * p$myid2 + 1, 3)
*.le. min(128 * p$q2 + 129, 128 * p$myid2 + 128)) then
counter$b$27 = counter$b$27 + (min(256 * p$myid1 + 256,
*1023) - max(256 * p$myid1 + 1, 2) + 1) * (min(128 * p$q2 + 129, 12
*8 * p$myid2 + 128) - max(128 * p$q2 + 2, 128 * p$myid2 + 1, 3) + 1
*)
endif
C Buffer of counter$b$27 8-byte elements, addressed through
C hpf$heap$double starting at send$buf$b$27$index.
call hpf_buffer_alloc(counter$b$27 * 8, send$buf$b$27)
call hpf_ptr_to_index(hpf$heap, send$buf$b$27, 8, send$buf
*$b$27$index)
C --< Pack Loop For Send For Nonlocal Read >--
C The counter is reused as the running position in the buffer.
counter$b$27 = 0
C
C Loop section ---[ max(((128 * p$q2) + 2), ((128 * p$myid2) + 1), 3
C) <= i2 <= min(((128 * p$q2) + 129), ((128 * p$myid2) + 128)), max(((256 * p$my
Cid1) + 1), 2) <= i1 <= min(((256 * p$myid1) + 256), 1023) ]---
C
if (p$q1 .eq. p$myid1 .and. p$myid2 .le. 1 + p$q2 .and. p$
*q2 .le. p$myid2) then
do i1 = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 2
*56, 1023)
do i2 = max(128 * p$q2 + 2, 128 * p$myid2 + 1, 3), min
*(128 * p$q2 + 129, 128 * p$myid2 + 128)
hpf$heap$double(send$buf$b$27$index + counter$b$27)
*= b(b$data + i1 - (b$coord$0 * 256 + 1) + (i2 - (b$coord$1 * 128 +
* 1)) * b$local$extent$0)
counter$b$27 = counter$b$27 + 1
enddo
enddo
endif
C Destination rank in p$cmap is p$q2 + 8 * p$q1.  mpi_send takes
C (buf, count, datatype, dest, tag, comm, ierror); the generated
C call carried an extra request argument, which is removed here.
if (counter$b$27 .gt. 0) then
call mpi_send(hpf$heap$double(send$buf$b$27$index), coun
*ter$b$27, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 4, p$cmap, ierr)
endif
call hpf_buffer_free(send$buf$b$27)
endif
enddo
enddo
continue
continue
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Receive phase, message tag 1 (mirror of the tag-1 send with the
C roles of p$myid and p$q exchanged).  Data is expected only from
C partners with p$q2 = p$myid2 whose block [256*p$q1 + 1,
C 256*p$q1 + 256] meets [256*p$myid1, 256*p$myid1 + 255], i.e. from
C p$q1 = p$myid1 - 1: the b(i-1, j) operands of the first owned i
C line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements expected = (i1 count) * (i2 count), or 0.
counter$b$24 = 0
if (p$q2 .eq. p$myid2 .and. max(256 * p$q1 + 1, 256 * p$my
*id1) .le. min(256 * p$q1 + 256, 256 * p$myid1 + 255, 1022) .and. m
*ax(128 * p$q2 + 1, 2) .le. min(128 * p$q2 + 128, 1023)) then
counter$b$24 = counter$b$24 + (min(256 * p$q1 + 256, 256
* * p$myid1 + 255, 1022) - max(256 * p$q1 + 1, 256 * p$myid1) + 1)
** (min(128 * p$q2 + 128, 1023) - max(128 * p$q2 + 1, 2) + 1)
endif
call hpf_buffer_alloc(counter$b$24 * 8, recv$buf$b$24)
call hpf_ptr_to_index(hpf$heap, recv$buf$b$24, 8, recv$buf
*$b$24$index)
C mpi_recv takes (buf, count, datatype, source, tag, comm, status,
C ierror) where status is an integer array of MPI_STATUS_SIZE
C elements.  The generated call passed the scalar request there,
C which MPI would overrun; the declared status array is used.
if (counter$b$24 .gt. 0) then
call mpi_recv(hpf$heap$double(recv$buf$b$24$index), coun
*ter$b$24, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 1, p$cmap, status
*, ierr)
endif
C --< Unpack Loop From Recv For Nonlocal Read >--
C Same i1-outer / i2-inner order as the sender's pack loop; each
C value is stored in hash$nonlocals under the key
C i1 + i2 * b$global$extent$0.
counter$b$24 = 0
C
C Loop section ---[ max(((128 * p$q2) + 1), 2) <= i2 <= min(((128 *
Cp$q2) + 128), 1023), max(((256 * p$q1) + 1), (256 * p$myid1)) <= i1 <= min(((25
C6 * p$q1) + 256), ((256 * p$myid1) + 255), 1022) ]---
C
if (p$q2 .eq. p$myid2) then
do i1 = max(256 * p$q1 + 1, 256 * p$myid1), min(256 * p$
*q1 + 256, 256 * p$myid1 + 255, 1022)
do i2 = max(128 * p$q2 + 1, 2), min(128 * p$q2 + 128,
*1023)
call hpf_nonlocal_insertd(hash$nonlocals, b$data, i1
* + i2 * b$global$extent$0, hpf$heap$double(recv$buf$b$24$index + c
*ounter$b$24))
counter$b$24 = counter$b$24 + 1
enddo
enddo
endif
call hpf_buffer_free(recv$buf$b$24)
endif
enddo
enddo
continue
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Receive phase, message tag 2 (mirror of the tag-2 send).  Data is
C expected only from partners with p$q2 = p$myid2 whose block meets
C [256*p$myid1 + 2, 256*p$myid1 + 257], i.e. from
C p$q1 = p$myid1 + 1: the b(i+1, j) operands of the last owned i
C line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements expected = (i1 count) * (i2 count), or 0.
counter$b$25 = 0
if (p$q2 .eq. p$myid2 .and. max(64 * p$q1 + 3, 256 * p$q1
*+ 1, 256 * p$myid1 + 2) .le. min(256 * p$q1 + 256, 256 * p$myid1 +
* 257) .and. max(128 * p$q2 + 1, 2) .le. min(128 * p$q2 + 128, 1023
*)) then
counter$b$25 = counter$b$25 + (min(256 * p$q1 + 256, 256
* * p$myid1 + 257) - max(64 * p$q1 + 3, 256 * p$q1 + 1, 256 * p$myi
*d1 + 2) + 1) * (min(128 * p$q2 + 128, 1023) - max(128 * p$q2 + 1,
*2) + 1)
endif
call hpf_buffer_alloc(counter$b$25 * 8, recv$buf$b$25)
call hpf_ptr_to_index(hpf$heap, recv$buf$b$25, 8, recv$buf
*$b$25$index)
C mpi_recv takes (buf, count, datatype, source, tag, comm, status,
C ierror) where status is an integer array of MPI_STATUS_SIZE
C elements.  The generated call passed the scalar request there,
C which MPI would overrun; the declared status array is used.
if (counter$b$25 .gt. 0) then
call mpi_recv(hpf$heap$double(recv$buf$b$25$index), coun
*ter$b$25, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 2, p$cmap, status
*, ierr)
endif
C --< Unpack Loop From Recv For Nonlocal Read >--
C Same i1-outer / i2-inner order as the sender's pack loop; each
C value is stored in hash$nonlocals under the key
C i1 + i2 * b$global$extent$0.
counter$b$25 = 0
C
C Loop section ---[ max(((128 * p$q2) + 1), 2) <= i2 <= min(((128 *
Cp$q2) + 128), 1023), max(((64 * p$q1) + 3), ((256 * p$q1) + 1), ((256 * p$myid1
C) + 2)) <= i1 <= min(((256 * p$q1) + 256), ((256 * p$myid1) + 257)) ]---
C
if (p$q2 .eq. p$myid2) then
do i1 = max(64 * p$q1 + 3, 256 * p$q1 + 1, 256 * p$myid1
* + 2), min(256 * p$q1 + 256, 256 * p$myid1 + 257)
do i2 = max(128 * p$q2 + 1, 2), min(128 * p$q2 + 128,
*1023)
call hpf_nonlocal_insertd(hash$nonlocals, b$data, i1
* + i2 * b$global$extent$0, hpf$heap$double(recv$buf$b$25$index + c
*ounter$b$25))
counter$b$25 = counter$b$25 + 1
enddo
enddo
endif
call hpf_buffer_free(recv$buf$b$25)
endif
enddo
enddo
continue
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Receive phase, message tag 3 (mirror of the tag-3 send).  Data is
C expected only from partners with p$q1 = p$myid1 and
C p$myid2 - 1 <= p$q2 <= p$myid2 whose block meets
C [128*p$myid2, 128*p$myid2 + 127]; with the processor itself
C skipped this is p$q2 = p$myid2 - 1: the b(i, j-1) operands of the
C first owned j line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements expected = (i1 count) * (i2 count), or 0.
counter$b$26 = 0
if (p$q1 .eq. p$myid1 .and. p$q2 .le. p$myid2 .and. p$myid
*2 .le. 1 + p$q2 .and. max(256 * p$myid1 + 1, 2) .le. min(256 * p$m
*yid1 + 256, 1023) .and. max(128 * p$myid2, 128 * p$q2 + 1) .le. mi
*n(128 * p$myid2 + 127, 128 * p$q2 + 128, 1022)) then
counter$b$26 = counter$b$26 + (min(256 * p$myid1 + 256,
*1023) - max(256 * p$myid1 + 1, 2) + 1) * (min(128 * p$myid2 + 127,
* 128 * p$q2 + 128, 1022) - max(128 * p$myid2, 128 * p$q2 + 1) + 1)
endif
call hpf_buffer_alloc(counter$b$26 * 8, recv$buf$b$26)
call hpf_ptr_to_index(hpf$heap, recv$buf$b$26, 8, recv$buf
*$b$26$index)
C mpi_recv takes (buf, count, datatype, source, tag, comm, status,
C ierror) where status is an integer array of MPI_STATUS_SIZE
C elements.  The generated call passed the scalar request there,
C which MPI would overrun; the declared status array is used.
if (counter$b$26 .gt. 0) then
call mpi_recv(hpf$heap$double(recv$buf$b$26$index), coun
*ter$b$26, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 3, p$cmap, status
*, ierr)
endif
C --< Unpack Loop From Recv For Nonlocal Read >--
C Same i1-outer / i2-inner order as the sender's pack loop; each
C value is stored in hash$nonlocals under the key
C i1 + i2 * b$global$extent$0.
counter$b$26 = 0
C
C Loop section ---[ max((128 * p$myid2), ((128 * p$q2) + 1)) <= i2 <
C= min(((128 * p$myid2) + 127), ((128 * p$q2) + 128), 1022), max(((256 * p$myid1
C) + 1), 2) <= i1 <= min(((256 * p$myid1) + 256), 1023) ]---
C
if (p$q1 .eq. p$myid1 .and. p$q2 .le. p$myid2 .and. p$myid
*2 .le. 1 + p$q2) then
do i1 = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 2
*56, 1023)
do i2 = max(128 * p$myid2, 128 * p$q2 + 1), min(128 *
*p$myid2 + 127, 128 * p$q2 + 128, 1022)
call hpf_nonlocal_insertd(hash$nonlocals, b$data, i1
* + i2 * b$global$extent$0, hpf$heap$double(recv$buf$b$26$index + c
*ounter$b$26))
counter$b$26 = counter$b$26 + 1
enddo
enddo
endif
call hpf_buffer_free(recv$buf$b$26)
endif
enddo
enddo
continue
C
C --<< Iterations that access only local values >>--
C
C
C Loop section ---[ ((256 * p$myid1) + 2) <= i <= ((256 * p$myid1) + 255),
C ((128 * p$myid2) + 2) <= j <= ((128 * p$myid2) + 127) ]---
C
C Four-point average a(i,j) = 0.25 * (b(i-1,j) + b(i+1,j) + b(i,j-1)
C + b(i,j+1)) for global (i, j) one step inside every edge of this
C processor's 256 x 128 block, so the operands tested as local in the
C later boundary loops are never needed from another processor.
C This loop is placed before the tag-4 receive loop.  A global
C (i, j) is mapped to the local element
C   x$data + (i - (x$coord$0*256 + 1))
C          + (j - (x$coord$1*128 + 1)) * x$local$extent$0.
do j = 128 * p$myid2 + 2, 128 * p$myid2 + 127
do i = 256 * p$myid1 + 2, 256 * p$myid1 + 255
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (b(b$data + i - 1 - (b$coord$0
* * 256 + 1) + (j - (b$coord$1 * 128 + 1)) * b$local$extent$0) + b(
*b$data + i + 1 - (b$coord$0 * 256 + 1) + (j - (b$coord$1 * 128 + 1
*)) * b$local$extent$0) + b(b$data + i - (b$coord$0 * 256 + 1) + (j
* - 1 - (b$coord$1 * 128 + 1)) * b$local$extent$0) + b(b$data + i -
* (b$coord$0 * 256 + 1) + (j + 1 - (b$coord$1 * 128 + 1)) * b$local
*$extent$0))
enddo
enddo
C
C Loop section ---[ 0 <= p$q2 <= 7, 0 <= p$q1 <= 3 ]---
C
C Receive phase, message tag 4 (mirror of the tag-4 send).  Data is
C expected only from partners with p$q1 = p$myid1 and
C p$myid2 <= p$q2 <= p$myid2 + 1 whose block meets
C [128*p$myid2 + 2, 128*p$myid2 + 129]; with the processor itself
C skipped this is p$q2 = p$myid2 + 1: the b(i, j+1) operands of the
C last owned j line.
do p$q1 = 0, 3
do p$q2 = 0, 7
if (p$myid1 .ne. p$q1 .or. p$myid2 .ne. p$q2) then
C --< Loop Counters >--
C Number of elements expected = (i1 count) * (i2 count), or 0.
counter$b$27 = 0
if (p$q1 .eq. p$myid1 .and. p$q2 .le. 1 + p$myid2 .and. p$
*myid2 .le. p$q2 .and. max(256 * p$myid1 + 1, 2) .le. min(256 * p$m
*yid1 + 256, 1023) .and. max(128 * p$myid2 + 2, 128 * p$q2 + 1, 3)
*.le. min(128 * p$myid2 + 129, 128 * p$q2 + 128)) then
counter$b$27 = counter$b$27 + (min(256 * p$myid1 + 256,
*1023) - max(256 * p$myid1 + 1, 2) + 1) * (min(128 * p$myid2 + 129,
* 128 * p$q2 + 128) - max(128 * p$myid2 + 2, 128 * p$q2 + 1, 3) + 1
*)
endif
call hpf_buffer_alloc(counter$b$27 * 8, recv$buf$b$27)
call hpf_ptr_to_index(hpf$heap, recv$buf$b$27, 8, recv$buf
*$b$27$index)
C mpi_recv takes (buf, count, datatype, source, tag, comm, status,
C ierror) where status is an integer array of MPI_STATUS_SIZE
C elements.  The generated call passed the scalar request there,
C which MPI would overrun; the declared status array is used.
if (counter$b$27 .gt. 0) then
call mpi_recv(hpf$heap$double(recv$buf$b$27$index), coun
*ter$b$27, MPI_DOUBLE_PRECISION, p$q2 + 8 * p$q1, 4, p$cmap, status
*, ierr)
endif
C --< Unpack Loop From Recv For Nonlocal Read >--
C Same i1-outer / i2-inner order as the sender's pack loop; each
C value is stored in hash$nonlocals under the key
C i1 + i2 * b$global$extent$0.
counter$b$27 = 0
C
C Loop section ---[ max(((128 * p$myid2) + 2), ((128 * p$q2) + 1), 3
C) <= i2 <= min(((128 * p$myid2) + 129), ((128 * p$q2) + 128)), max(((256 * p$my
Cid1) + 1), 2) <= i1 <= min(((256 * p$myid1) + 256), 1023) ]---
C
if (p$q1 .eq. p$myid1 .and. p$q2 .le. 1 + p$myid2 .and. p$
*myid2 .le. p$q2) then
do i1 = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 2
*56, 1023)
do i2 = max(128 * p$myid2 + 2, 128 * p$q2 + 1, 3), min
*(128 * p$myid2 + 129, 128 * p$q2 + 128)
call hpf_nonlocal_insertd(hash$nonlocals, b$data, i1
* + i2 * b$global$extent$0, hpf$heap$double(recv$buf$b$27$index + c
*ounter$b$27))
counter$b$27 = counter$b$27 + 1
enddo
enddo
endif
call hpf_buffer_free(recv$buf$b$27)
endif
enddo
enddo
C
C --<< Iterations that read (but do not compute) non-local values >>--
C
C
C Loop section ---[ max(((256 * p$myid1) + 1), 2) <= i <= min(((256 * p$my
Cid1) + 256), 1023), j = ((128 * p$myid2) + 1) ]---
C
C First owned j line (skipped when p$myid2 = 0).  Each of the four
C stencil operands is read from the local part of b when its global
C index lies inside this processor's block
C [256*p$myid1 + 1, 256*p$myid1 + 256] x
C [128*p$myid2 + 1, 128*p$myid2 + 128]; otherwise it is fetched from
C hash$nonlocals with the key i + j * b$global$extent$0 that the
C unpack loops used when inserting received values.
if (1 .le. p$myid2) then
do i = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 256, 102
*3)
j = 128 * p$myid2 + 1
C operand b(i-1, j)
if (p$myid1 * 256 + 1 .le. i - 1 .and. i - 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp3 = b(b$data + i - 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp3 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i -
* 1 + j * b$global$extent$0)
endif
C operand b(i+1, j)
if (p$myid1 * 256 + 1 .le. i + 1 .and. i + 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp9 = b(b$data + i + 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp9 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i +
* 1 + j * b$global$extent$0)
endif
C operand b(i, j-1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j - 1 .and. j - 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp15 = b(b$data + i - (b$coord$0 * 256 + 1) + (j - 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp15 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j - 1) * b$global$extent$0)
endif
C operand b(i, j+1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j + 1 .and. j + 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp21 = b(b$data + i - (b$coord$0 * 256 + 1) + (j + 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp21 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j + 1) * b$global$extent$0)
endif
C a(i, j) = average of the four operands
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (lnltmp3 + lnltmp9 + lnltmp15
*+ lnltmp21)
enddo
endif
C
C Loop section ---[ i = ((256 * p$myid1) + 1), ((128 * p$myid2) + 2) <= j
C<= ((128 * p$myid2) + 127) ]---
C
C
C Loop section ---[ i = ((256 * p$myid1) + 256), ((128 * p$myid2) + 2) <=
Cj <= ((128 * p$myid2) + 127) ]---
C
C Processors with p$myid1 = 1 or 2: for every j strictly inside the
C owned j range, update first the first owned i line
C (i = 256*p$myid1 + 1) and then the last owned i line
C (i = 256*p$myid1 + 256).  Each operand is read from local b when
C its global index lies in this processor's block, otherwise it is
C looked up in hash$nonlocals under the key i + j * b$global$extent$0.
if (1 .le. p$myid1 .and. p$myid1 .le. 2) then
do j = 128 * p$myid2 + 2, 128 * p$myid2 + 127
C ---- first owned i line ----
i = 256 * p$myid1 + 1
C operand b(i-1, j)
if (p$myid1 * 256 + 1 .le. i - 1 .and. i - 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp4 = b(b$data + i - 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp4 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i -
* 1 + j * b$global$extent$0)
endif
C operand b(i+1, j)
if (p$myid1 * 256 + 1 .le. i + 1 .and. i + 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp10 = b(b$data + i + 1 - (b$coord$0 * 256 + 1) + (j -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp10 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ 1 + j * b$global$extent$0)
endif
C operand b(i, j-1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j - 1 .and. j - 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp16 = b(b$data + i - (b$coord$0 * 256 + 1) + (j - 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp16 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j - 1) * b$global$extent$0)
endif
C operand b(i, j+1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j + 1 .and. j + 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp22 = b(b$data + i - (b$coord$0 * 256 + 1) + (j + 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp22 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j + 1) * b$global$extent$0)
endif
C a(i, j) = average of the four operands
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (lnltmp4 + lnltmp10 + lnltmp16
* + lnltmp22)
C ---- last owned i line ----
i = 256 * p$myid1 + 256
C operand b(i-1, j)
if (p$myid1 * 256 + 1 .le. i - 1 .and. i - 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp5 = b(b$data + i - 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp5 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i -
* 1 + j * b$global$extent$0)
endif
C operand b(i+1, j)
if (p$myid1 * 256 + 1 .le. i + 1 .and. i + 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp11 = b(b$data + i + 1 - (b$coord$0 * 256 + 1) + (j -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp11 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ 1 + j * b$global$extent$0)
endif
C operand b(i, j-1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j - 1 .and. j - 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp17 = b(b$data + i - (b$coord$0 * 256 + 1) + (j - 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp17 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j - 1) * b$global$extent$0)
endif
C operand b(i, j+1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j + 1 .and. j + 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp23 = b(b$data + i - (b$coord$0 * 256 + 1) + (j + 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp23 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j + 1) * b$global$extent$0)
endif
C a(i, j) = average of the four operands
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (lnltmp5 + lnltmp11 + lnltmp17
* + lnltmp23)
enddo
endif
C
C Loop section ---[ i = 769, ((128 * p$myid2) + 2) <= j <= ((128 * p$myid2
C) + 127) ]---
C
C Processors with p$myid1 = 3 (the largest first coordinate): update
C i = 769 = 256*3 + 1, their first owned i line, for every j strictly
C inside the owned j range.  Each operand is read from local b when
C its global index lies in this processor's block, otherwise it is
C looked up in hash$nonlocals under the key i + j * b$global$extent$0.
if (3 .le. p$myid1) then
do j = 128 * p$myid2 + 2, 128 * p$myid2 + 127
i = 769
C operand b(i-1, j)
if (p$myid1 * 256 + 1 .le. i - 1 .and. i - 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp6 = b(b$data + i - 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp6 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i -
* 1 + j * b$global$extent$0)
endif
C operand b(i+1, j)
if (p$myid1 * 256 + 1 .le. i + 1 .and. i + 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp12 = b(b$data + i + 1 - (b$coord$0 * 256 + 1) + (j -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp12 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ 1 + j * b$global$extent$0)
endif
C operand b(i, j-1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j - 1 .and. j - 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp18 = b(b$data + i - (b$coord$0 * 256 + 1) + (j - 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp18 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j - 1) * b$global$extent$0)
endif
C operand b(i, j+1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j + 1 .and. j + 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp24 = b(b$data + i - (b$coord$0 * 256 + 1) + (j + 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp24 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j + 1) * b$global$extent$0)
endif
C a(i, j) = average of the four operands
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (lnltmp6 + lnltmp12 + lnltmp18
* + lnltmp24)
enddo
endif
C
C Loop section ---[ i = 256, ((128 * p$myid2) + 2) <= j <= ((128 * p$myid2
C) + 127) ]---
C
C Processors with p$myid1 = 0: update i = 256, their last owned i
C line, for every j strictly inside the owned j range.  Each operand
C is read from local b when its global index lies in this
C processor's block, otherwise it is looked up in hash$nonlocals
C under the key i + j * b$global$extent$0.
if (p$myid1 .le. 0) then
do j = 128 * p$myid2 + 2, 128 * p$myid2 + 127
i = 256
C operand b(i-1, j)
if (p$myid1 * 256 + 1 .le. i - 1 .and. i - 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp1 = b(b$data + i - 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp1 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i -
* 1 + j * b$global$extent$0)
endif
C operand b(i+1, j)
if (p$myid1 * 256 + 1 .le. i + 1 .and. i + 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp7 = b(b$data + i + 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp7 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i +
* 1 + j * b$global$extent$0)
endif
C operand b(i, j-1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j - 1 .and. j - 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp13 = b(b$data + i - (b$coord$0 * 256 + 1) + (j - 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp13 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j - 1) * b$global$extent$0)
endif
C operand b(i, j+1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j + 1 .and. j + 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp19 = b(b$data + i - (b$coord$0 * 256 + 1) + (j + 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp19 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j + 1) * b$global$extent$0)
endif
C a(i, j) = average of the four operands
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (lnltmp1 + lnltmp7 + lnltmp13
*+ lnltmp19)
enddo
endif
C
C Loop section ---[ max(((256 * p$myid1) + 1), 2) <= i <= min(((256 * p$my
Cid1) + 256), 1023), j = ((128 * p$myid2) + 128) ]---
C
C Last owned j line (skipped when p$myid2 = 7), for every owned i
C clipped to 2..1023.  Each operand is read from local b when its
C global index lies in this processor's block, otherwise it is looked
C up in hash$nonlocals under the key i + j * b$global$extent$0.
if (p$myid2 .le. 6) then
do i = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 256, 102
*3)
j = 128 * p$myid2 + 128
C operand b(i-1, j)
if (p$myid1 * 256 + 1 .le. i - 1 .and. i - 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp2 = b(b$data + i - 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp2 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i -
* 1 + j * b$global$extent$0)
endif
C operand b(i+1, j)
if (p$myid1 * 256 + 1 .le. i + 1 .and. i + 1 .lt. p$myid1 *
*256 + 257 .and. p$myid2 * 128 + 1 .le. j .and. j .lt. p$myid2 * 12
*8 + 129) then
lnltmp8 = b(b$data + i + 1 - (b$coord$0 * 256 + 1) + (j -
*(b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp8 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i +
* 1 + j * b$global$extent$0)
endif
C operand b(i, j-1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j - 1 .and. j - 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp14 = b(b$data + i - (b$coord$0 * 256 + 1) + (j - 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp14 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j - 1) * b$global$extent$0)
endif
C operand b(i, j+1)
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 25
*7 .and. p$myid2 * 128 + 1 .le. j + 1 .and. j + 1 .lt. p$myid2 * 12
*8 + 129) then
lnltmp20 = b(b$data + i - (b$coord$0 * 256 + 1) + (j + 1 -
* (b$coord$1 * 128 + 1)) * b$local$extent$0)
else
lnltmp20 = hpf_nonlocal_lookupd(hash$nonlocals, b$data, i
*+ (j + 1) * b$global$extent$0)
endif
C a(i, j) = average of the four operands
a(a$data + i - (a$coord$0 * 256 + 1) + (j - (a$coord$1 * 128
* + 1)) * a$local$extent$0) = 0.25 * (lnltmp2 + lnltmp8 + lnltmp14
*+ lnltmp20)
enddo
endif
C
C --<< Iterations that access only local values >>--
C
C
C Loop section ---[ max(((256 * p$myid1) + 1), 2) <= i <= min(((256 * p$my
Cid1) + 256), 1023), max(((128 * p$myid2) + 1), 2) <= j <= min(((128 * p$myid2)
C+ 128), 1023) ]---
C
C Copy the new values back: b(i, j) = a(i, j) for every owned (i, j)
C with both indices clipped to 2..1023, so the global edge lines
C (index 1 and 1024) of b are left unchanged.  Both sides use the
C same global-to-local offset formula, so no communication occurs.
do j = max(128 * p$myid2 + 1, 2), min(128 * p$myid2 + 128, 1023)
do i = max(256 * p$myid1 + 1, 2), min(256 * p$myid1 + 256, 102
*3)
b(b$data + i - (b$coord$0 * 256 + 1) + (j - (b$coord$1 * 128
* + 1)) * b$local$extent$0) = a(a$data + i - (a$coord$0 * 256 + 1)
*+ (j - (a$coord$1 * 128 + 1)) * a$local$extent$0)
enddo
enddo
C Release the table of received non-local b elements.
call hpf_nonlocals_free(hash$nonlocals)
C -----------------------------
C finalize run-time descriptors
C -----------------------------
C Frees the processor and template descriptors, then for each of a
C and b the old and new align/dist descriptors, the array storage
C and the array descriptor.
C NOTE(review): t$dist (allocated by hpf_distrtd_alloc) has no
C matching free here, and the communicator p$cmap is not freed
C before mpi_finalize -- confirm whether the runtime releases them.
call hpf_procrtd_free(p$processors)
call hpf_tmplrtd_free(t$template)
call hpf_alignrtd_free(a$align)
call hpf_alignrtd_free(a$align$new)
call hpf_distrtd_free(a$dist)
call hpf_distrtd_free(a$dist$new)
call hpf_array_free(a, a$data, a$desc)
call hpf_arrayrtd_free(a$desc)
call hpf_alignrtd_free(b$align)
call hpf_alignrtd_free(b$align$new)
call hpf_distrtd_free(b$dist)
call hpf_distrtd_free(b$dist$new)
call hpf_array_free(b, b$data, b$desc)
call hpf_arrayrtd_free(b$desc)
C ---------------------------
C runtime system finalization
C ---------------------------
call mpi_finalize(ierr)
C
end