HPF Example with Multiple Statement Groups
Generated Code
program alignloop
integer i
parameter (n = 1024)
CHPF$ processors p (4)
CHPF$ template t (n)
CHPF$ align a (i) with t(i)
CHPF$ align b (i) with t(i)
CHPF$ align c (i) with t(i)
CHPF$ distribute t(block) onto p
C --------------------------------------------------------------
C declarations for heap-based runtime dynamic storage allocation
C --------------------------------------------------------------
common /hpf$heap$common/ hpf$heap
dimension hpf$heap$integer(0:0)
equivalence (hpf$heap$integer(0), hpf$heap)
dimension hpf$heap$double(0:0)
equivalence (hpf$heap$double(0), hpf$heap)
#include
integer status(MPI_STATUS_SIZE)
C -----------------------------------------------
C declarations for compiler-generated temporaries
C -----------------------------------------------
save a, b, c
logical p$wrap
integer counter$c$3, send$buf$c$3$index, i1, p$q1
integer recv$buf$c$3$index, a$coord$0, b$coord$0, c$coord$0
integer p$myid1, hpf$heap$integer, p$cmap, p$dims, sendproc
integer recvproc, myid, ierr, request, n
integer*4 a$data, a$coord, b$data, b$coord, c$data, c$coord
integer*4 p$coord
real*8 hpf$heap$double, lnltmp1, lnltmp2, a, b, c, hpf$heap
integer*4 hash$nonlocals, send$buf$c$3, recv$buf$c$3, a$align
integer*4 a$dist, a$tmpl, a$proc, a$align$new, a$dist$new
integer*4 a$tmpl$new, a$proc$new, a$desc, b$align, b$dist
integer*4 b$tmpl, b$proc, b$align$new, b$dist$new, b$tmpl$new
integer*4 b$proc$new, b$desc, c$align, c$dist, c$tmpl, c$proc
integer*4 c$align$new, c$dist$new, c$tmpl$new, c$proc$new
integer*4 c$desc, t$template, t$proc, t$dist, p$processors
dimension a(0:0), b(0:0), c(0:0), p$dims(1:1), p$wrap(1:1)
C
C
C -----------------------------
C runtime system initialization
C -----------------------------
call mpi_init(ierr)
call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)
C -----------------------------------------------
C initializations for run-time descriptor indices
C -----------------------------------------------
a$align$new = 0
a$align = 0
b$align$new = 0
b$align = 0
c$align$new = 0
c$align = 0
t$dist = 0
a$dist$new = 0
a$dist = 0
b$dist$new = 0
b$dist = 0
c$dist$new = 0
c$dist = 0
a$desc = 0
b$desc = 0
c$desc = 0
C
C
C -------------------------------------
C building array descriptor for array a
C -------------------------------------
call hpf_arrayrtd_alloc(1, 2, a$desc)
call hpf_arrayrtd_setdim(a$desc, 0, 1, 1024)
C
C -------------------------------------
C building array descriptor for array b
C -------------------------------------
call hpf_arrayrtd_alloc(1, 2, b$desc)
call hpf_arrayrtd_setdim(b$desc, 0, 1, 1024)
C
C -------------------------------------
C building array descriptor for array c
C -------------------------------------
call hpf_arrayrtd_alloc(1, 2, c$desc)
call hpf_arrayrtd_setdim(c$desc, 0, 1, 1024)
C
C ------------------------------------
C building tmpl descriptor: template t
C ------------------------------------
call hpf_tmplrtd_alloc(1, t$template)
call hpf_tmplrtd_setdim(t$template, 0, 1, 1024)
C
C ---------------------------------------
C building procs descriptor: processors p
C ---------------------------------------
call hpf_procrtd_alloc(1, p$processors)
call hpf_procrtd_setdim(p$processors, 0, 1, 4)
C
C ----------------------------------------------------
C initialize processor topology for processors array p
C ----------------------------------------------------
p$dims(1) = 4
p$wrap(1) = .false.
call mpi_cart_create(MPI_COMM_WORLD, 1, p$dims, p$wrap, .false.,
* p$cmap, ierr)
call hpf_procrtd_get_coords(hpf$heap, p$processors, p$coord)
call mpi_cart_coords(p$cmap, myid, 1, hpf$heap$integer(p$coord),
* ierr)
p$myid1 = hpf$heap$integer(p$coord + 0)
call hpf_procrtd_set_chandle(p$processors, p$cmap)
C
C -----------------------------------------------
C building dist descriptor: distribute template t
C -----------------------------------------------
call hpf_distrtd_alloc(1, t$dist)
call hpf_distrtd_setdim(t$dist, 0, -1, 256, 0)
t$proc = p$processors
C
C --------------------------------------------
C building dist descriptor: distribute array a
C --------------------------------------------
call hpf_distrtd_clone(t$dist, a$dist$new)
a$proc$new = t$proc
C
C --------------------------------------------
C building dist descriptor: distribute array b
C --------------------------------------------
call hpf_distrtd_clone(t$dist, b$dist$new)
b$proc$new = t$proc
C
C --------------------------------------------
C building dist descriptor: distribute array c
C --------------------------------------------
call hpf_distrtd_clone(t$dist, c$dist$new)
c$proc$new = t$proc
C
C -----------------------------------------
C building align descriptor: align c with t
C -----------------------------------------
call hpf_alignrtd_alloc(1, 1, c$align$new)
call hpf_alignrtd_setdim_src(c$align$new, 0, 0)
call hpf_alignrtd_setdim_tmpl(c$align$new, 0, 0, 1, 0, 0)
C
C ------------------------------------------------------------
C establish template and processor association: align c with t
C ------------------------------------------------------------
c$tmpl$new = t$template
c$proc$new = t$proc
C
C -----------------------------------------
C building align descriptor: align b with t
C -----------------------------------------
call hpf_alignrtd_alloc(1, 1, b$align$new)
call hpf_alignrtd_setdim_src(b$align$new, 0, 0)
call hpf_alignrtd_setdim_tmpl(b$align$new, 0, 0, 1, 0, 0)
C
C ------------------------------------------------------------
C establish template and processor association: align b with t
C ------------------------------------------------------------
b$tmpl$new = t$template
b$proc$new = t$proc
C
C -----------------------------------------
C building align descriptor: align a with t
C -----------------------------------------
call hpf_alignrtd_alloc(1, 1, a$align$new)
call hpf_alignrtd_setdim_src(a$align$new, 0, 0)
call hpf_alignrtd_setdim_tmpl(a$align$new, 0, 0, 1, 0, 0)
C
C ------------------------------------------------------------
C establish template and processor association: align a with t
C ------------------------------------------------------------
a$tmpl$new = t$template
a$proc$new = t$proc
C
C ------------------------------------------------------------------------
C allocate or redistribute array a; compute array-indexed processor coords
C ------------------------------------------------------------------------
call hpf_array_remap(a, a$align, a$dist, a$tmpl, a$proc, a$align
*$new, a$dist$new, a$tmpl$new, a$proc$new, a$desc, a$data)
C
C -----------------------------------------------------------------------
C map array-indexed processor coordinates to partitioned array dimensions
C -----------------------------------------------------------------------
call hpf_arrayrtd_get_coords(hpf$heap, a$desc, a$coord)
a$coord$0 = hpf$heap$integer(a$coord + 0)
C
C ------------------------------------------------------------------------
C allocate or redistribute array b; compute array-indexed processor coords
C ------------------------------------------------------------------------
call hpf_array_remap(b, b$align, b$dist, b$tmpl, b$proc, b$align
*$new, b$dist$new, b$tmpl$new, b$proc$new, b$desc, b$data)
C
C -----------------------------------------------------------------------
C map array-indexed processor coordinates to partitioned array dimensions
C -----------------------------------------------------------------------
call hpf_arrayrtd_get_coords(hpf$heap, b$desc, b$coord)
b$coord$0 = hpf$heap$integer(b$coord + 0)
C
C ------------------------------------------------------------------------
C allocate or redistribute array c; compute array-indexed processor coords
C ------------------------------------------------------------------------
call hpf_array_remap(c, c$align, c$dist, c$tmpl, c$proc, c$align
*$new, c$dist$new, c$tmpl$new, c$proc$new, c$desc, c$data)
C
C -----------------------------------------------------------------------
C map array-indexed processor coordinates to partitioned array dimensions
C -----------------------------------------------------------------------
call hpf_arrayrtd_get_coords(hpf$heap, c$desc, c$coord)
c$coord$0 = hpf$heap$integer(c$coord + 0)
C
call hpf_nonlocals_alloc(hash$nonlocals)
C
C Loop section ---[ i = ((256 * p$myid1) + 1) ]---
C
if (1 .le. p$myid1) then
i = 256 * p$myid1 + 1
b(b$data + i - (b$coord$0 * 256 + 1)) = 2 * a(a$data + i - (a$
*coord$0 * 256 + 1))
endif
C
C Loop section ---[ ((256 * p$myid1) + 2) <= i <= ((256 * p$myid1) + 256)
C]---
C
C
C Loop section ---[ ((256 * p$myid1) + 2) <= i <= ((256 * p$myid1) + 256)
C]---
C
do i = 256 * p$myid1 + 2, 256 * p$myid1 + 256
b(b$data + i - (b$coord$0 * 256 + 1)) = 2 * a(a$data + i - (a$
*coord$0 * 256 + 1))
lnltmp1 = a(a$data + i - 1 - (a$coord$0 * 256 + 1)) + b(b$data
* + i - 1 - (b$coord$0 * 256 + 1))
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 257)
* then
c(c$data + i - (c$coord$0 * 256 + 1)) = lnltmp1
else
call hpf_nonlocal_insertd(hash$nonlocals, c$data, i, lnltmp1
*)
endif
enddo
C
C Loop section ---[ i = ((256 * p$myid1) + 257) ]---
C
if (p$myid1 .le. 2) then
i = 256 * p$myid1 + 257
lnltmp2 = a(a$data + i - 1 - (a$coord$0 * 256 + 1)) + b(b$data
* + i - 1 - (b$coord$0 * 256 + 1))
if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 257)
* then
c(c$data + i - (c$coord$0 * 256 + 1)) = lnltmp2
else
call hpf_nonlocal_insertd(hash$nonlocals, c$data, i, lnltmp2
*)
endif
endif
C
C Loop section ---[ 0 <= p$q1 <= 3 ]---
C
do p$q1 = 0, 3
if (p$myid1 .ne. p$q1) then
C --< Loop Counters >--
counter$c$3 = 0
if (max(256 * p$q1 + 1, 256 * p$myid1 + 2) .le. min(256 * p$
*q1 + 256, 256 * p$myid1 + 257)) then
counter$c$3 = counter$c$3 + min(256 * p$q1 + 256, 256 * p$
*myid1 + 257) - max(256 * p$q1 + 1, 256 * p$myid1 + 2) + 1
endif
call hpf_buffer_alloc(counter$c$3 * 8, send$buf$c$3)
call hpf_ptr_to_index(hpf$heap, send$buf$c$3, 8, send$buf$c$
*3$index)
C --< Pack Loop For Send For Nonlocal Write >--
counter$c$3 = 0
C
C Loop section ---[ max(((256 * p$q1) + 1), ((256 * p$myid1) + 2)) <=
Ci1 <= min(((256 * p$q1) + 256), ((256 * p$myid1) + 257)) ]---
C
do i1 = max(256 * p$q1 + 1, 256 * p$myid1 + 2), min(256 * p$
*q1 + 256, 256 * p$myid1 + 257)
hpf$heap$double(send$buf$c$3$index + counter$c$3) = hpf_no
*nlocal_lookupd(hash$nonlocals, c$data, i1)
counter$c$3 = counter$c$3 + 1
enddo
if (counter$c$3 .gt. 0) then
call mpi_send(hpf$heap$double(send$buf$c$3$index), counter
*$c$3, MPI_DOUBLE_PRECISION, p$q1, 1, p$cmap, request, ierr)
endif
call hpf_buffer_free(send$buf$c$3)
endif
enddo
continue
continue
C
C Loop section ---[ 0 <= p$q1 <= 3 ]---
C
do p$q1 = 0, 3
if (p$myid1 .ne. p$q1) then
C --< Loop Counters >--
counter$c$3 = 0
if (max(256 * p$myid1 + 1, 256 * p$q1 + 2) .le. min(256 * p$
*myid1 + 256, 256 * p$q1 + 257)) then
counter$c$3 = counter$c$3 + min(256 * p$myid1 + 256, 256 *
* p$q1 + 257) - max(256 * p$myid1 + 1, 256 * p$q1 + 2) + 1
endif
call hpf_buffer_alloc(counter$c$3 * 8, recv$buf$c$3)
call hpf_ptr_to_index(hpf$heap, recv$buf$c$3, 8, recv$buf$c$
*3$index)
if (counter$c$3 .gt. 0) then
call mpi_recv(hpf$heap$double(recv$buf$c$3$index), counter
*$c$3, MPI_DOUBLE_PRECISION, p$q1, 1, p$cmap, request, ierr)
endif
C --< Unpack Loop From Recv For Nonlocal Write >--
counter$c$3 = 0
C
C Loop section ---[ max(((256 * p$myid1) + 1), ((256 * p$q1) + 2)) <=
Ci1 <= min(((256 * p$myid1) + 256), ((256 * p$q1) + 257)) ]---
C
do i1 = max(256 * p$myid1 + 1, 256 * p$q1 + 2), min(256 * p$
*myid1 + 256, 256 * p$q1 + 257)
c(c$data + i1 - (c$coord$0 * 256 + 1)) = hpf$heap$double(r
*ecv$buf$c$3$index + counter$c$3)
counter$c$3 = counter$c$3 + 1
enddo
call hpf_buffer_free(recv$buf$c$3)
endif
enddo
call hpf_nonlocals_free(hash$nonlocals)
C -----------------------------
C finalize run-time descriptors
C -----------------------------
call hpf_procrtd_free(p$processors)
call hpf_tmplrtd_free(t$template)
call hpf_alignrtd_free(a$align)
call hpf_alignrtd_free(a$align$new)
call hpf_distrtd_free(a$dist)
call hpf_distrtd_free(a$dist$new)
call hpf_array_free(a, a$data, a$desc)
call hpf_arrayrtd_free(a$desc)
call hpf_alignrtd_free(b$align)
call hpf_alignrtd_free(b$align$new)
call hpf_distrtd_free(b$dist)
call hpf_distrtd_free(b$dist$new)
call hpf_array_free(b, b$data, b$desc)
call hpf_arrayrtd_free(b$desc)
call hpf_alignrtd_free(c$align)
call hpf_alignrtd_free(c$align$new)
call hpf_distrtd_free(c$dist)
call hpf_distrtd_free(c$dist$new)
call hpf_array_free(c, c$data, c$desc)
call hpf_arrayrtd_free(c$desc)
C ---------------------------
C runtime system finalization
C ---------------------------
call mpi_finalize(ierr)
C
end