HPF Example with Multiple Statement Groups
Generated Code


      program alignloop
        integer i
        parameter (n = 1024)
CHPF$   processors p (4) 
CHPF$   template t (n) 
CHPF$   align a (i) with t(i)
CHPF$   align b (i) with t(i)
CHPF$   align c (i) with t(i)
CHPF$   distribute t(block) onto p
C       --------------------------------------------------------------
C       declarations for heap-based runtime dynamic storage allocation
C       --------------------------------------------------------------
        common /hpf$heap$common/ hpf$heap
        dimension hpf$heap$integer(0:0)
        equivalence (hpf$heap$integer(0), hpf$heap)
        dimension hpf$heap$double(0:0)
        equivalence (hpf$heap$double(0), hpf$heap)
#include 
        integer status(MPI_STATUS_SIZE)
C       -----------------------------------------------
C       declarations for compiler-generated temporaries
C       -----------------------------------------------
        save a, b, c
        logical p$wrap
        integer counter$c$3, send$buf$c$3$index, i1, p$q1
        integer recv$buf$c$3$index, a$coord$0, b$coord$0, c$coord$0
        integer p$myid1, hpf$heap$integer, p$cmap, p$dims, sendproc
        integer recvproc, myid, ierr, request, n
        integer*4 a$data, a$coord, b$data, b$coord, c$data, c$coord
        integer*4 p$coord
        real*8 hpf$heap$double, lnltmp1, lnltmp2, a, b, c, hpf$heap
        integer*4 hash$nonlocals, send$buf$c$3, recv$buf$c$3, a$align
        integer*4 a$dist, a$tmpl, a$proc, a$align$new, a$dist$new
        integer*4 a$tmpl$new, a$proc$new, a$desc, b$align, b$dist
        integer*4 b$tmpl, b$proc, b$align$new, b$dist$new, b$tmpl$new
        integer*4 b$proc$new, b$desc, c$align, c$dist, c$tmpl, c$proc
        integer*4 c$align$new, c$dist$new, c$tmpl$new, c$proc$new
        integer*4 c$desc, t$template, t$proc, t$dist, p$processors
        dimension a(0:0), b(0:0), c(0:0), p$dims(1:1), p$wrap(1:1)
C       
C       
C       -----------------------------
C       runtime system initialization
C       -----------------------------
        call mpi_init(ierr)
        call mpi_comm_rank(MPI_COMM_WORLD, myid, ierr)
C       -----------------------------------------------
C       initializations for run-time descriptor indices
C       -----------------------------------------------
        a$align$new = 0
        a$align = 0
        b$align$new = 0
        b$align = 0
        c$align$new = 0
        c$align = 0
        t$dist = 0
        a$dist$new = 0
        a$dist = 0
        b$dist$new = 0
        b$dist = 0
        c$dist$new = 0
        c$dist = 0
        a$desc = 0
        b$desc = 0
        c$desc = 0
C       
C       
C       -------------------------------------
C       building array descriptor for array a
C       -------------------------------------
        call hpf_arrayrtd_alloc(1, 2, a$desc)
        call hpf_arrayrtd_setdim(a$desc, 0, 1, 1024)
C       
C       -------------------------------------
C       building array descriptor for array b
C       -------------------------------------
        call hpf_arrayrtd_alloc(1, 2, b$desc)
        call hpf_arrayrtd_setdim(b$desc, 0, 1, 1024)
C       
C       -------------------------------------
C       building array descriptor for array c
C       -------------------------------------
        call hpf_arrayrtd_alloc(1, 2, c$desc)
        call hpf_arrayrtd_setdim(c$desc, 0, 1, 1024)
C       
C       ------------------------------------
C       building tmpl descriptor: template t
C       ------------------------------------
        call hpf_tmplrtd_alloc(1, t$template)
        call hpf_tmplrtd_setdim(t$template, 0, 1, 1024)
C       
C       ---------------------------------------
C       building procs descriptor: processors p
C       ---------------------------------------
        call hpf_procrtd_alloc(1, p$processors)
        call hpf_procrtd_setdim(p$processors, 0, 1, 4)
C       
C       ----------------------------------------------------
C       initialize processor topology for processors array p
C       ----------------------------------------------------
        p$dims(1) = 4
        p$wrap(1) = .false.
        call mpi_cart_create(MPI_COMM_WORLD, 1, p$dims, p$wrap, .false.,
     * p$cmap, ierr)
        call hpf_procrtd_get_coords(hpf$heap, p$processors, p$coord)
        call mpi_cart_coords(p$cmap, myid, 1, hpf$heap$integer(p$coord),
     * ierr)
        p$myid1 = hpf$heap$integer(p$coord + 0)
        call hpf_procrtd_set_chandle(p$processors, p$cmap)
C       
C       -----------------------------------------------
C       building dist descriptor: distribute template t
C       -----------------------------------------------
        call hpf_distrtd_alloc(1, t$dist)
        call hpf_distrtd_setdim(t$dist, 0, -1, 256, 0)
        t$proc = p$processors
C       
C       --------------------------------------------
C       building dist descriptor: distribute array a
C       --------------------------------------------
        call hpf_distrtd_clone(t$dist, a$dist$new)
        a$proc$new = t$proc
C       
C       --------------------------------------------
C       building dist descriptor: distribute array b
C       --------------------------------------------
        call hpf_distrtd_clone(t$dist, b$dist$new)
        b$proc$new = t$proc
C       
C       --------------------------------------------
C       building dist descriptor: distribute array c
C       --------------------------------------------
        call hpf_distrtd_clone(t$dist, c$dist$new)
        c$proc$new = t$proc
C       
C       -----------------------------------------
C       building align descriptor: align c with t
C       -----------------------------------------
        call hpf_alignrtd_alloc(1, 1, c$align$new)
        call hpf_alignrtd_setdim_src(c$align$new, 0, 0)
        call hpf_alignrtd_setdim_tmpl(c$align$new, 0, 0, 1, 0, 0)
C       
C       ------------------------------------------------------------
C       establish template and processor association: align c with t
C       ------------------------------------------------------------
        c$tmpl$new = t$template
        c$proc$new = t$proc
C       
C       -----------------------------------------
C       building align descriptor: align b with t
C       -----------------------------------------
        call hpf_alignrtd_alloc(1, 1, b$align$new)
        call hpf_alignrtd_setdim_src(b$align$new, 0, 0)
        call hpf_alignrtd_setdim_tmpl(b$align$new, 0, 0, 1, 0, 0)
C       
C       ------------------------------------------------------------
C       establish template and processor association: align b with t
C       ------------------------------------------------------------
        b$tmpl$new = t$template
        b$proc$new = t$proc
C       
C       -----------------------------------------
C       building align descriptor: align a with t
C       -----------------------------------------
        call hpf_alignrtd_alloc(1, 1, a$align$new)
        call hpf_alignrtd_setdim_src(a$align$new, 0, 0)
        call hpf_alignrtd_setdim_tmpl(a$align$new, 0, 0, 1, 0, 0)
C       
C       ------------------------------------------------------------
C       establish template and processor association: align a with t
C       ------------------------------------------------------------
        a$tmpl$new = t$template
        a$proc$new = t$proc
C       
C       ------------------------------------------------------------------------
C       allocate or redistribute array a; compute array-indexed processor coords
C       ------------------------------------------------------------------------
        call hpf_array_remap(a, a$align, a$dist, a$tmpl, a$proc, a$align
     *$new, a$dist$new, a$tmpl$new, a$proc$new, a$desc, a$data)
C       
C       -----------------------------------------------------------------------
C       map array-indexed processor coordinates to partitioned array dimensions
C       -----------------------------------------------------------------------
        call hpf_arrayrtd_get_coords(hpf$heap, a$desc, a$coord)
        a$coord$0 = hpf$heap$integer(a$coord + 0)
C       
C       ------------------------------------------------------------------------
C       allocate or redistribute array b; compute array-indexed processor coords
C       ------------------------------------------------------------------------
        call hpf_array_remap(b, b$align, b$dist, b$tmpl, b$proc, b$align
     *$new, b$dist$new, b$tmpl$new, b$proc$new, b$desc, b$data)
C       
C       -----------------------------------------------------------------------
C       map array-indexed processor coordinates to partitioned array dimensions
C       -----------------------------------------------------------------------
        call hpf_arrayrtd_get_coords(hpf$heap, b$desc, b$coord)
        b$coord$0 = hpf$heap$integer(b$coord + 0)
C       
C       ------------------------------------------------------------------------
C       allocate or redistribute array c; compute array-indexed processor coords
C       ------------------------------------------------------------------------
        call hpf_array_remap(c, c$align, c$dist, c$tmpl, c$proc, c$align
     *$new, c$dist$new, c$tmpl$new, c$proc$new, c$desc, c$data)
C       
C       -----------------------------------------------------------------------
C       map array-indexed processor coordinates to partitioned array dimensions
C       -----------------------------------------------------------------------
        call hpf_arrayrtd_get_coords(hpf$heap, c$desc, c$coord)
        c$coord$0 = hpf$heap$integer(c$coord + 0)
C       
        call hpf_nonlocals_alloc(hash$nonlocals)
C       
C       Loop section ---[ i = ((256 * p$myid1) + 1) ]---
C       
        if (1 .le. p$myid1) then
          i = 256 * p$myid1 + 1
          b(b$data + i - (b$coord$0 * 256 + 1)) = 2 * a(a$data + i - (a$
     *coord$0 * 256 + 1))
        endif
C
C       Loop section ---[ ((256 * p$myid1) + 2) <= i <= ((256 * p$myid1) + 256) 
C]---
C       
C       
C       Loop section ---[ ((256 * p$myid1) + 2) <= i <= ((256 * p$myid1) + 256) 
C]---
C       
        do i = 256 * p$myid1 + 2, 256 * p$myid1 + 256
          b(b$data + i - (b$coord$0 * 256 + 1)) = 2 * a(a$data + i - (a$
     *coord$0 * 256 + 1))
          lnltmp1 = a(a$data + i - 1 - (a$coord$0 * 256 + 1)) + b(b$data
     * + i - 1 - (b$coord$0 * 256 + 1))
          if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 257)
     * then
            c(c$data + i - (c$coord$0 * 256 + 1)) = lnltmp1
          else
            call hpf_nonlocal_insertd(hash$nonlocals, c$data, i, lnltmp1
     *)
          endif
        enddo
C       
C       Loop section ---[ i = ((256 * p$myid1) + 257) ]---
C       
        if (p$myid1 .le. 2) then
          i = 256 * p$myid1 + 257
          lnltmp2 = a(a$data + i - 1 - (a$coord$0 * 256 + 1)) + b(b$data
     * + i - 1 - (b$coord$0 * 256 + 1))
          if (p$myid1 * 256 + 1 .le. i .and. i .lt. p$myid1 * 256 + 257)
     * then
            c(c$data + i - (c$coord$0 * 256 + 1)) = lnltmp2
          else
            call hpf_nonlocal_insertd(hash$nonlocals, c$data, i, lnltmp2
     *)
          endif
        endif
C       
C       Loop section ---[ 0 <= p$q1 <= 3 ]---
C       
        do p$q1 = 0, 3
          if (p$myid1 .ne. p$q1) then
C           --< Loop Counters >--
            counter$c$3 = 0
            if (max(256 * p$q1 + 1, 256 * p$myid1 + 2) .le. min(256 * p$
     *q1 + 256, 256 * p$myid1 + 257)) then
              counter$c$3 = counter$c$3 + min(256 * p$q1 + 256, 256 * p$
     *myid1 + 257) - max(256 * p$q1 + 1, 256 * p$myid1 + 2) + 1
            endif
            call hpf_buffer_alloc(counter$c$3 * 8, send$buf$c$3)
            call hpf_ptr_to_index(hpf$heap, send$buf$c$3, 8, send$buf$c$
     *3$index)
C           --< Pack Loop For Send For Nonlocal Write >--
            counter$c$3 = 0
C           
C           Loop section ---[ max(((256 * p$q1) + 1), ((256 * p$myid1) + 2)) <= 
Ci1 <= min(((256 * p$q1) + 256), ((256 * p$myid1) + 257)) ]---
C           
            do i1 = max(256 * p$q1 + 1, 256 * p$myid1 + 2), min(256 * p$
     *q1 + 256, 256 * p$myid1 + 257)
              hpf$heap$double(send$buf$c$3$index + counter$c$3) = hpf_no
     *nlocal_lookupd(hash$nonlocals, c$data, i1)
              counter$c$3 = counter$c$3 + 1
            enddo
            if (counter$c$3 .gt. 0) then
              call mpi_send(hpf$heap$double(send$buf$c$3$index), counter
     *$c$3, MPI_DOUBLE_PRECISION, p$q1, 1, p$cmap, request, ierr)
            endif
            call hpf_buffer_free(send$buf$c$3)
          endif
        enddo
        continue
        continue
C       
C       Loop section ---[ 0 <= p$q1 <= 3 ]---
C       
        do p$q1 = 0, 3
          if (p$myid1 .ne. p$q1) then
C           --< Loop Counters >--
            counter$c$3 = 0
            if (max(256 * p$myid1 + 1, 256 * p$q1 + 2) .le. min(256 * p$
     *myid1 + 256, 256 * p$q1 + 257)) then
              counter$c$3 = counter$c$3 + min(256 * p$myid1 + 256, 256 *
     * p$q1 + 257) - max(256 * p$myid1 + 1, 256 * p$q1 + 2) + 1
            endif
            call hpf_buffer_alloc(counter$c$3 * 8, recv$buf$c$3)
            call hpf_ptr_to_index(hpf$heap, recv$buf$c$3, 8, recv$buf$c$
     *3$index)
            if (counter$c$3 .gt. 0) then
              call mpi_recv(hpf$heap$double(recv$buf$c$3$index), counter
     *$c$3, MPI_DOUBLE_PRECISION, p$q1, 1, p$cmap, request, ierr)
            endif
C           --< Unpack Loop From Recv For Nonlocal Write >--
            counter$c$3 = 0
C           
C           Loop section ---[ max(((256 * p$myid1) + 1), ((256 * p$q1) + 2)) <= 
Ci1 <= min(((256 * p$myid1) + 256), ((256 * p$q1) + 257)) ]---
C           
            do i1 = max(256 * p$myid1 + 1, 256 * p$q1 + 2), min(256 * p$
     *myid1 + 256, 256 * p$q1 + 257)
              c(c$data + i1 - (c$coord$0 * 256 + 1)) = hpf$heap$double(r
     *ecv$buf$c$3$index + counter$c$3)
              counter$c$3 = counter$c$3 + 1
            enddo
            call hpf_buffer_free(recv$buf$c$3)
          endif
        enddo
        call hpf_nonlocals_free(hash$nonlocals)
C       -----------------------------
C       finalize run-time descriptors
C       -----------------------------
        call hpf_procrtd_free(p$processors)
        call hpf_tmplrtd_free(t$template)
        call hpf_alignrtd_free(a$align)
        call hpf_alignrtd_free(a$align$new)
        call hpf_distrtd_free(a$dist)
        call hpf_distrtd_free(a$dist$new)
        call hpf_array_free(a, a$data, a$desc)
        call hpf_arrayrtd_free(a$desc)
        call hpf_alignrtd_free(b$align)
        call hpf_alignrtd_free(b$align$new)
        call hpf_distrtd_free(b$dist)
        call hpf_distrtd_free(b$dist$new)
        call hpf_array_free(b, b$data, b$desc)
        call hpf_arrayrtd_free(b$desc)
        call hpf_alignrtd_free(c$align)
        call hpf_alignrtd_free(c$align$new)
        call hpf_distrtd_free(c$dist)
        call hpf_distrtd_free(c$dist$new)
        call hpf_array_free(c, c$data, c$desc)
        call hpf_arrayrtd_free(c$desc)
C       ---------------------------
C       runtime system finalization
C       ---------------------------
        call mpi_finalize(ierr)
C       
      end