mrnorman/YAKL

I am trying Fortran and C Plus Plus Interoperability with YAKL and I want to try it supports non-1 lower bounds.

voyage-li opened this issue · 1 comments

Fortran codes:

program wrapper
   use gator_mod
   implicit none
   integer :: n1,n2,n3
   real(8),contiguous,pointer :: a(:,:)=> null()
   real(8),contiguous,pointer :: b(:,:)=> null()
   real(8),contiguous,pointer :: c(:,:)=> null()
   integer :: i,j ,l

   interface init
      subroutine add(a,b,c, n1,n2,l) bind(C,name="add")
         use iso_c_binding
         real(8), dimension(*) :: a,b,c
         integer :: n1,n2,n3,l
      end subroutine
   end interface
   call gator_init()

   l= -1
   n1 = 1
   n2 = 2

   allocate(a(l:n1,l:n2))
   allocate(b(l:n1,l:n2))
   allocate(c(l:n1,l:n2))

   do i = l, n1
      do j = l, n2
         a(i,j ) =i*100+j*10
         b(i,j ) = i*1+j*0.1
         c(i,j ) = 1.0
      end do
   end do

   do i = l, n1
      do j = l, n2
         print *,i,j ,a(i,j ),b(i,j ),c(i,j )
      end do
   end do
   call add(a,b,c,n1,n2,l)
   do i = l, n1
      do j = l, n2
         if(a(i,j ) /= b(i,j )/c(i,j )) then
            print *,i,j ,a(i,j ),b(i,j ),c(i,j )
         end if
      end do
   end do
   call gator_finalize()

end program wrapper

C++ codes:

#include <iostream>
#include "YAKL.h"

typedef double real;

typedef yakl::Array<real, 2, yakl::memHost, yakl::styleFortran> realHost2d;
typedef yakl::Array<real, 2, yakl::memDevice, yakl::styleFortran> real2d;

void die(std::string msg)
{
    yakl::yakl_throw(msg.c_str());
}

extern "C" void add(real *a_p, real *b_p, real *c_p, int &n1, int &n2, int &l)
{
    realHost2d a_host("a_host", a_p, {l, n1}, {l, n2});
    realHost2d b_host("b_host", b_p, {l, n1}, {l, n2});
    realHost2d c_host("c_host", c_p, {l, n1}, {l, n2});

    real2d a("a", {l, n1}, {l, n2});
    real2d b("b", {l, n1}, {l, n2});
    real2d c("c", {l, n1}, {l, n2});

    a_host.deep_copy_to(a);
    b_host.deep_copy_to(b);
    c_host.deep_copy_to(c);

    yakl::fortran::parallel_for(
        yakl::fortran::Bounds<2>({l, n1}, {l, n2}), YAKL_LAMBDA(int j, int i) {
            a(i, j) = b(i, j) / c(i, j);
        });

    printf("Finish!\n");

    a.deep_copy_to(a_host);
    b.deep_copy_to(b_host);
    c.deep_copy_to(c_host);
    yakl::fence();
}

output:

Using YAKL Timers
Using memory pool. Initial size: 4.92383GB ;  Grow size: 4.92383GB.
NVIDIA A100-PCIE-40GB
          -1          -1  -110.00000000000000       -1.1000000238418579        1.0000000000000000
          -1           0  -100.00000000000000       -1.0000000000000000        1.0000000000000000
          -1           1  -90.000000000000000      -0.89999997615814209        1.0000000000000000
          -1           2  -80.000000000000000      -0.80000001192092896        1.0000000000000000
           0          -1  -10.000000000000000      -0.10000000149011612        1.0000000000000000
           0           0   0.0000000000000000        0.0000000000000000        1.0000000000000000
           0           1   10.000000000000000       0.10000000149011612        1.0000000000000000
           0           2   20.000000000000000       0.20000000298023224        1.0000000000000000
           1          -1   90.000000000000000       0.89999997615814209        1.0000000000000000
           1           0   100.00000000000000        1.0000000000000000        1.0000000000000000
           1           1   110.00000000000000        1.1000000238418579        1.0000000000000000
           1           2   120.00000000000000        1.2000000476837158        1.0000000000000000
Finish!
           0           2   20.000000000000000       0.20000000298023224        1.0000000000000000
           1           2   120.00000000000000        1.2000000476837158        1.0000000000000000
Pool Memory High Water Mark:       384
Pool Memory High Water Efficiency: 7.26321e-08

You see when i = 0 j = 2 andi =1 j = 2 is not doing the right calculation. Is there something wrong with my codes

I mistaked i j place