I am trying Fortran and C Plus Plus Interoperability with YAKL and I want to try it supports non-1 lower bounds.
voyage-li opened this issue · 1 comments
voyage-li commented
Fortran codes:
program wrapper
use gator_mod
implicit none
integer :: n1,n2,n3
real(8),contiguous,pointer :: a(:,:)=> null()
real(8),contiguous,pointer :: b(:,:)=> null()
real(8),contiguous,pointer :: c(:,:)=> null()
integer :: i,j ,l
interface init
subroutine add(a,b,c, n1,n2,l) bind(C,name="add")
use iso_c_binding
real(8), dimension(*) :: a,b,c
integer :: n1,n2,n3,l
end subroutine
end interface
call gator_init()
l= -1
n1 = 1
n2 = 2
allocate(a(l:n1,l:n2))
allocate(b(l:n1,l:n2))
allocate(c(l:n1,l:n2))
do i = l, n1
do j = l, n2
a(i,j ) =i*100+j*10
b(i,j ) = i*1+j*0.1
c(i,j ) = 1.0
end do
end do
do i = l, n1
do j = l, n2
print *,i,j ,a(i,j ),b(i,j ),c(i,j )
end do
end do
call add(a,b,c,n1,n2,l)
do i = l, n1
do j = l, n2
if(a(i,j ) /= b(i,j )/c(i,j )) then
print *,i,j ,a(i,j ),b(i,j ),c(i,j )
end if
end do
end do
call gator_finalize()
end program wrapper
C++ codes:
#include <iostream>
#include "YAKL.h"
typedef double real;
typedef yakl::Array<real, 2, yakl::memHost, yakl::styleFortran> realHost2d;
typedef yakl::Array<real, 2, yakl::memDevice, yakl::styleFortran> real2d;
void die(std::string msg)
{
yakl::yakl_throw(msg.c_str());
}
extern "C" void add(real *a_p, real *b_p, real *c_p, int &n1, int &n2, int &l)
{
realHost2d a_host("a_host", a_p, {l, n1}, {l, n2});
realHost2d b_host("b_host", b_p, {l, n1}, {l, n2});
realHost2d c_host("c_host", c_p, {l, n1}, {l, n2});
real2d a("a", {l, n1}, {l, n2});
real2d b("b", {l, n1}, {l, n2});
real2d c("c", {l, n1}, {l, n2});
a_host.deep_copy_to(a);
b_host.deep_copy_to(b);
c_host.deep_copy_to(c);
yakl::fortran::parallel_for(
yakl::fortran::Bounds<2>({l, n1}, {l, n2}), YAKL_LAMBDA(int j, int i) {
a(i, j) = b(i, j) / c(i, j);
});
printf("Finish!\n");
a.deep_copy_to(a_host);
b.deep_copy_to(b_host);
c.deep_copy_to(c_host);
yakl::fence();
}
output:
Using YAKL Timers
Using memory pool. Initial size: 4.92383GB ; Grow size: 4.92383GB.
NVIDIA A100-PCIE-40GB
-1 -1 -110.00000000000000 -1.1000000238418579 1.0000000000000000
-1 0 -100.00000000000000 -1.0000000000000000 1.0000000000000000
-1 1 -90.000000000000000 -0.89999997615814209 1.0000000000000000
-1 2 -80.000000000000000 -0.80000001192092896 1.0000000000000000
0 -1 -10.000000000000000 -0.10000000149011612 1.0000000000000000
0 0 0.0000000000000000 0.0000000000000000 1.0000000000000000
0 1 10.000000000000000 0.10000000149011612 1.0000000000000000
0 2 20.000000000000000 0.20000000298023224 1.0000000000000000
1 -1 90.000000000000000 0.89999997615814209 1.0000000000000000
1 0 100.00000000000000 1.0000000000000000 1.0000000000000000
1 1 110.00000000000000 1.1000000238418579 1.0000000000000000
1 2 120.00000000000000 1.2000000476837158 1.0000000000000000
Finish!
0 2 20.000000000000000 0.20000000298023224 1.0000000000000000
1 2 120.00000000000000 1.2000000476837158 1.0000000000000000
Pool Memory High Water Mark: 384
Pool Memory High Water Efficiency: 7.26321e-08
You see when i = 0 j = 2
andi =1 j = 2
is not doing the right calculation. Is there something wrong with my codes
voyage-li commented
I mistaked i j place