matmul.floopy doesn't seem to work
certik opened this issue · 2 comments
certik commented
I am using the latest loopy, which I installed into a Conda environment using "pip install ." (I also had to install "fparser"). Running the example fails:
$ loopy matmul.floopy
Traceback (most recent call last):
File "/Users/ondrej/mambaforge/envs/loopy/bin/loopy", line 5, in <module>
loopy.cli.main()
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/cli.py", line 196, in main
cgr = lp.generate_code_v2(t_unit)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/__init__.py", line 629, in generate_code_v2
cgr = generate_code_for_a_single_kernel(program[func_id],
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/__init__.py", line 416, in generate_code_for_a_single_kernel
codegen_result = generate_host_or_device_program(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/result.py", line 339, in generate_host_or_device_program
codegen_result = build_loop_nest(codegen_state, schedule_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/control.py", line 203, in build_loop_nest
inner = generate_code_for_sched_index(codegen_state, schedule_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/control.py", line 54, in generate_code_for_sched_index
codegen_result = generate_host_or_device_program(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/result.py", line 323, in generate_host_or_device_program
temp_decls = ast_builder.get_temporary_decls(codegen_state, schedule_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 902, in get_temporary_decls
decl = self.get_temporary_var_declarator(codegen_state, tv)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 1084, in get_temporary_var_declarator
temp_var_decl = self.get_array_base_declarator(temp_var)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/opencl.py", line 753, in get_array_base_declarator
return POD(self, dtype, ary.name)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 277, in __init__
self.ctype = ast_builder.target.dtype_to_typename(dtype)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 442, in dtype_to_typename
return self.get_dtype_registry().dtype_to_ctype(dtype)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/pytools/__init__.py", line 777, in wrapper
result = function(obj, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/opencl.py", line 576, in get_dtype_registry
from loopy.target.c.compyte.dtypes import (DTypeRegistry,
ModuleNotFoundError: No module named 'loopy.target.c.compyte'
It seems to happen with the C target as well:
$ loopy --target=c matmul.floopy
Traceback (most recent call last):
File "/Users/ondrej/mambaforge/envs/loopy/bin/loopy", line 5, in <module>
loopy.cli.main()
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/cli.py", line 196, in main
cgr = lp.generate_code_v2(t_unit)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/__init__.py", line 629, in generate_code_v2
cgr = generate_code_for_a_single_kernel(program[func_id],
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/__init__.py", line 416, in generate_code_for_a_single_kernel
codegen_result = generate_host_or_device_program(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/result.py", line 339, in generate_host_or_device_program
codegen_result = build_loop_nest(codegen_state, schedule_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/control.py", line 203, in build_loop_nest
inner = generate_code_for_sched_index(codegen_state, schedule_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/control.py", line 54, in generate_code_for_sched_index
codegen_result = generate_host_or_device_program(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/codegen/result.py", line 323, in generate_host_or_device_program
temp_decls = ast_builder.get_temporary_decls(codegen_state, schedule_index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 902, in get_temporary_decls
decl = self.get_temporary_var_declarator(codegen_state, tv)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 1084, in get_temporary_var_declarator
temp_var_decl = self.get_array_base_declarator(temp_var)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 1004, in get_array_base_declarator
arg_decl = POD(self, ary.dtype, ary.name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 277, in __init__
self.ctype = ast_builder.target.dtype_to_typename(dtype)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 442, in dtype_to_typename
return self.get_dtype_registry().dtype_to_ctype(dtype)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/pytools/__init__.py", line 777, in wrapper
result = function(obj, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ondrej/mambaforge/envs/loopy/lib/python3.11/site-packages/loopy/target/c/__init__.py", line 1345, in get_dtype_registry
from loopy.target.c.compyte.dtypes import (
ModuleNotFoundError: No module named 'loopy.target.c.compyte'
matthiasdiener commented
Did you check out the submodule when cloning the loopy repo (e.g., with `git submodule update --init`)?
certik commented
Ah, I didn't! After checking out the submodule, everything works now:
$ loopy matmul.floopy
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \
MACRO_NAME(int8, char) \
MACRO_NAME(int16, short) \
MACRO_NAME(int32, int) \
MACRO_NAME(int64, long)
#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \
inline TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \
{ \
if (a<0) \
a = a - (b-1); \
return a/b; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B)
#undef LOOPY_DEFINE_FLOOR_DIV_POS_B
#undef LOOPY_CALL_WITH_INTEGER_TYPES
__kernel void __attribute__ ((reqd_work_group_size(8, 16, 1))) dgemm(int const m, int const n, int const l, double const alpha, __global double const *__restrict__ a, __global double const *__restrict__ b, __global double *__restrict__ c)
{
__local double a_acc_0[16 * 32];
__local double b_acc_0[32 * 8];
if (-1 + l >= 0)
for (int k_outer = 0; k_outer <= -1 + loopy_floor_div_pos_b_int32(31 + l, 32); ++k_outer)
{
barrier(CLK_LOCAL_MEM_FENCE) /* for a_acc_0 (a_acc rev-depends on insn0) */;
if (-1 + -1 * lid(0) + -16 * gid(0) + m >= 0 && -1 + -32 * k_outer + -1 * lid(1) + l >= 0)
for (int i2_outer = 0; i2_outer <= ((-32 + l + -32 * k_outer >= 0) ? 1 : -1 + -1 * lid(1) + -2 * k_outer + (15 + l + 15 * lid(1)) / 16); ++i2_outer)
for (int i1_outer = 0; i1_outer <= ((-16 + m + -16 * gid(0) >= 0) ? 1 : -1 + -1 * lid(0) + -2 * gid(0) + (7 + m + 7 * lid(0)) / 8); ++i1_outer)
a_acc_0[32 * (8 * i1_outer + lid(0)) + 16 * i2_outer + lid(1)] = a[16 * gid(0) + 8 * i1_outer + lid(0) + m * (32 * k_outer + 16 * i2_outer + lid(1))];
if (-1 + -32 * k_outer + -1 * lid(0) + l >= 0 && -1 + -1 * lid(1) + -8 * gid(1) + n >= 0 && 7 + -1 * lid(1) >= 0)
for (int i1_outer_0 = 0; i1_outer_0 <= ((-33 + l + -32 * k_outer >= 0) ? 3 : -1 + -1 * lid(0) + -4 * k_outer + (7 + l + 7 * lid(0)) / 8); ++i1_outer_0)
b_acc_0[8 * (8 * i1_outer_0 + lid(0)) + lid(1)] = b[32 * k_outer + 8 * i1_outer_0 + lid(0) + l * (8 * gid(1) + lid(1))];
barrier(CLK_LOCAL_MEM_FENCE) /* for a_acc_0 (insn0 depends on a_acc) */;
if (-1 + -1 * lid(0) + -8 * gid(1) + n >= 0 && -1 + -1 * lid(1) + -16 * gid(0) + m >= 0)
for (int k_inner = 0; k_inner <= ((-32 + l + -32 * k_outer >= 0) ? 31 : -1 + l + -32 * k_outer); ++k_inner)
c[16 * gid(0) + lid(1) + m * (8 * gid(1) + lid(0))] = c[16 * gid(0) + lid(1) + m * (8 * gid(1) + lid(0))] + alpha * b_acc_0[8 * k_inner + lid(0)] * a_acc_0[32 * lid(1) + k_inner];
}
}
Thanks @matthiasdiener !