heterodb/pg-strom

gpu_cache有効化テーブルの列にSUMを実行するとCUDA_ERROR_ASSERTが発生

Closed this issue · 2 comments

SELECT SUM(a) FROM cache_test_table;

実行計画

postgres=# EXPLAIN SELECT SUM(a) FROM cache_test_table;
                                        QUERY PLAN                                        
------------------------------------------------------------------------------------------
 Aggregate  (cost=101.27..101.28 rows=1 width=8)
   ->  Custom Scan (GpuPreAgg) on cache_test_table  (cost=101.25..101.26 rows=1 width=32)
         GPU Projection: pgstrom.psum((a)::bigint)
         GPU Cache: GPU0 [phase: ready, max_num_rows: 10000]
(4 rows)

エラーメッセージ

ERROR:  gpu_service.c:2000  failed on cuEventSynchronize: CUDA_ERROR_ASSERT
HINT:  device at GPU-0, function at gpuservHandleGpuTaskExec

全体クエリ:

-- Reproduction script for the reported failure: it builds a table with a
-- GPU cache sync trigger, fills an int1 column, and runs SUM() over it.
-- Per the report above, the final SELECT fails with CUDA_ERROR_ASSERT.
SET pg_strom.regression_test_mode = on;
-- Only ERROR-level (and above) messages reach the client while the scratch
-- schema is dropped with CASCADE and re-created; the setting is reset after.
SET client_min_messages = error;
DROP SCHEMA IF EXISTS gpu_cache_temp_test CASCADE;
CREATE SCHEMA gpu_cache_temp_test;
RESET client_min_messages;
-- Unqualified names resolve in the scratch schema first, then in public.
SET search_path = gpu_cache_temp_test,public;
---
--- Creating a table on GPU cache
---
-- NOTE(review): int1 is not a built-in PostgreSQL type; presumably the
-- 1-byte integer type supplied by PG-Strom -- confirm.
CREATE TABLE cache_test_table (
  id   int,
  a    int1
);
---
--- GPU Cache configuration
---
-- Row-level AFTER trigger on INSERT/UPDATE/DELETE that calls
-- pgstrom.gpucache_sync_trigger with the option string below.
-- max_num_rows=10000 matches the "GPU Cache" line of the EXPLAIN output
-- quoted in the report; units of the other options are not shown here.
CREATE TRIGGER row_sync_test AFTER INSERT OR UPDATE OR DELETE ON cache_test_table FOR ROW 
    EXECUTE FUNCTION pgstrom.gpucache_sync_trigger('gpu_device_id=0,max_num_rows=10000,redo_buffer_size=150m,gpu_sync_threshold=10m,gpu_sync_interval=4');
-- ENABLE ALWAYS makes the trigger fire regardless of session_replication_role.
ALTER TABLE cache_test_table ENABLE ALWAYS TRIGGER row_sync_test;
-- Insert one probe row (column "a" is left NULL); per the original note,
-- this first insert is what makes the GPU cache.
INSERT INTO cache_test_table(id) values (1);
-- Check the pgstrom.gpucache_info entry for this table in the current
-- database and show its config_options.
SELECT config_options FROM pgstrom.gpucache_info WHERE table_name='cache_test_table' AND database_name=current_database();

-- Remove the probe row so the table is empty before the real data load.
TRUNCATE TABLE cache_test_table;
-- Force to use GPU Cache (disables the planner's preference for seq scans)
SET enable_seqscan=off;
---
--- INSERT
---
-- EXPLAIN without ANALYZE only prints the plan; this INSERT is not executed.
EXPLAIN (costs off, verbose)
INSERT INTO cache_test_table(id) values (1);


-- Load 4000 rows: id = 1..4000 from generate_series, a = pgstrom.random_int(...).
-- No column list is given, so the SELECT output maps to (id, a) positionally.
INSERT INTO cache_test_table (
  SELECT x 
  ,pgstrom.random_int(1,-128,127)     -- a int1; presumably bounds -128..127, meaning of the first argument not shown here -- TODO confirm
  FROM generate_series(1,4000) x
);
-- No table is named, so this vacuums and analyzes every table the current
-- user is permitted to process in this database.
VACUUM ANALYZE;


-- The failing query. The maintainer's reply in this issue attributes the
-- failure to an assert condition that did not account for negative inputs.
SELECT SUM(a) FROM cache_test_table;

b73ff0bb98a868ecc524f3594d30f91ed8f1b013 で修正しました。

assert(x) の条件が間違っていたものですね。負の数が入力される事を考慮していませんでした。

確認しました。ありがとうございました。