LEFT JOIN has poor/inconsistent performance
muntdan opened this issue · 5 comments
What's wrong?
I have 2 CTE and than a UNION ALL.
WITH CTE1 as (SELECT * from Table1 Left Join Table2 where targetid in (1,2,3)), CTE2 as (SELECT col1 from CTE1, left join lateral (select col2 from table2 where targetid =cte1.targetid) on true where point is null ) --this one has 0 ROWS SELECT * FROM CTE1 WHERE point IS NOT NULL UNION ALL SELECT * FROM CTE2
all selects are < 1 second and without UNION ALL separately they work ok.
When UNION ALL is applied a CTE scan operation makes a Index scan on Table2 and query takes > 6 seconds
Re-categorizing as a possible performance enhancement, though this sounds like a planner issue? I think you could force a table scan using pg_hint_plan, so perhaps there is a workaround for you.
Please let us know if the query results were inaccurate, in which case it's certainly a bug we want to investigate.
Strange enough this now takes 6 seconds no matter with or without the union so it might be from the join inside the first CTE which separately take ~1 sec but when joined they take 6 seconds
Join performance is highly dependent on a lot of factors; there's nothing specific enough here for us to look into. If you can provide an example/test case we could investigate further.
Here are the scripts to reproduce:
CREATE TABLE ten (id int, tenant text, targetid int);
INSERT INTO ten VALUES (1, 'a', 101),(2, 'a', 102),(3, 'b', 201),(4, 'b', 202),
(5, 'c', 301),(6, 'c', 302),(7, 'd', 401),(8, 'd', 402),(9, 'e', 501),(10, 'e', 502);
CREATE TABLE tp (id int, targetid int, tenant text, datetime timestamp without time zone, speed int,
CONSTRAINT tp_pkey PRIMARY KEY (id, tenant, datetime) ) PARTITION BY LIST (tenant);
DO $$
DECLARE t text; d timestamp;
BEGIN
FOR t IN SELECT distinct tenant FROM ten LOOP
raise notice 'tp_ten%', t;
EXECUTE 'CREATE TABLE '|| 'tp_ten' || t || ' PARTITION OF tp FOR VALUES IN ('''|| t ||''') PARTITION BY RANGE(datetime);';
FOR d IN SELECT * FROM generate_series('2023-12-01 00:00:00'::timestamp, '2023-12-31 23:59:59'::timestamp, '1 hour') LOOP
raise notice 'day_hour:%', TO_CHAR( d, 'DD_HH24' );
EXECUTE 'CREATE TABLE '|| 'tp_ten' || t || '_' || TO_CHAR( d, 'DD_HH24' ) || ' PARTITION OF '
|| 'tp_ten' || t || ' FOR VALUES FROM (''' || d || ''') TO ('''|| d + INTERVAL '1 hour' ||''') USING columnar;';
END LOOP;
END LOOP;
END $$;
INSERT INTO tp
SELECT rnd.id, ten.targetid,ten.tenant, rnd.datetime, rnd.speed from
(SELECT row_number() over () as id,
generate_series('2023-12-01 00:00:00', '2023-12-31 23:59:59', '1 sec'::interval)::timestamp as datetime,
floor(random() * 100 + 1)::int as speed,
floor(random() * 10 + 1)::int as random) rnd inner join ten on rnd.random=ten.id;
select count(*) from tp --2.678.400
CREATE TABLE ev (id int, targetid int, tenant text, datetime timestamp without time zone, code text,
CONSTRAINT ev_pkey PRIMARY KEY (id, tenant, datetime) ) PARTITION BY LIST (tenant);
DO $$
DECLARE t text; d timestamp;
BEGIN
FOR t IN SELECT distinct tenant FROM ten LOOP
raise notice 'ev_ten%', t;
EXECUTE 'CREATE TABLE '|| 'ev_ten' || t || ' PARTITION OF ev FOR VALUES IN ('''|| t ||''') PARTITION BY RANGE(datetime);';
FOR d IN SELECT * FROM generate_series('2023-12-01 00:00:00'::timestamp, '2023-12-31 23:59:59'::timestamp, '1 hour') LOOP
raise notice 'day_hour:%', TO_CHAR( d, 'DD_HH24' );
EXECUTE 'CREATE TABLE '|| 'ev_ten' || t || '_' || TO_CHAR( d, 'DD_HH24' ) || ' PARTITION OF '
|| 'ev_ten' || t || ' FOR VALUES FROM (''' || d || ''') TO ('''|| d + INTERVAL '1 hour' ||''') USING columnar;';
END LOOP;
END LOOP;
END $$;
INSERT INTO ev
SELECT row_number() over () as id, t.targetid, t.tenant, t.datetime + floor(random() * 5)::int * interval '1 seconds' ,
(array['start','stop','break','engineon','engineoff'])[floor(random() * 5 + 1)] as code
FROM ( SELECT *, row_number() OVER(ORDER BY id ASC) AS row FROM tp) t
WHERE t.row % 5 = 0;
select count(*) from ev --535.680
--1.4sec:
SELECT tp.targetid, tp.datetime, tp.speed FROM tp WHERE tp.tenant = 'a' AND tp.datetime between '2023-12-01 00:00:00' AND '2023-12-31 23:59:59' AND tp.targetid IN (102)
--1 sec:
SELECT ev.targetid, ev.datetime, ev.code FROM ev WHERE ev.tenant = 'a' AND ev.datetime between '2023-12-01 00:00:00' AND '2023-12-31 23:59:59' AND ev.targetid IN (102)
--16sec:
SELECT ev.targetid, ev.datetime, ev.code, tp.speed
FROM ev LEFT JOIN tp on ev.targetid = tp.targetid AND ev.datetime = tp.datetime AND ev.tenant = tp.tenant
WHERE ev.tenant = 'a' AND ev.datetime between '2023-12-01 00:00:00' AND '2023-12-31 23:59:59' AND ev.targetid IN (102)
AND tp.tenant = 'a' AND tp.datetime between '2023-12-01 00:00:00' AND '2023-12-31 23:59:59' AND tp.targetid IN (102)
Do you need any more information from me ?