Add support for query filtering on array based properties
pavelhoral opened this issue · 1 comments
Our current data layer does not support query filters on array properties. For example the filter tags eq 'hello'
should be able to match object { "tags": ["hello", "world"] }
. This behavior is defined in wrensec-commons.
It is not that obvious such feature is necessary in Wren:IDM. That is why I am creating this GitHub issue to open the discussion.
Of course one of the main issues here is to actually come up with a way to generate database schema and SQL queries that would not bomb the performance while not introducing unnecessary complexity. I have played with few ideas in PostgreSQL before abandoning it... nevertheless here is my script that can provide a starting point in a potential future attempts:
-- Create basic schema for property objects
CREATE TABLE objects (
id BIGINT NOT NULL PRIMARY KEY,
type_id INTEGER NOT NULL,
objectid VARCHAR(255) NOT NULL,
CONSTRAINT idx_objects UNIQUE (type_id, objectid)
);
CREATE TABLE props (
id BIGINT NOT NULL,
propkey VARCHAR(255) NOT NULL,
proptype VARCHAR(32),
propvalue VARCHAR(65535),
CONSTRAINT fk_objects FOREIGN KEY (id) REFERENCES objects (id) ON DELETE CASCADE ON UPDATE NO ACTION
);
CREATE INDEX idx_prop_id ON props (id);
CREATE INDEX idx_prop_values ON props (propkey, propvalue);
-- Fill in few objects
INSERT INTO objects VALUES (1, 1, 'test');
INSERT INTO props VALUES
(1, '/foo', 'java.lang.String', 'FOOBAR'),
(1, '/bar/0', 'java.lang.String', 'first'),
(1, '/bar/1', 'java.lang.String', 'second'),
(1, '/bar/2', 'java.lang.String', 'third'),
(1, '/bar/4', 'java.lang.String', 'second'),
(1, '/baz/x', 'java.lang.String', 'tic'),
(1, '/baz/y', 'java.lang.String', 'tac'),
(1, '/baz/z', 'java.lang.String', 'toe');
-- Fill in a lot of random stuff so that we are not testing empty tables
INSERT INTO objects (SELECT generate_series(1,100000), 1, md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/0', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/1', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/2', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/3', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/4', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/5', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/6', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/7', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/8', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/9', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/10', 'java.lang.String', md5(random()::text));
-- This will produce duplicated result (this is what current data layer generates)
EXPLAIN ANALYZE SELECT obj.* FROM objects obj
LEFT JOIN props p1 ON p1.id = obj.id AND p1.propkey LIKE '/bar/%'
WHERE p1.proptype = 'java.lang.String' AND p1.propvalue LIKE 'sec%'
ORDER BY obj.objectid;
-- This works, but is not what current data layer generates
EXPLAIN ANALYZE SELECT obj.* FROM objects obj
WHERE
id IN (
SELECT id FROM props prop WHERE prop.propkey LIKE '/bar/%' AND
prop.proptype = 'java.lang.String' AND prop.propvalue LIKE 'sec%'
)
ORDER BY obj.objectid;
DROP TABLE props;
DROP TABLE objects;