Add support for query filtering on array based properties

Question

Add support for query filtering on array based properties

pavelhoral opened this issue a year ago · 1 comments

Our current data layer does not support query filters on array properties. For example the filter tags eq 'hello' should be able to match object { "tags": ["hello", "world"] }. This behavior is defined in wrensec-commons.

It is not that obvious such feature is necessary in Wren:IDM. That is why I am creating this GitHub issue to open the discussion.

Answer 1 · 2024-02-02T09:50:09.000Z

Of course one of the main issues here is to actually come up with a way to generate database schema and SQL queries that would not bomb the performance while not introducing unnecessary complexity. I have played with few ideas in PostgreSQL before abandoning it... nevertheless here is my script that can provide a starting point in a potential future attempts:

-- Create basic schema for property objects
CREATE TABLE objects (
  id BIGINT NOT NULL PRIMARY KEY,
  type_id INTEGER NOT NULL,
  objectid VARCHAR(255) NOT NULL,
  CONSTRAINT idx_objects UNIQUE (type_id, objectid)
);

CREATE TABLE props (
  id BIGINT NOT NULL,
  propkey VARCHAR(255) NOT NULL,
  proptype VARCHAR(32),
  propvalue VARCHAR(65535),
  CONSTRAINT fk_objects FOREIGN KEY (id) REFERENCES objects (id) ON DELETE CASCADE ON UPDATE NO ACTION
);
CREATE INDEX idx_prop_id ON props (id);
CREATE INDEX idx_prop_values ON props (propkey, propvalue);


-- Fill in few objects
INSERT INTO objects VALUES (1, 1, 'test');
INSERT INTO props VALUES
(1, '/foo', 'java.lang.String', 'FOOBAR'),
(1, '/bar/0', 'java.lang.String', 'first'),
(1, '/bar/1', 'java.lang.String', 'second'),
(1, '/bar/2', 'java.lang.String', 'third'),
(1, '/bar/4', 'java.lang.String', 'second'),
(1, '/baz/x', 'java.lang.String', 'tic'),
(1, '/baz/y', 'java.lang.String', 'tac'),
(1, '/baz/z', 'java.lang.String', 'toe');

-- Fill in a lot of random stuff so that we are not testing empty tables
INSERT INTO objects (SELECT generate_series(1,100000), 1, md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/0', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/1', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/2', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/3', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/4', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/5', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/6', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/7', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/8', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/9', 'java.lang.String', md5(random()::text));
INSERT INTO props (SELECT generate_series(1,100000), '/bar/10', 'java.lang.String', md5(random()::text));

-- This will produce duplicated result (this is what current data layer generates)
EXPLAIN ANALYZE SELECT obj.* FROM objects obj
LEFT JOIN props p1 ON p1.id = obj.id AND p1.propkey LIKE '/bar/%'
WHERE p1.proptype = 'java.lang.String' AND p1.propvalue LIKE 'sec%'
ORDER BY obj.objectid;

-- This works, but is not what current data layer generates
EXPLAIN ANALYZE SELECT obj.* FROM objects obj
WHERE 
  id IN (
    SELECT id FROM props prop WHERE prop.propkey LIKE '/bar/%' AND 
    prop.proptype = 'java.lang.String' AND prop.propvalue LIKE 'sec%'
  )
ORDER BY obj.objectid;

DROP TABLE props;
DROP TABLE objects;