Support for the STAC API free-text extension
hrodmn opened this issue · 2 comments
The STAC API free-text extension enables a user to provide a parameter q
with some free-text search query parameters. This is perhaps redundant with the filter extension, but it is much easier to write a text query using the q
parameter. e.g. landsat OR sentinel
to get records that contain the term sentinel
or landsat
, landsat AND sentinel
to get records that contain both terms.
Adding support for the q
parameter would be particularly useful for improving the collection search functionality!
Postgres has a rich set of functions for text search operations: https://www.postgresql.org/docs/current/functions-textsearch.html
websearch_to_tsquery
comes really close to doing all of the work for us, but it can't handle AND
operators or parenthesized queries. To solve this we could write a function that converts a q
query into a tsquery
object:
CREATE OR REPLACE FUNCTION q_to_tsquery (input text)
RETURNS tsquery
AS $$
DECLARE
processed_text text;
BEGIN
-- replace commas outside quoted text with " | "
processed_text := regexp_replace(input, ',(?=(?:[^"]*"[^"]*")*[^"]*$)', ' | ', 'g');
-- replace the logical operators with tsquery equivalents
processed_text := regexp_replace(processed_text, '\s+AND\s+', ' & ', 'g');
processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'g');
-- surround quoted text contents with single quotes for to_tsquery syntax
processed_text := regexp_replace(processed_text, '"([^"]+)"', '''\1''', 'g');
RETURN to_tsquery(processed_text);
END;
$$
LANGUAGE plpgsql;
Then we could do a query on this simulated collections table like this:
SELECT
collections.id,
collections.title,
collections.description,
ts_rank(
setweight(
to_tsvector('english', coalesce(collections.title, '')), 'A'
) || setweight(
to_tsvector('english', coalesce(collections.description, '')), 'B'
),
query
) AS rank
FROM
collections,
q_to_tsquery (q) AS query
WHERE (
setweight(to_tsvector('english', coalesce(collections.title, '')), 'A') ||
setweight(to_tsvector('english', coalesce(collections.description, '')), 'B')
) @@ query
ORDER BY rank DESC;
CREATE OR REPLACE FUNCTION stac_search_to_where(j jsonb) RETURNS text AS $$
DECLARE
where_segments text[];
_where text;
dtrange tstzrange;
collections text[];
geom geometry;
sdate timestamptz;
edate timestamptz;
filterlang text;
filter jsonb = j->'filter';
ft_query tsquery;
BEGIN
RAISE DEBUG 'STAC SEARCH_TO_WHERE: %', j;
IF j ? 'ids' THEN
where_segments := where_segments || format('id = ANY (%L) ', to_text_array(j->'ids'));
END IF;
IF j ? 'collections' THEN
collections := to_text_array(j->'collections');
where_segments := where_segments || format('collection = ANY (%L) ', collections);
END IF;
IF j ? 'datetime' THEN
dtrange := parse_dtrange(j->'datetime');
sdate := lower(dtrange);
edate := upper(dtrange);
where_segments := where_segments || format(' datetime <= %L::timestamptz AND end_datetime >= %L::timestamptz ',
edate,
sdate
);
END IF;
IF j ? 'q' THEN
ft_query := q_to_tsquery(j->>'q');
where_segments := where_segments || format( $quote$
( to_tsvector('english', content->'properties'->>'description') ||
to_tsvector('english', content->'properties'->>'title') ||
to_tsvector('english', content->'properties'->'keywords')
) @@ %L
$quote$,
ft_query
);
END IF;
geom := stac_geom(j);
IF geom IS NOT NULL THEN
where_segments := where_segments || format('st_intersects(geometry, %L)',geom);
END IF;
filterlang := COALESCE(
j->>'filter-lang',
get_setting('default_filter_lang', j->'conf')
);
IF NOT filter @? '$.**.op' THEN
filterlang := 'cql-json';
END IF;
IF filterlang NOT IN ('cql-json','cql2-json') AND j ? 'filter' THEN
RAISE EXCEPTION '% is not a supported filter-lang. Please use cql-json or cql2-json.', filterlang;
END IF;
IF j ? 'query' AND j ? 'filter' THEN
RAISE EXCEPTION 'Can only use either query or filter at one time.';
END IF;
IF j ? 'query' THEN
filter := query_to_cql2(j->'query');
ELSIF filterlang = 'cql-json' THEN
filter := cql1_to_cql2(filter);
END IF;
RAISE DEBUG 'FILTER: %', filter;
where_segments := where_segments || cql2_query(filter);
IF cardinality(where_segments) < 1 THEN
RETURN ' TRUE ';
END IF;
_where := array_to_string(array_remove(where_segments, NULL), ' AND ');
IF _where IS NULL OR BTRIM(_where) = '' THEN
RETURN ' TRUE ';
END IF;
RETURN _where;
END;
$$ LANGUAGE PLPGSQL STABLE;