Find DBT queries
Opened this issue · 3 comments
bbrewington commented
In information schema jobs, identify the ones with the dbt comment header thing
and maybe parse it as Json? I need to make sure that if the dbt header fields change, it won’t goof up the output field schema as struct
maybe set up a macro to publish a UDF to parse the dbt stuff?
bbrewington commented
here's an example header for reference
(driven by this: https://github.com/bbrewington/gatech-cse6242-citibike/actions)
/* {"app": "dbt", "dbt_version": "1.3.0", "profile_name": "citibike_dbt", "target_name": "prod", "connection_name": "cse-6242-sp22-nyatl.information_schema"} */
with tables as (
select
sql stuff blah blah blah
bbrewington commented
Here's a reproducible example:
with j as (
select parse_json('{"app": "dbt", "dbt_version": "1.3.0", "profile_name": "citibike_dbt", "target_name": "prod", "connection_name": "cse-6242-sp22-nyatl.information_schema"}') as dbt_header
)
select
string(dbt_header.app) as app,
string(dbt_header.dbt_version) as dbt_version,
string(dbt_header.profile_name) as profile_name,
string(dbt_header.target_name) as target_name,
string(dbt_header.connection_name) as connection_name
from j
bbrewington commented
with a as (
select concat(
'{"app": "dbt", "dbt_version": "1.3.0", ',
'"profile_name": "citibike_dbt", "target_name": "prod", ',
'"connection_name": "cse-6242-sp22-nyatl.information_schema"}\n\n\n\n',
'select col1, col2 from table1 left join table2 on table2.a = table1.a'
) as object
),
split_vals as (
SELECT struct(
trim(split(b, ': ')[offset(0)], '"') as key,
trim(split(b, ': ')[offset(1)], '"') as value
) as dbt_header
from a
cross join unnest(REGEXP_EXTRACT_ALL(object, r'"[\w\d\.\-_]+": ?"[\w\d\.\-_]+"')) as b
)
select *
from split_vals