ldbc/ldbc_snb_docs

Add new data set sizes

szarnyasg opened this issue · 1 comments

Use something along the lines of:

import duckdb

con = duckdb.connect()

for entity in ["static/Organisation", "static/Place", "static/Tag", "static/TagClass", "dynamic/Comment", "dynamic/Comment_hasTag_Tag", "dynamic/Forum", "dynamic/Forum_hasMember_Person", "dynamic/Forum_hasTag_Tag", "dynamic/Person", "dynamic/Person_hasInterest_Tag", "dynamic/Person_knows_Person", "dynamic/Person_likes_Comment", "dynamic/Person_likes_Post", "dynamic/Person_studyAt_University", "dynamic/Person_workAt_Company", "dynamic/Post", "dynamic/Post_hasTag_Tag"]:
    print(entity.replace("_", "\_"), end=" ")
    for sf in [1, 3, 10, 30, 100, 300, 1000, 3000, 10000]:
    #for sf in [1, 3]:
        con.execute(f"select count(*) from read_parquet('bi-sf{sf}-raw/graphs/parquet/raw/composite-merged-fk/{entity}/*.parquet')")
        result = con.fetchone()
        print(f"& \\numprint{{{result[0]}}} ", end="")
    
    print("\\\\")

Resolved by bcef34a.