BigQuery library for python with pandas.DataFrame.
- Python 3.5 or later
bqlite.read_bq
is less memory intensive than pandas.io.gbq.read_gbq
- About pandas (v0.19.2): the memory usage of the
final_df = concat(dataframe_list, ignore_index=True)
step is very large.
- If the pandas.DataFrame being loaded contains NaN or None values,
bqlite.to_bq
loads them to BigQuery as NULL values. - About
google.cloud.bigquery.table.Table.insert_data
cannot write NULL values.
# Master branch
$ git clone https://github.com/tomomotofactory/BigQuery-Pandas-Lite.git
$ python setup.py install
# Master branch
$ pip install git+https://github.com/tomomotofactory/BigQuery-Pandas-Lite.git
# Specific tag (or branch, commit hash)
$ pip install git+https://github.com/tomomotofactory/BigQuery-Pandas-Lite@v1.0.0
from bqlite import BQLite
bq = BQLite()
df = bq.read_bq(sql='SELECT * FROM `nyc-tlc.green.trips_2014`',
project='input_your_project_id')
# print pandas.DataFrame
print(df)
from bqlite import BQLite
bq = BQLite()
bq.to_bq(load_df=load_pandas_dataframe, project_name='your_project',
dataset_name='your_dataset', table_name='your_table')
from bqlite import BQLite
bq = BQLite()
bq.read_to_bq(sql='SELECT * FROM `nyc-tlc.green.trips_2014`',
project_name='input_your_project_id',
write_dataset_name='input_write_project_id',
write_table_name='input_write_table_name')
from bqlite import BQLite
bq = BQLite()
bq.create_table(schema_df=schema_pandas_dataframe, project_name='your_project',
dataset_name='your_dataset', table_name='your_table')
from bqlite import BQLite
bq = BQLite()
bq.delete_table(project_name='your_project', dataset_name='your_dataset', table_name='your_table')
from bqlite import BQLite
import google.auth
credentials, df_project = google.auth.default()
bq = BQLite(credentials=credentials)
df = bq.read_bq(sql='SELECT * FROM `nyc-tlc.green.trips_2014`',
project='input_your_project_id')
from bqlite import BQLite
bq = BQLite(json_key_filepath='bigquery-key.json')
df = bq.read_bq(sql='SELECT * FROM `nyc-tlc.green.trips_2014`',
project='input_your_project_id')