🐋 embulk-docker
Setup
local$ cp .env_sample .env
local$ docker-compose up -d
local$ docker-compose exec embulk bash
root@:~# cd embulk_bundle/ && embulk bundle
2021-08-18 23:42:27.228 +0000: Embulk v0.9.23
Don't run Bundler as root. Bundler can ask for sudo if it is needed, and
installing your bundle as root will break this application for all non-root
users on this machine.
Fetching gem metadata from https://rubygems.org/......
Fetching public_suffix 4.0.6
Installing public_suffix 4.0.6
...
...
...
Bundle complete! 4 Gemfile dependencies, 28 gems now installed.
Bundled gems are installed into `./vendor/bundle`
Quickstart
local$ docker-compose exec embulk bash
root@:~# embulk example ./try1
root@:~# embulk guess ./try1/seed.yml -o config.yml
root@:~# embulk preview config.yml
root@:~# embulk run config.yml
CSV to MySQL
local$ docker-compose exec embulk bash
root@:~# embulk run -b ./embulk_bundle ./examples/csv_to_mysql/config.yml
local$ docker-compose exec mysql bash
root@:~# mysql -u root -p
mysql> use embulk;
mysql> select * from csv_to_mysql \G
*************************** 1. row ***************************
id: 1
account: 32864
time: 2015-01-27 19:23:49
purchase: 2015-01-27 00:00:00
comment: embulk
*************************** 2. row ***************************
id: 2
account: 14824
time: 2015-01-27 19:01:23
purchase: 2015-01-27 00:00:00
comment: embulk jruby
*************************** 3. row ***************************
id: 3
account: 27559
time: 2015-01-28 02:20:02
purchase: 2015-01-28 00:00:00
comment: Embulk "csv" parser plugin
*************************** 4. row ***************************
id: 4
account: 11270
time: 2015-01-29 11:54:36
purchase: 2015-01-29 00:00:00
comment: NULL
4 rows in set (0.00 sec)
Jsonl to MySQL
local$ docker-compose exec embulk bash
root@:~# embulk run -b ./embulk_bundle ./examples/jsonl_to_mysql/config.yml
local$ docker-compose exec mysql bash
root@:~# mysql -u root -p
mysql> use embulk;
mysql> select * from jsonl_to_mysql \G
*************************** 1. row ***************************
id: 1
str: a
num: 1
bool: 1
time_at: 2017-01-01 00:00:00
*************************** 2. row ***************************
id: 2
str: b
num: 2
bool: 1
time_at: 2018-02-01 00:00:00
*************************** 3. row ***************************
id: 3
str: c
num: 3
bool: 0
time_at: 2019-03-01 00:00:00
*************************** 4. row ***************************
id: 4
str: d
num: 4
bool: 0
time_at: 2020-04-01 00:00:00
*************************** 5. row ***************************
id: 5
str: e
num: 5
bool: 1
time_at: 2021-05-01 00:00:00
5 rows in set (0.00 sec)
Jsonl to BigQuery
- Create a GCP project in advance.
- Set the GCP information in `.env`:
  - `JSONL_TO_BIGQUERY_GCP_JSON_KEY_FILE_PATH` - Path to your service account key file
  - `JSONL_TO_BIGQUERY_GCP_PROJECT_ID` - Your project ID
Create dataset
local$ docker-compose exec gcloud bash
# log in to the Google Cloud SDK
root@:~# gcloud auth login
# set the project to use
root@:~# gcloud config set project your_project_id # <- Specify the project ID you created.
# create dataset
root@:~# bq mk embulk
Import into BigQuery
local$ docker-compose exec embulk bash
root@:~# embulk run -b ./embulk_bundle ./examples/jsonl_to_bigquery/config.yml.liquid
local$ docker-compose exec gcloud bash
root@:~# bq query "select * from embulk.jsonl_to_bigquery"
Waiting on bqjob_xxxxxxxxxxxxxxx ... (0s) Current status: DONE
+-----+------+
| str | num  |
+-----+------+
| a   | 1000 |
| b   | 2000 |
| c   | 3000 |
| d   | 4000 |
| e   | 5000 |
+-----+------+