France statistics using Postgresql and CubeJs on Node
- Lastest data update: 2020-07-31
Quick Start
- Clone git repo
git clone https://github.com/ng-galien/insee.git && cd insee
- Unzip the dump
unzip min-dump.sql.zip
- Create the database from minimal dump
psql -U postgres -d postgres -f min-dump.sql
- Create your credential file
touch .env && echo -e "CUBEJS_DB_HOST=localhost\nCUBEJS_DB_NAME=insee\nCUBEJS_DB_USER=postgres\nCUBEJS_DB_PASS=postgres\nCUBEJS_DB_TYPE=postgres\nCUBEJS_API_SECRET=secret" >> .env
Modify .env file to with your credentials, CUBEJS_DB_USER and CUBEJS_DB_PASS
CUBEJS_DB_HOST=localhost
CUBEJS_DB_NAME=insee
CUBEJS_DB_USER=my_user
CUBEJS_DB_PASS=my_user_pwd
CUBEJS_DB_TYPE=postgres
CUBEJS_API_SECRET=secret
- Run Node Server
npm install && npm run dev
- Open browser and play with measures, dimensions and filters
Available statistics
Granularity / Drill
- Day
- Week
- Month
- Quarter
- Year
Values
- Total death/day
- Total by age groups
- Total by sex
- Normalized by region population
Dimensions / Filters
- State: French department
- Region: French region
Manual installation and update
Create the database.
create database insee;
Table definitions are in schema.sql
Database for insee data source
drop table if exists insee_update;
create table insee_update
(
adec integer,
mdec integer,
jdec integer,
depdec varchar(50),
comdec varchar(50),
anaiss integer,
mnaiss integer,
jnaiss integer,
sexe varchar(50),
depdom varchar(50),
lieudec2 varchar(50)
);
Database for France states / regions
drop table if exists insee;
drop table if exists state;
create table state
(
id bigserial not null
constraint departement_pkey1
primary key,
code varchar(3) not null
constraint departement_code_key
unique,
name varchar(30) not null
constraint departement_name_key
unique,
h_pop_cat_1 integer,
h_pop_cat_2 integer,
h_pop_cat_3 integer,
h_pop_cat_4 integer,
h_pop_cat_5 integer,
h_pop integer,
f_pop_cat_1 integer,
f_pop_cat_2 integer,
f_pop_cat_3 integer,
f_pop_cat_4 integer,
f_pop_cat_5 integer,
f_pop integer,
tot_pop_cat_1 integer,
tot_pop_cat_2 integer,
tot_pop_cat_3 integer,
tot_pop_cat_4 integer,
tot_pop_cat_5 integer,
tot_pop integer,
diab integer,
region varchar(100)
);
create index state_code_idx
on state (code);
create index state_name_idx
on state (name);
create index state_region_idx
on state (region);
State inserts are located in update-states.sql
INSERT INTO public.state (id, code, name, h_pop_cat_1, h_pop_cat_2, h_pop_cat_3, h_pop_cat_4, h_pop_cat_5, h_pop,
f_pop_cat_1, f_pop_cat_2, f_pop_cat_3, f_pop_cat_4, f_pop_cat_5, f_pop, tot_pop_cat_1,
tot_pop_cat_2, tot_pop_cat_3, tot_pop_cat_4, tot_pop_cat_5, tot_pop, diab, region)
VALUES (100, '01', 'Ain', 167666, 151084, 179469, 104453, 54283, 656955, 86596, 75213, 89272, 50205, 22059, 323345,
81070, 75871, 90197, 54248, 32224, 333610, 26135, 'Auvergne-Rhône-Alpes');
...
Database for stats
DROP TYPE IF EXISTS location_type;
CREATE TYPE location_type AS ENUM (
'MEDIC', 'EHPAD', 'DOM', 'PUBLIC', 'NA'
);
DROP TYPE IF EXISTS sex_type;
CREATE TYPE sex_type AS ENUM (
'MALE', 'FEMALE'
);
create table if not exists insee
(
id bigserial not null
constraint insee_pkey
primary key,
death_state bigint not null references state (id),
dom_state bigint references state (id),
death_date date,
birth_date date,
age interval,
age_year integer,
age_group_5 integer,
age_group_10 integer,
age_cat_1 integer,
age_cat_2 integer,
age_cat_3 integer,
age_cat_4 integer,
age_cat_5 integer,
sex sex_type,
location location_type
);
create index insee_death_date_idx
on insee (death_date);
create index insee_birth_date_idx
on insee (birth_date);
create index insee_age_idx
on insee (age);
create index insee_age_year_idx
on insee (age_year);
create index insee_sex_idx
on insee (sex);
create index insee_location_idx
on insee (location);
Source
CSV data
Download and extract data from INSEE website
https://www.insee.fr/fr/statistiques/4487988?sommaire=4487854
Import with psql
With psql use \copy command to import csv files.
$ psql -U postgres -h localhost -d insee
\copy insee_update FROM '../2020-07-31_detail/DC_2018_det.csv' DELIMITER ';' CSV HEADER;
\copy insee_update FROM '../2020-07-31_detail/DC_2019_det.csv' DELIMITER ';' CSV HEADER;
\copy insee_update FROM '../2020-07-31_detail/DC_2020_det.csv' DELIMITER ';' CSV HEADER;
Update the statistics table
SQL statment in update-insee_db.sql
-- correct dates
update insee_update
set mdec = 1
where mdec = 0;
update insee_update
set jdec = 1
where jdec = 0;
update insee_update
set mnaiss = 1
where mnaiss = 0;
update insee_update
set jnaiss = 1
where jnaiss = 0;
delete
from insee
where id > 0;
/*
Populate insee
*/
insert into insee (death_state,
dom_state,
death_date,
birth_date,
age,
age_year,
age_group_5,
age_group_10,
age_cat_1,
age_cat_2,
age_cat_3,
age_cat_4,
age_cat_5,
sex,
location)
SELECT (select state.id from state state where depdec = state.code limit 1),
(select state.id from state state where depdom = state.code limit 1),
death,
birth,
age,
year,
trunc(year::float / 5.0),
trunc(year::float / 10.0),
case when year between 0 and 19 then 1 else 0 end,
case when year between 20 and 39 then 1 else 0 end,
case when year between 40 and 59 then 1 else 0 end,
case when year between 60 and 75 then 1 else 0 end,
case when year > 74 then 1 else 0 end,
case sexe
when 'M' then 'MALE'::sex_type
when 'F' then 'FEMALE'::sex_type
else Null::sex_type end,
case lieudec2
when 'HopCli' then 'MEDIC'::location_type
when 'HosMar' then 'EHPAD'::location_type
when 'Logem' then 'DOM'::location_type
when 'Autres' then 'PUBLIC'::location_type
else 'NA'::location_type
end
FROM (select depdec,
depdom,
make_date(adec, mdec, jdec) as death,
make_date(anaiss, mnaiss, jnaiss) as birth,
age(make_date(adec, mdec, jdec),
make_date(anaiss, mnaiss, jnaiss)) as age,
date_part(
'year',
age(make_date(adec, mdec, jdec), make_date(anaiss, mnaiss, jnaiss))) as year,
lieudec2,
sexe
from insee_update) u;
Check Data
Control the record's count in source and populated tables
SELECT c.year,
c.source,
c.count
FROM (
SELECT 'UPDATE' as source,
count(*) as count,
date_part('year', death_date) as year
from insee
GROUP BY date_part('year', death_date)
UNION
SELECT 'SOURCE',
count(*),
adec
from insee_update
group by adec) c
order by year;
Compact database
This database is used by CubeJS.
DROP table if exists insee_stat;
CREATE table insee_stat as
SELECT state.code || '-' || state.name as state,
state.region as region,
source.death_date as date,
source.sum_tot,
source.sum_cat_1,
source.sum_cat_2,
source.sum_cat_3,
source.sum_cat_4,
source.sum_cat_5,
source.sum_male,
source.sum_female,
round(source.sum_tot::numeric / state.tot_pop::numeric * 100000, 2) as norm_tot,
round(source.sum_cat_1::numeric / state.tot_pop_cat_1::numeric * 100000, 2) as norm_cat_1,
round(source.sum_cat_2::numeric / state.tot_pop_cat_2::numeric * 100000, 2) as norm_cat_2,
round(source.sum_cat_3::numeric / state.tot_pop_cat_3::numeric * 100000, 2) as norm_cat_3,
round(source.sum_cat_4::numeric / state.tot_pop_cat_4::numeric * 100000, 2) as norm_cat_4,
round(source.sum_cat_5::numeric / state.tot_pop_cat_5::numeric * 100000, 2) as norm_cat_5,
round(source.sum_male::numeric / state.f_pop::numeric * 100000, 2) as norm_male,
round(source.sum_female::numeric / state.h_pop::numeric * 100000, 2) as norm_female
FROM (SELECT ins.death_date,
ins.death_state,
count(ins.id) as sum_tot,
sum(ins.age_cat_1) as sum_cat_1,
sum(ins.age_cat_2) as sum_cat_2,
sum(ins.age_cat_3) as sum_cat_3,
sum(ins.age_cat_4) as sum_cat_4,
sum(ins.age_cat_5) as sum_cat_5,
sum(case when ins.sex = 'MALE' then 1 else 0 end) as sum_male,
sum(case when ins.sex = 'FEMALE' then 1 else 0 end) as sum_female
FROM insee ins
group by ins.death_date, ins.death_state, ins.sex
order by ins.death_date, ins.death_state, ins.sex) source
join state state on source.death_state = state.id;
Run Cube JS
Create a credential file
Credentials are stored in a .env file in the root directory
$ touch .env
$ echo "CUBEJS_DB_HOST=localhost
CUBEJS_DB_NAME=insee
CUBEJS_DB_USER=postgres
CUBEJS_DB_PASS=postgres
CUBEJS_DB_TYPE=postgres
CUBEJS_API_SECRET=secret" >> .env
Instal Cube and start Node
$ npm install
$ npm run dev