observation dimensions
graceave opened this issue · 1 comments
Hi,
I was wondering if you have considered amending the script to be able to do inference with multiple observations. Or if I am missing how to do this. Thank you for your help!
As a simple example, I'm simulating taking a sample from normal population with some mean mu and some standard deviation sigma.
The sample will have summary statistics mean xbar and standard deviation s.
The sample size is small (n=10), so we would benefit from the observed data being able to include multiple samples. When I try to make the observed data multiple samples, I get this error:
AssertionError Traceback (most recent call last)
in ()
10 n_mades=n_mades,
11 prior_norm=prior_norm,
---> 12 density=density)
13 # train
14 log, _, posterior = res.run(
~/delfi/delfi/inference/APT.py in init(self, generator, obs, prior_norm, pilot_samples, reg_lambda, seed, verbose, add_prior_precision, Ptol, **kwargs)
67 if self.obs.ndim == 1:
68 self.obs = self.obs.reshape(1, -1)
---> 69 assert self.obs.shape[0] == 1
70
71 if np.any(np.isnan(self.obs)):
AssertionError:
Here is the code I used to generate this:
import numpy as np
def SampleSimulator(parameters, N=1e6, seed=None):
""" Sample simulator
Simulates taking a sample of size 10 from population of size N with mean mu and standard dev sigma
Returns sample as 1d np.array of length 10
Parameters
-------------------
mu : float
pop mean
sigma : float
pop standard deviation
N : int
population size
seed : int
"""
if seed is not None:
np.random.seed(seed=seed)
else:
np.random.seed()
# generate population
mu, sigma = parameters
N = np.uint64(N)
population = np.random.normal(mu, sigma, N)
# take a random sample of size 10 from the population without replacement
sam = np.random.choice(population, 10, replace=False)
return sam
from delfi.simulator.BaseSimulator import BaseSimulator
class Sampler(BaseSimulator):
def __init__(self, N, seed=None):
""" Sample simulator
Simulates taking a sample of size 10 from population of size N with mean mu and standard dev sigma
Returns sample as 1d np.array of length 10
Parameters
-------------------
N : int
population size
seed : int or None
If set, randomness across runs is disabled
"""
dim_param = 2
super().__init__(dim_param=dim_param, seed=seed)
self.N = N
self.SampleSimulator = SampleSimulator
def gen_single(self, param_set):
"""Forward model for simulator for single parameter set
Parameters
----------
params : list or np.array, 1d of length dim_param
Parameter vector
Returns
-------
dict : dictionary with data
The dictionary must contain a key data that contains the results of
the forward run. Additional entries can be present.
"""
params = np.asarray(param_set)
assert params.ndim == 1, 'params.ndim must be 1'
sim_seed = self.gen_newseed()
states = self.SampleSimulator(param_set, N, seed=sim_seed)
return {'data': states,
'N': self.N}
## priors ##
import delfi.distribution as dd
seed_p = 2
prior_min = np.array([0,0])
prior_max = np.array([40,8])
prior = dd.Uniform(lower=prior_min, upper=prior_max,seed=seed_p)
## summary stats ##
from delfi.summarystats.BaseSummaryStats import BaseSummaryStats
from scipy import stats as spstats
class SampleStats(BaseSummaryStats):
"""SummaryStats class for the sample from a normal distribution
Calculates summary statistics
"""
def __init__(self, n_summary=2, seed=None):
"""See SummaryStats.py for docstring"""
super(SampleStats, self).__init__(seed=seed)
self.n_summary = n_summary
def calc(self, repetition_list):
"""Calculate summary statistics
Parameters
----------
repetition_list : list of dictionaries, one per repetition
data list, returned by `gen` method of Simulator instance
Returns
-------
np.array, 2d with n_reps x n_summary
"""
stats = []
for r in range(len(repetition_list)):
samp = np.transpose(repetition_list[r]['data'])
xbar = np.mean(samp)
s = np.std(samp)
ss = np.array([xbar, s])
stats.append(ss) ##TEST THIS ALL OUT
return np.asarray(stats)
## generator ##
import delfi.generator as dg
N = 1e6
# summary statistics hyperparameters
n_summary = 2
seed_m = 3
m = Sampler(N, seed=seed_m)
s = SampleStats(n_summary = n_summary)
g = dg.Default(model=m, prior=prior, summary=s)
## true parameters and respective labels ##
true_params = np.array([28, 2.9])
labels_params = ['mu', 'sigma']
# observed data: simulation given true parameters
#### this is the multiple observations #####
obs = []
for i in range(0,25):
obs.append(m.gen_single(true_params))
## summary stats for >1 observation ##
obs_stats = s.calc(obs)
## Hyperparameters ##
seed_inf = 1
pilot_samples = 2000
# training schedule
n_train = 2000
n_rounds = 1
# fitting setup
minibatch = 256
epochs = 100
val_frac = 0.05
# network setup
n_hiddens = [50,50]
# convenience
prior_norm = True
# MAF parameters
density = 'maf'
n_mades = 5
## Inference ##
import delfi.inference as infer
# inference object
res = infer.APT(g,
obs=obs_stats,
n_hiddens=n_hiddens,
seed=seed_inf,
pilot_samples=pilot_samples,
n_mades=n_mades,
prior_norm=prior_norm,
density=density)
# train
log, _, posterior = res.run(
n_train=n_train,
n_rounds=n_rounds,
minibatch=minibatch,
epochs=epochs,
silent_fail=False,
proposal='prior',
val_frac=val_frac,
verbose=True,)
@graceave, multiple observations are not supported by delfi
. However, in the meantime we have shifted efforts to developing a new framework, sbi
, which is based on PyTorch rather than theano
(which ran out of support). Since its latest release it does support multiple IID observations for some inference methods, see: https://github.com/mackelab/sbi/releases/tag/v0.16.0