mackelab/delfi

observation dimensions

graceave opened this issue · 1 comment

Hi,
I was wondering whether you have considered amending the code to support inference with multiple observations, or whether I am missing how to do this. Thank you for your help!

As a simple example, I'm simulating taking a sample from a normal population with some mean mu and some standard deviation sigma.
The sample is summarized by its mean xbar and standard deviation s.
The sample size is small (n=10), so inference would benefit from the observed data including multiple samples. When I try to pass multiple samples as the observed data, I get this error:


AssertionError Traceback (most recent call last)
<ipython-input-...> in <module>()
10 n_mades=n_mades,
11 prior_norm=prior_norm,
---> 12 density=density)
13 # train
14 log, _, posterior = res.run(

~/delfi/delfi/inference/APT.py in __init__(self, generator, obs, prior_norm, pilot_samples, reg_lambda, seed, verbose, add_prior_precision, Ptol, **kwargs)
67 if self.obs.ndim == 1:
68 self.obs = self.obs.reshape(1, -1)
---> 69 assert self.obs.shape[0] == 1
70
71 if np.any(np.isnan(self.obs)):

AssertionError:

Here is the code I used to generate this:

import numpy as np

def SampleSimulator(parameters, N=1e6, seed=None):
    """ Sample simulator
    Simulates taking a sample of size 10 from population of size N with mean mu and standard dev sigma 
    Returns sample as 1d np.array of length 10

    Parameters
    -------------------
    parameters : array-like, 1d of length 2
        (mu, sigma): population mean and population standard deviation
    N : int
        population size
    seed : int or None
    """
    if seed is not None:
        np.random.seed(seed=seed)
    else:
        np.random.seed()
    
    # generate population
    mu, sigma = parameters
    N = np.uint64(N)
    population = np.random.normal(mu, sigma, N)
    
    # take a random sample of size 10 from the population without replacement
    sam = np.random.choice(population, 10, replace=False)

    return sam
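
# Minimal usage sketch (illustrative): a single draw with the "true" parameters
# used further below should give a 1d sample of length 10.
example_sample = SampleSimulator([28, 2.9], N=1e6, seed=0)
assert example_sample.shape == (10,)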

from delfi.simulator.BaseSimulator import BaseSimulator

class Sampler(BaseSimulator):
    def __init__(self, N, seed=None):
        """ Sample simulator
        Simulates taking a sample of size 10 from population of size N with mean mu and standard dev sigma 
        Returns sample as 1d np.array of length 10
    
        Parameters
        -------------------
        N : int
            population size  
        seed : int or None
            If set, randomness across runs is disabled
        """
        dim_param = 2

        super().__init__(dim_param=dim_param, seed=seed)
        self.N = N
        self.SampleSimulator = SampleSimulator

    def gen_single(self, param_set):
        """Forward model for simulator for single parameter set

        Parameters
        ----------
        params : list or np.array, 1d of length dim_param
            Parameter vector

        Returns
        -------
        dict : dictionary with data
            The dictionary must contain a key data that contains the results of
            the forward run. Additional entries can be present.
        """
        params = np.asarray(param_set)

        assert params.ndim == 1, 'params.ndim must be 1'

        sim_seed = self.gen_newseed()
        states = self.SampleSimulator(params, self.N, seed=sim_seed)
        
        return {'data': states,
                'N': self.N}
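
# Minimal usage sketch of the wrapped simulator (illustrative): gen_single
# returns a dict holding the length-10 sample under 'data' plus the population
# size N.
example_out = Sampler(N=1e6, seed=42).gen_single([28, 2.9])
assert example_out['data'].shape == (10,)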

## priors ##
import delfi.distribution as dd

seed_p = 2
prior_min = np.array([0,0])
prior_max = np.array([40,8])
prior = dd.Uniform(lower=prior_min, upper=prior_max, seed=seed_p)

## summary stats ##
from delfi.summarystats.BaseSummaryStats import BaseSummaryStats
from scipy import stats as spstats

class SampleStats(BaseSummaryStats):
    """SummaryStats class for the sample from a normal distribution

    Calculates summary statistics
    """
    def __init__(self, n_summary=2, seed=None):
        """See SummaryStats.py for docstring"""
        super(SampleStats, self).__init__(seed=seed)
        self.n_summary = n_summary

    def calc(self, repetition_list):
        """Calculate summary statistics

        Parameters
        ----------
        repetition_list : list of dictionaries, one per repetition
            data list, returned by `gen` method of Simulator instance

        Returns
        -------
        np.array, 2d with n_reps x n_summary
        """
        stats = []
        for rep in repetition_list:
            samp = np.asarray(rep['data'])
            xbar = np.mean(samp)
            s = np.std(samp)  # population formula; pass ddof=1 for the sample s.d.
            stats.append(np.array([xbar, s]))

        return np.asarray(stats)
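
# Shape sketch (illustrative): calc() returns one row of summary statistics
# (xbar, s) per repetition, e.g. a single repetition gives a (1, 2) array.
example_stats = SampleStats().calc([{'data': np.random.normal(28, 2.9, 10)}])
assert example_stats.shape == (1, 2)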

## generator ##
import delfi.generator as dg

N = 1e6

# summary statistics hyperparameters
n_summary = 2

seed_m = 3
m = Sampler(N, seed=seed_m)
s = SampleStats(n_summary=n_summary)
g = dg.Default(model=m, prior=prior, summary=s)

## true parameters and respective labels ##
true_params = np.array([28, 2.9])       
labels_params = ['mu', 'sigma']

# observed data: simulation given true parameters
#### this is the multiple observations #####
obs = []
for _ in range(25):
    obs.append(m.gen_single(true_params))

## summary stats for >1 observation ##
obs_stats = s.calc(obs)
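# Note: obs_stats has shape (25, 2), one row of summary statistics per observed
# sample. APT below asserts obs.shape[0] == 1 (see the traceback above), which
# is exactly where the AssertionError comes from. A crude way to stay within a
# single observation row (an ad-hoc workaround, not a delfi feature) would be
# to average the per-sample summaries,
#   obs_stats = obs_stats.mean(axis=0, keepdims=True)   # shape (1, 2)
# but this changes the statistical meaning of the summaries.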

## Hyperparameters ##
seed_inf = 1
pilot_samples = 2000
# training schedule
n_train = 2000
n_rounds = 1
# fitting setup
minibatch = 256
epochs = 100
val_frac = 0.05
# network setup
n_hiddens = [50,50]
# convenience
prior_norm = True
# MAF parameters
density = 'maf'
n_mades = 5

## Inference ##
import delfi.inference as infer

# inference object
res = infer.APT(g,
                  obs=obs_stats,
                  n_hiddens=n_hiddens,
                  seed=seed_inf,
                  pilot_samples=pilot_samples,
                  n_mades=n_mades,
                  prior_norm=prior_norm,
                  density=density)
# train
log, _, posterior = res.run(
                    n_train=n_train,
                    n_rounds=n_rounds,
                    minibatch=minibatch,
                    epochs=epochs,
                    silent_fail=False,
                    proposal='prior',
                    val_frac=val_frac,
                    verbose=True,)

@graceave, multiple observations are not supported by delfi. In the meantime, we have shifted our efforts to developing a new framework, sbi, which is based on PyTorch rather than Theano (which is no longer maintained). Since its latest release, sbi does support multiple IID observations for some inference methods, see: https://github.com/mackelab/sbi/releases/tag/v0.16.0
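
For this particular example, a rough sketch of an sbi (>= 0.16.0) workflow with multiple IID observations could look as follows. This is only a sketch based on sbi's flexible interface (BoxUniform, SNLE, append_simulations, train, build_posterior, sample); the exact signatures and the handling of a multi-trial x_o should be checked against the sbi tutorials, and the simulator is simplified to draw the 10 values directly from N(mu, sigma).

import torch
from sbi.inference import SNLE
from sbi.utils import BoxUniform

# uniform prior over (mu, sigma), matching the delfi prior above
prior = BoxUniform(low=torch.tensor([0.0, 0.0]), high=torch.tensor([40.0, 8.0]))

def simulator(theta):
    # theta: (batch, 2) tensor of (mu, sigma); returns (batch, 2) summary stats.
    # Drawing 10 values directly from N(mu, sigma) stands in for sampling 10
    # points from the large population.
    mu, sigma = theta[:, 0:1], theta[:, 1:2]
    samples = mu + sigma * torch.randn(theta.shape[0], 10)
    return torch.stack([samples.mean(dim=1), samples.std(dim=1)], dim=1)

# train a likelihood estimator on simulated (theta, x) pairs
theta = prior.sample((2000,))
x = simulator(theta)
inference = SNLE(prior=prior)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)

# x_o may contain several IID observations (here 25 rows of summary stats);
# likelihood-based methods such as SNLE can combine them when sampling
x_o = simulator(torch.tensor([[28.0, 2.9]]).repeat(25, 1))
posterior_samples = posterior.sample((1000,), x=x_o)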