|
def _fill_missing_with_samples(first_period, column, options, level_dict, use_keys):
    """Replace NaNs in one first-period column with sampled values (in place).

    A full column of values is requested from ``_sample_characteristic`` even if
    nothing is missing; only the NaN entries of ``first_period[column]`` are
    overwritten.

    Parameters
    ----------
    first_period : pandas.DataFrame
        First-period observations. The column ``column`` is modified in place.
    column : str
        Name of the column whose missing values are filled.
    options : dict
        Passed through to ``_sample_characteristic``.
    level_dict : dict
        Passed through to ``_sample_characteristic``.
    use_keys : bool
        Passed through to ``_sample_characteristic``.

    """
    sampled = _sample_characteristic(
        first_period, options, level_dict, use_keys=use_keys
    )
    replacement = pd.Series(data=sampled, index=first_period.index)
    # NOTE(review): the ``downcast`` keyword is kept to preserve the resulting
    # dtypes; it appears to be deprecated in recent pandas releases -- confirm
    # against the pandas version this project pins.
    first_period[column] = first_period[column].fillna(replacement, downcast="infer")


def _extend_data_with_sampled_characteristics(df, optim_paras, options):
    """Sample initial observations from initial conditions.

    The function iterates over all state space dimensions and replaces NaNs with values
    sampled from initial conditions. In the case of an n-step-ahead simulation with
    sampling all state space dimensions are sampled. For the other two simulation
    methods, potential NaNs in the data are replaced with sampled characteristics.

    Characteristics are sampled regardless of the simulation type which keeps randomness
    across the types constant.

    Parameters
    ----------
    df : pandas.DataFrame
        A pandas DataFrame which contains only an index for n-step-ahead simulation with
        sampling. For the other simulation methods, it contains information on
        individuals which is allowed to have missing information in the first period.
    optim_paras : dict
        Read keys: ``"observables"``, ``"choices_w_exp"``, ``"choices"``,
        ``"n_lagged_choices"``, ``"lagged_choice_{lag}"``, ``"n_types"`` and, if there
        are at least two types, ``"type_prob"``.
    options : dict
        Passed through to ``_sample_characteristic``.

    Returns
    -------
    df : pandas.DataFrame
        A pandas DataFrame with no missings at all.

    """
    # Sample characteristics only for the first period.
    fp = df.query("period == 0").copy()
    has_types = optim_paras["n_types"] >= 2

    # The order of the sampling steps below (observables, experiences, lagged
    # choices from the highest lag down to the first, types) is kept fixed on
    # purpose: each step calls ``_sample_characteristic`` on the partially filled
    # first-period data.
    for observable, level_dict in optim_paras["observables"].items():
        _fill_missing_with_samples(fp, observable, options, level_dict, use_keys=False)

    for choice in optim_paras["choices_w_exp"]:
        level_dict = optim_paras["choices"][choice]["start"]
        _fill_missing_with_samples(
            fp, f"exp_{choice}", options, level_dict, use_keys=True
        )

    for lag in range(optim_paras["n_lagged_choices"], 0, -1):
        column = f"lagged_choice_{lag}"
        _fill_missing_with_samples(
            fp, column, options, optim_paras[column], use_keys=False
        )

    # Sample types and map them to individuals for all periods.
    if has_types:
        _fill_missing_with_samples(
            fp, "type", options, optim_paras["type_prob"], use_keys=False
        )

    # Update data in the first period with sampled characteristics.
    df = df.combine_first(fp)

    # Types are invariant and we have to fill the DataFrame for one-step-ahead.
    # ``Series.ffill()`` is the direct equivalent of the deprecated
    # ``fillna(method="ffill")`` spelling.
    if has_types:
        df["type"] = df["type"].ffill()

    return df