stability-selection is a Python implementation of the stability selection algorithm [1], following the scikit-learn Estimator API.
Before installing the module you will need numpy, matplotlib, and sklearn.
To install the module, clone the repository:
git clone https://github.com/thuijskens/stability-selection.git
and execute the following in the project directory:
python setup.py install
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from stability_selection import StabilitySelection
def _generate_dummy_classification_data(p=1000, n=1000, k=5, random_state=123321):
    """Generate a synthetic binary classification data set with sparse signal.

    The design matrix is drawn from a standard normal distribution. Only
    ``k`` distinct, randomly chosen features receive a non-zero coefficient
    (drawn uniformly from [0, 1)); the label is 1 when the logistic
    probability of the linear predictor exceeds 0.5, and 0 otherwise.

    Parameters
    ----------
    p : int, default=1000
        Number of features (columns of ``X``).
    n : int, default=1000
        Number of samples (rows of ``X``).
    k : int, default=5
        Number of informative features. Must not exceed ``p``.
    random_state : int, RandomState instance or None, default=123321
        Seed or generator passed to ``check_random_state``.

    Returns
    -------
    X : ndarray of shape (n, p)
        The simulated feature matrix.
    y : ndarray of shape (n,)
        Binary labels (0 or 1).
    important_betas : ndarray of shape (k,)
        Sorted column indices of the informative features.

    Raises
    ------
    ValueError
        If ``k`` is larger than ``p`` (raised by ``rng.choice`` because the
        indices are sampled without replacement).
    """
    rng = check_random_state(random_state)

    X = rng.normal(loc=0.0, scale=1.0, size=(n, p))

    # Sample WITHOUT replacement: the default (replace=True) may return the
    # same index more than once, leaving fewer than k informative features.
    important_betas = np.sort(rng.choice(a=np.arange(p), size=k, replace=False))

    betas = np.zeros(p)
    betas[important_betas] = rng.uniform(size=k)

    # Logistic link, thresholded at 0.5 to obtain deterministic labels.
    probs = 1 / (1 + np.exp(-1 * np.matmul(X, betas)))
    y = (probs > 0.5).astype(int)

    return X, y, important_betas
# Dimensions of the dummy data set: n samples, p features, k informative features.
n, p, k = 500, 1000, 5

# Pass p explicitly so the generated data matches the dimensions declared above.
X, y, important_betas = _generate_dummy_classification_data(p=p, n=n, k=k)

# L1-penalised logistic regression on standardised features. The solver is
# set explicitly because scikit-learn's default solver ('lbfgs') does not
# support the L1 penalty; 'liblinear' does.
base_estimator = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(penalty='l1', solver='liblinear')),
])

# 'model__C' addresses the C parameter of the pipeline step named 'model';
# lambda_grid supplies 50 log-spaced candidate values between 1e-5 and 1e-1.
selector = StabilitySelection(base_estimator=base_estimator, lambda_name='model__C',
                              lambda_grid=np.logspace(-5, -1, 50))

# The selector has to be fitted before the selected features can be queried.
selector.fit(X, y)

print(selector.get_support(indices=True))
See the documentation
Footnotes
1. Meinshausen, N. and Bühlmann, P., 2010. Stability selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 72(4), pp.417-473.