Selective is a white-box feature selection library that supports unsupervised and supervised selection methods for classification and regression tasks.
The library provides:
Simple to complex selection methods: Variance, Correlation, Statistical, Linear, Tree-based, or Custom
Interoperable with data frames as the input
Automated task detection: no need to know which feature selection method works with which machine learning task.
Benchmarking with multiple selectors
Inspection of results and feature importance
# Import Selective and SelectionMethod
# NOTE(review): load_boston is not available in recent scikit-learn releases —
# confirm the version pinned in requirements.txt still ships it.
from sklearn.datasets import load_boston
from feature.utils import get_data_label
from feature.selector import Selective, SelectionMethod
# Data: unpack the loaded Boston dataset into the `data` and `label` objects used below
data, label = get_data_label(load_boston())
# Feature selectors from simple to more complex
# NOTE: every assignment below rebinds the same `selector` name, so only the
# last one (TreeBased) is the selector that is actually fitted further down.
# The lines are alternatives; keep the one you want to try.
selector = Selective(SelectionMethod.Variance(threshold=0.0))
selector = Selective(SelectionMethod.Correlation(threshold=0.5, method="pearson"))
selector = Selective(SelectionMethod.Statistical(num_features=3, method="anova"))
selector = Selective(SelectionMethod.Linear(num_features=3, regularization="none"))
selector = Selective(SelectionMethod.TreeBased(num_features=3))
# Feature reduction: fit the selector on (data, label) and keep the reduced result
subset = selector.fit_transform(data, label)
# Column names that remain in the reduced result
print("Reduction:", list(subset.columns))
# Absolute scores reported by the fitted selector
print("Scores:", list(selector.get_absolute_scores()))
# Imports
from sklearn.datasets import load_boston
from feature.utils import get_data_label
from xgboost import XGBClassifier, XGBRegressor
from feature.selector import SelectionMethod, benchmark, calculate_statistics

# Data
data, label = get_data_label(load_boston())

# Selectors: settings shared by several entries of the dictionary below
corr_threshold = 0.5
num_features = 3
# Keyword arguments passed to both XGBoost estimators
tree_params = {"n_estimators": 50, "max_depth": 5, "random_state": 111, "n_jobs": 4}

# Map a display name to each selection method that takes part in the benchmark
selectors = {
    # Correlation methods
    "corr_pearson": SelectionMethod.Correlation(corr_threshold, method="pearson"),
    "corr_kendall": SelectionMethod.Correlation(corr_threshold, method="kendall"),
    "corr_spearman": SelectionMethod.Correlation(corr_threshold, method="spearman"),

    # Statistical methods
    "stat_anova": SelectionMethod.Statistical(num_features, method="anova"),
    "stat_chi_square": SelectionMethod.Statistical(num_features, method="chi_square"),
    "stat_mutual_info": SelectionMethod.Statistical(num_features, method="mutual_info"),

    # Linear methods
    "linear": SelectionMethod.Linear(num_features, regularization="none"),
    "lasso": SelectionMethod.Linear(num_features, regularization="lasso", alpha=1000),
    "ridge": SelectionMethod.Linear(num_features, regularization="ridge", alpha=1000),

    # Non-linear tree-based methods
    # No estimator is passed for "random_forest", so the library default is used
    "random_forest": SelectionMethod.TreeBased(num_features),
    "xgboost_classif": SelectionMethod.TreeBased(num_features, estimator=XGBClassifier(**tree_params)),
    "xgboost_regress": SelectionMethod.TreeBased(num_features, estimator=XGBRegressor(**tree_params)),
}

# Benchmark: run every selector on the same data and collect three result tables
score_df, selected_df, runtime_df = benchmark(selectors, data, label)
print(score_df, "\n\n", selected_df, "\n\n", runtime_df)

# Get benchmark statistics by feature
stats_df = calculate_statistics(score_df, selected_df)
import pandas as pd
from sklearn.datasets import load_boston
from feature.utils import get_data_label
from feature.selector import SelectionMethod, Selective, plot_importance

# Data
data, label = get_data_label(load_boston())

# Feature Selector
selector = Selective(SelectionMethod.Linear(num_features=10, regularization="none"))
subset = selector.fit_transform(data, label)

# Plot Feature Importance
# Index the absolute scores by the original column names so each score is
# labelled with its feature, then hand the frame to plot_importance.
df = pd.DataFrame(selector.get_absolute_scores(), index=data.columns)
plot_importance(df)
The library requires Python 3.6+. See requirements.txt
for necessary packages.
After installing the requirements, you can install the library from the provided wheel package using the following commands:
pip install dist/selective-X.X.X-py3-none-any.whl
Note: Don't forget to replace X.X.X
with the current version number.
Alternatively, you can build a wheel package on your platform from scratch using the source code:
pip install setuptools wheel # if wheel is not installed
python setup.py bdist_wheel
pip install dist/selective-X.X.X-py3-none-any.whl
To confirm successful cloning and setup, run the tests. All tests should pass.
python -m unittest discover -v tests
To upgrade to the latest version of the library, run git pull origin master
in the repo folder,
and then run pip install --upgrade --no-cache-dir dist/selective-X.X.X-py3-none-any.whl
Please submit bug reports and feature requests as Issues. You can also submit any additional questions or feedback as issues.
Selective is licensed under the GNU GPL 3.0.