Could you check the results for introspect_sklearn.py?
Opened this issue · 0 comments
Sandy4321 commented
After a slight change to the code, the results are as follows.
Is it working as expected?
GradientBoostingRegressor 0.8295(+-0.0387) 0.07m
ExtraTreesRegressor 0.8220(+-0.0485) 0.07m
BaggingRegressor 0.7951(+-0.0435) 0.08m
RandomForestRegressor 0.7938(+-0.0446) 0.08m
AdaBoostRegressor 0.7448(+-0.0311) 0.07m
ExtraTreeRegressor 0.7230(+-0.0636) 0.06m
LinearRegression 0.6812(+-0.0225) 0.06m
RidgeCV 0.6806(+-0.0228) 0.07m
LassoLarsCV 0.6795(+-0.0218) 0.06m
Lars 0.6765(+-0.0181) 0.07m
Ridge 0.6759(+-0.0235) 0.07m
LarsCV 0.6701(+-0.0199) 0.06m
LassoLarsIC 0.6678(+-0.0297) 0.06m
BayesianRidge 0.6652(+-0.0215) 0.07m
ARDRegression 0.6623(+-0.0239) 0.10m
DecisionTreeRegressor 0.6592(+-0.0570) 0.06m
LassoCV 0.6310(+-0.0220) 0.06m
TheilSenRegressor 0.6257(+-0.0236) 0.09m
OrthogonalMatchingPursuitCV 0.6247(+-0.0297) 0.06m
ElasticNet 0.6225(+-0.0200) 0.06m
Lasso 0.6146(+-0.0208) 0.06m
LinearSVR 0.6071(+-0.0385) 0.07m
ElasticNetCV 0.6036(+-0.0230) 0.06m
HuberRegressor 0.5641(+-0.0184) 0.07m
GaussianProcess 0.5381(+-0.0321) 0.07m
OrthogonalMatchingPursuit 0.5126(+-0.0178) 0.06m
KNeighborsRegressor 0.4587(+-0.0687) 0.07m
RANSACRegressor 0.4418(+-0.0801) 0.07m
LassoLars -0.0140(+-0.0056) 0.06m
SVR -0.0204(+-0.0183) 0.06m
NuSVR -0.0331(+-0.0231) 0.06m
MLPRegressor -0.4991(+-0.1958) 0.07m
PassiveAggressiveRegressor -1.5901(+-0.8248) 0.06m
GaussianProcessRegressor -5.5622(+-0.2112) 0.07m
GaussianMixture -36.3100(+-0.1037) 0.08m
BayesianGaussianMixture -36.4831(+-0.1084) 0.06m
SGDRegressor-740255963447501177432834048.0000(+-721608378456158132773060608.0000) 0.07m
[{'name': 'GradientBoostingRegressor', 'score': 0.8294675950620558, 'sem': 0.038697277160232024, 'took': 0.06543353001276651}, {'name': 'ExtraTreesRegressor', 'score': 0.8219932768079022, 'sem': 0.04854419090176812, 'took': 0.07448914845784506}, {'name': 'BaggingRegressor', 'score': 0.7951410144472711, 'sem': 0.04345846631050684, 'took': 0.07726001342137655}, {'name': 'RandomForestRegressor', 'score': 0.7938016078449767, 'sem': 0.04461968326284518, 'took': 0.07662835915883383}, {'name': 'AdaBoostRegressor', 'score': 0.7447621032864981, 'sem': 0.031141049023248508, 'took': 0.07199073632558187}, {'name': 'ExtraTreeRegressor', 'score': 0.7230142892238222, 'sem': 0.06363436756236208, 'took': 0.05977343320846558}, {'name': 'LinearRegression', 'score': 0.6811986578778075, 'sem': 0.022541292446391012, 'took': 0.06334726413091024}, {'name': 'RidgeCV', 'score': 0.68062737759137, 'sem': 0.02279257522997783, 'took': 0.0659236749013265}, {'name': 'LassoLarsCV', 'score': 0.6795132272324104, 'sem': 0.021822428857612756, 'took': 0.06067108710606893}, {'name': 'Lars', 'score': 0.6765114810873696, 'sem': 0.018053891175682096, 'took': 0.06525879303614299}, {'name': 'Ridge', 'score': 0.6759086828183462, 'sem': 0.023513062050043766, 'took': 0.07132590214411418}, {'name': 'LarsCV', 'score': 0.6700565280793053, 'sem': 0.019870880775850946, 'took': 0.06492633024851481}, {'name': 'LassoLarsIC', 'score': 0.6678156115866583, 'sem': 0.02974701929494405, 'took': 0.06329733530680338}, {'name': 'BayesianRidge', 'score': 0.6651651366473242, 'sem': 0.021496925505518193, 'took': 0.06761914094289144}, {'name': 'ARDRegression', 'score': 0.6623191491579764, 'sem': 0.023881799569544726, 'took': 0.09591010411580404}, {'name': 'DecisionTreeRegressor', 'score': 0.6591940650045099, 'sem': 0.057038232161180744, 'took': 0.06289846499760945}, {'name': 'LassoCV', 'score': 0.6310094141412307, 'sem': 0.02202262657221789, 'took': 0.06336384216944377}, {'name': 'TheilSenRegressor', 'score': 0.6257405152620453, 
'sem': 0.02361953451321954, 'took': 0.09195405642191569}, {'name': 'OrthogonalMatchingPursuitCV', 'score': 0.6246585582406875, 'sem': 0.029688398100226585, 'took': 0.060205602645874025}, {'name': 'ElasticNet', 'score': 0.6224674699789806, 'sem': 0.019954922816295302, 'took': 0.0626823623975118}, {'name': 'Lasso', 'score': 0.6146425212051473, 'sem': 0.02082079523461588, 'took': 0.06097027063369751}, {'name': 'LinearSVR', 'score': 0.6071304632076193, 'sem': 0.03851599977382228, 'took': 0.07217363119125367}, {'name': 'ElasticNetCV', 'score': 0.6035630964040316, 'sem': 0.023006730065823905, 'took': 0.06472687323888143}, {'name': 'HuberRegressor', 'score': 0.5641180697854233, 'sem': 0.018390157383013184, 'took': 0.06544162034988403}, {'name': 'GaussianProcess', 'score': 0.5381169234593121, 'sem': 0.03210310436991657, 'took': 0.07119294007619222}, {'name': 'OrthogonalMatchingPursuit', 'score': 0.5126458830625192, 'sem': 0.017778179873312067, 'took': 0.0615354061126709}, {'name': 'KNeighborsRegressor', 'score': 0.4587164178672096, 'sem': 0.06868620796189272, 'took': 0.06512523492177327}, {'name': 'RANSACRegressor', 'score': 0.441831451671089, 'sem': 0.08014354503424943, 'took': 0.07194092671076456}, {'name': 'LassoLars', 'score': -0.014021478460640205, 'sem': 0.0056452484821212226, 'took': 0.05821096499760946}, {'name': 'SVR', 'score': -0.020417639894389383, 'sem': 0.018253507675483005, 'took': 0.05942434867223104}, {'name': 'NuSVR', 'score': -0.03306591529425958, 'sem': 0.023059489071394088, 'took': 0.0626325249671936}, {'name': 'MLPRegressor', 'score': -0.49910976628295933, 'sem': 0.195822539180817, 'took': 0.06647159655888875}, {'name': 'PassiveAggressiveRegressor', 'score': -1.590134321109338, 'sem': 0.8248219779802669, 'took': 0.059042084217071536}, {'name': 'GaussianProcessRegressor', 'score': -5.562202602883623, 'sem': 0.21117314890579691, 'took': 0.07112640937169393}, {'name': 'GaussianMixture', 'score': -36.30995722491594, 'sem': 0.10368584439449659, 'took': 
0.07692754666010539}, {'name': 'BayesianGaussianMixture', 'score': -36.4831297985314, 'sem': 0.10839611693811571, 'took': 0.06073753039042155}, {'name': 'SGDRegressor', 'score': -7.402559634475012e+26, 'sem': 7.216083784561581e+26, 'took': 0.06557461420694986}]
The changed code is:
from __future__ import print_function, absolute_import
import inspect, warnings, sklearn, psutil, numpy, re, time
import numpy as np
from misc import * # July29
from OverridePredictFunctionClassifier import * #July29
from sklearn import cluster, covariance, \
decomposition, ensemble, feature_extraction, feature_selection, \
gaussian_process, isotonic, kernel_approximation, learning_curve, \
linear_model, manifold, mixture, multiclass, naive_bayes, \
neighbors, neural_network, cross_decomposition, preprocessing, \
random_projection, semi_supervised, svm, tree, datasets # July29 remmoved lda, qda,
def get_python_processes():
    """Return the number of running processes whose name contains 'python'.

    Processes whose name cannot be read are not counted.
    """
    def is_python_process(p):
        try:
            # ``name`` is a method in current psutil releases but was a plain
            # attribute in very old ones; accept both forms.
            name = p.name
            if callable(name):
                name = name()
            return 'python' in name
        except Exception:
            # Best effort: a process that cannot be inspected is not counted.
            return False

    # The predicate has to be *called*: testing the bare function object is
    # always truthy and would count every process on the machine.
    return sum(1 for p in psutil.process_iter() if is_python_process(p))
def get_classifiers(module=None, done=None):
    """Recursively collect estimator classes reachable from ``module``.

    A class is kept only if it can be instantiated without arguments and
    then ``fit`` and ``predict`` on a tiny synthetic regression problem
    without raising.

    Parameters
    ----------
    module : module, optional
        Module to walk. Defaults to the top-level ``sklearn`` package.
    done : list of str, optional
        Names of modules and fully qualified classes already visited. It is
        extended in place and shared with the recursive calls. A fresh list
        is created per top-level call, so repeated calls do not see the
        bookkeeping of earlier ones.

    Returns
    -------
    list
        The classes that passed the smoke test, in discovery order.
    """
    if module is None:
        module = sklearn
    if done is None:
        done = []
    ignores = ['MemmapingPool', 'PicklingPool', 'externals',
               'datasets', 'EllipticEnvelope', 'OneClassSVM']
    classifiers = []
    # Tiny data set (20 samples, 5 features) used only for the smoke test.
    X, y = sklearn.datasets.make_regression(20, 5)
    for name, cls in inspect.getmembers(module):
        if name in ignores:
            continue
        if inspect.ismodule(cls):
            mod_name = cls.__name__
            if (mod_name.startswith('_')
                    or mod_name.endswith('_')
                    or not mod_name.startswith('sklearn')
                    or mod_name in done
                    or any(t in ignores for t in mod_name.split('.'))):
                continue
            done.append(mod_name)
            classifiers += get_classifiers(cls, done)
        if inspect.isclass(cls):
            if '_' in name or name[0].islower():
                continue
            owner = cls.__module__
            if (owner.startswith('_')
                    or owner.endswith('_')
                    or not owner.startswith('sklearn')):
                continue
            full_name = owner + '.' + cls.__name__
            if full_name in done:
                continue
            done.append(full_name)
            try:
                cls().fit(X, y).predict(X)
            except Exception:
                # Not usable as a plain fit/predict estimator; skip it.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                continue
            classifiers.append(cls)
    return classifiers
# Result dicts ('name', 'score', 'sem', 'took') of the most recent
# try_all_classifiers() run; that function resets and refills this list.
all_scores = []
# Classes returned by get_classifiers(sklearn), stored by
# try_all_classifiers() the first time it has to discover them.
cached_classifiers = None
def try_all_classifiers(X, y, classifiers=None, scoring=None,
                        ignore=(), classification=None, use_proba=False,
                        classifier_transform=None):
    """Cross-validate every candidate estimator on ``(X, y)`` and print a ranking.

    Parameters
    ----------
    X, y
        Training data and targets; ``X.copy()``, ``y`` and ``len(y)`` are
        handed to ``do_cv`` for every estimator.
    classifiers : iterable of classes, optional
        Estimator classes to try; each is instantiated without arguments.
        When omitted, the classes found by ``get_classifiers(sklearn)`` are
        used and cached in the module-level ``cached_classifiers``.
    scoring
        Forwarded to ``do_cv``.
    ignore : collection of str, optional
        Class names to skip.
    classification : bool or None, optional
        ``True`` keeps only ``ClassifierMixin`` estimators, ``False`` keeps
        only ``RegressorMixin`` estimators, ``None`` applies no filter.
    use_proba : bool, optional
        When set, estimators lacking ``predict_proba`` are wrapped in
        ``OverridePredictFunctionClassifier`` using ``decision_function``
        if available, otherwise ``predict``.
    classifier_transform : callable, optional
        Applied to each estimator instance just before evaluation.

    The results are stored, sorted by descending score, in the module-level
    ``all_scores`` list and printed. Estimators that raise are reported and
    left out of the results.
    """
    global all_scores, cached_classifiers
    all_scores = []
    if classifiers is None:
        print('calling get_classifiers')
        if cached_classifiers is None:
            cached_classifiers = get_classifiers(sklearn)
        classifiers = cached_classifiers
        print('got ' + repr(len(classifiers)) + ' classifiers')
    for classifier in classifiers:
        if classifier.__name__ in ignore:
            continue
        try:
            print('testing classifier:', classifier.__name__)
            t0 = time.time()
            clf = classifier()
            if classification == True and not isinstance(clf, sklearn.base.ClassifierMixin):
                print('is classification and classifier is not a ClassifierMixin')
                continue
            if classification == False and not isinstance(clf, sklearn.base.RegressorMixin):
                print('is NOT classification and classifier is not a RegressorMixin')
                continue
            if hasattr(clf, 'n_estimators'):
                clf.n_estimators = 200
            if use_proba and not hasattr(clf, 'predict_proba'):
                func = 'decision_function' if hasattr(clf, 'decision_function') else 'predict'
                clf = OverridePredictFunctionClassifier(clf, func)
            if classifier_transform is not None:
                clf = classifier_transform(clf)
            # NOTE(review): do_cv is not defined in this file; presumably it
            # comes from the star import of ``misc`` - confirm.
            score, sem = do_cv(clf, X.copy(), y, len(y), n_iter=3, scoring=scoring, quiet=True)
            took = (time.time() - t0) / 60.  # elapsed time in minutes
            all_scores.append({'name': classifier.__name__, 'score': score, 'sem': sem, 'took': took})
            print('classifier:', classifier.__name__, 'score:', score, 'sem:', sem, 'took: %.1fm' % took)
        except Exception as e:
            # Best effort: one failing estimator must not abort the whole run.
            print('classifier:', classifier.__name__, 'error - not included in results - ' + str(e))
    all_scores = sorted(all_scores, key=lambda t: t['score'], reverse=True)
    row_format = '{:>35}{:10.4f}(+-{:5.4f}){:10.2f}m'
    print('\t\tsuccessfull classifiers\n', '\n')
    # Plain loop instead of a list comprehension used only for its side effect.
    for d in all_scores:
        print(row_format.format(d['name'], d['score'], d['sem'], d['took']))
    print(all_scores)
def parse_classifier_meta(classifier):
    """Extract the documented parameters from ``classifier``'s docstring.

    Runs of hyphens in the docstring are treated as line breaks, blank lines
    are dropped, and parsing starts after a line that is exactly
    ``Parameters`` and stops at a line that is exactly ``See Also``. Within
    that region a line containing ``:`` opens a new argument (text before
    the first colon is the name, text between the first and second colon is
    the type); any other line is appended to the description of the current
    argument.

    Parameters
    ----------
    classifier : class
        Object whose ``__doc__`` is parsed. A missing docstring yields no
        arguments.

    Returns
    -------
    dict
        ``{'classifier': classifier, 'args': [...]}`` where each entry of
        ``args`` is a dict with the keys ``name``, ``type`` and
        ``description``.
    """
    doc = classifier.__doc__ or ''
    lines = [s.strip() for s in re.sub('-+', '\n', doc).split('\n')]
    args = []
    started = False
    curr_arg = None
    for l in lines:
        if not l:
            continue
        if not started and l == 'Parameters':
            started = True
        elif started and l == 'See Also':
            break
        elif started:
            if ':' in l:
                # A real list is needed here: a ``map`` object cannot be
                # indexed on Python 3.
                name_type = [s.strip() for s in l.split(':')]
                curr_arg = {'name': name_type[0], 'type': name_type[1], 'description': ''}
                args.append(curr_arg)
            else:
                if not curr_arg:
                    # Description text before any argument: report and skip
                    # it instead of crashing on ``None['description']``.
                    print('invalid line [%s] doc: %s' % (l, doc))
                    continue
                curr_arg['description'] += l
    return {'classifier': classifier, 'args': args}
def parse_float_type(t):
    """Return 100 evenly spaced candidate values for a float parameter.

    If the type text ``t`` contains a range written like ``0.0 < x < 1.0``
    (preceded by a space), the values span that range. Otherwise, or when
    the captured bounds are not valid numbers, they span ``[-100, 100]``.
    """
    # Raw string: the original pattern relied on the invalid string escape
    # ``\<``. A plain ``<`` matches the same text.
    found = re.search(r'.* ([0-9.]+) < .* < ([0-9.]+)', t)
    if found:
        try:
            low, high = float(found.group(1)), float(found.group(2))
        except ValueError:
            # ``[0-9.]+`` also captures things like '1.0.' (a trailing
            # sentence period); fall through to the default range.
            pass
        else:
            return np.linspace(low, high, 100)
    return np.linspace(-100, 100, 100)
def parse_range_type(t):
    """Return the distinct option values listed in a ``{...}`` type text.

    Every run of letters, digits, underscores and dots is taken as one
    option. The tokens ``None``, ``True`` and ``False`` are converted to the
    corresponding Python objects; everything else stays a string.
    Duplicates are dropped and the order of first appearance is kept, so the
    result is the same on every run.
    """
    # ``A-Za-z`` plus an explicit underscore: the original ``A-z`` range also
    # matched the punctuation between 'Z' and 'a' ([ \ ] ^ and backtick).
    tokens = re.findall(r'[A-Za-z0-9_.]+', t)
    literals = {'None': None, 'True': True, 'False': False}
    values = []
    seen = set()
    for token in tokens:
        value = literals.get(token, token)
        if value not in seen:
            seen.add(value)
            values.append(value)
    return values
def parse_string_type(t, d):
    """Return the distinct quoted words found in a type text and description.

    ``t`` and ``d`` (with its newlines replaced by spaces) are searched for
    words made of letters, digits and underscores that are enclosed in
    single or double quotes. Duplicates are dropped and the order of first
    appearance is kept, so the result is the same on every run.
    """
    text = t + ' ' + d.replace('\n', ' ')
    # ``A-Za-z`` plus an explicit underscore: the original ``A-z`` range also
    # matched the punctuation between 'Z' and 'a' ([ \ ] ^ and backtick).
    words = re.findall(r"""['"]([A-Za-z0-9_]+)['"]""", text)
    unique = []
    seen = set()
    for word in words:
        if word not in seen:
            seen.add(word)
            unique.append(word)
    return unique
def get_val_for_type(name, t, desc):
    """Return candidate values for one documented parameter, or ``None``.

    ``name`` is the parameter name, ``t`` its documented type text and
    ``desc`` its description. ``None`` means the parameter is skipped: it is
    on the skip list, has no type, mentions ``array``, is a
    class/ref/meth/callable type, has a leading or trailing underscore in
    its name, or has a type prefix that is not recognised.
    """
    skipped_names = ('base_estimator', 'class_weight')
    if name in skipped_names or not t or 'array' in t:
        return None
    if t.startswith(('class', 'ref', 'meth', 'callable')):
        return None
    if name.startswith('_') or name.endswith('_'):
        return None

    # Type prefix -> lazily evaluated value factory, checked in this order.
    factories = (
        (('bool', 'Bool'), lambda: [True, False]),
        ('float', lambda: parse_float_type(t)),
        ('int', lambda: range(0, 2000, 10)),
        ('str', lambda: parse_string_type(t, desc)),
        ('{', lambda: parse_range_type(t)),
        ('double', lambda: np.linspace(-100, 100, 100)),
    )
    for prefix, make_values in factories:
        if t.startswith(prefix):
            return make_values()
    return None
def try_classifier_with_arg_customisation(meta):
    """Build a parameter grid for the classifier described by ``meta``.

    ``meta`` is a dict with the keys ``classifier`` and ``args``, where each
    entry of ``args`` has ``name``, ``type`` and ``description``. Every
    argument for which ``get_val_for_type`` yields candidate values is added
    to the grid. The grid search itself is currently switched off by the
    ``1 == 2`` guard, so the function returns ``None`` without fitting
    anything.
    """
    clf = meta['classifier']
    gs_args = {}
    for a in meta['args']:
        vals = get_val_for_type(a['name'], a['type'], a['description'])
        # Identity test: ``vals != None`` on a NumPy array is evaluated
        # element-wise and raises when used as an ``if`` condition.
        if vals is not None:
            gs_args[a['name']] = vals
    # Deliberately disabled in the original (``1==2``); kept as is.
    if (1 == 2 and len(gs_args) > 0):
        boston_data = datasets.load_boston()
        X = boston_data['data']
        y = boston_data['target']
        do_gs(clf(), X, y, gs_args)
if __name__ == '__main__':
    # Script entry point: rank all discovered estimators on the Boston
    # housing data returned by datasets.load_boston().
    boston = datasets.load_boston()
    features, targets = boston['data'], boston['target']
    try_all_classifiers(features, targets)
    # metas = [parse_classifier_meta(clf) for clf in classifiers]
    # ignore = [test_classifier_with_arg_customisation(m) for m in metas]