Error when exporting SVC fitted using sparse matrix
AMR-KELEG opened this issue · 2 comments
AMR-KELEG commented
I have an svm/svc classifier trained using a sparse matrix as follows:
from sklearn_porter import Porter
from sklearn import svm
# load data and train the classifier:
clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma=1/X_train_transformed.shape[1], kernel='linear',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
clf.fit(X_train_transformed, X_train['label'])
type(X_train_transformed)
----------------------------------------
scipy.sparse.csr.csr_matrix
The problem is that exporting fails with the error shown below:
# export:
porter = Porter(clf, language='java')
output = porter.export(embed_data=False, details=False)
with open('SVC.java', 'w') as f:
f.writelines(output)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-34-e7d647ff66cd> in <module>()
1 # export:
2 porter = Porter(clf, language='java')
----> 3 output = porter.export(embed_data=False, details=False)
4 with open('SVC.java', 'w') as f:
5 f.writelines(output)
~/.conda/envs/ml/lib/python3.6/site-packages/sklearn_porter/Porter.py in export(self, class_name, method_name, num_format, details, **kwargs)
187
188 output = self.template.export(class_name=class_name,
--> 189 method_name=method_name, **kwargs)
190 if not details:
191 return output
~/.conda/envs/ml/lib/python3.6/site-packages/sklearn_porter/estimator/classifier/SVC/__init__.py in export(self, class_name, method_name, export_data, export_dir, export_filename, export_append_checksum, **kwargs)
131 self.params = params
132
--> 133 self.n_features = len(est.support_vectors_[0])
134 self.svs_rows = est.n_support_
135 self.n_svs_rows = len(est.n_support_)
~/.conda/envs/ml/lib/python3.6/site-packages/scipy/sparse/base.py in __len__(self)
264 # non-zeros is more important. For now, raise an exception!
265 def __len__(self):
--> 266 raise TypeError("sparse matrix length is ambiguous; use getnnz()"
267 " or shape[0]")
268
TypeError: sparse matrix length is ambiguous; use getnnz() or shape[0]
nok commented
Hello @AMR-KELEG, thanks, can you provide some data or an official dataset for testing?
AMR-KELEG commented
from sklearn.feature_extraction.text import CountVectorizer
from sklearn_porter import Porter
cv = CountVectorizer()
l = ['Pattern 1', 'Pattern 2', 'Pattern 3']
X = cv.fit_transform(l)
y = [1, 2, 3]
clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma=1/X.shape[1], kernel='linear',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
clf.fit(X, y)
porter = Porter(clf, language='java')
output = porter.export(embed_data=False, details=False)
with open('SVC.java', 'w') as f:
f.writelines(output)
The problem is that CountVectorizer returns a sparse matrix, which doesn't support the built-in len() function.
I will try to fix this problem and create a pull request.