shenweichen/DeepCTR

官网deepfm example运行报错AttributeError

SYSU-Linxp opened this issue · 1 comments

Describe the bug(问题描述)
如题,官网example代码https://deepctr-doc.readthedocs.io/en/latest/Examples.html:
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names

# Script entry point: train and evaluate DeepFM on the Criteo sample data,
# hashing the string-valued sparse features on the fly.
if __name__ == "__main__":
    data = pd.read_csv('./criteo_sample.txt')

    # Column names: C1..C26 are the sparse fields, I1..I13 the dense fields.
    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]

    # Fill missing values: the string '-1' for sparse fields, 0 for dense fields.
    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )
    target = ['label']

    # 1.do simple Transformation for dense features
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # 2.set hashing space for each sparse field,and record dense feature field name

    fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1000, embedding_dim=4, use_hash=True, dtype='string')
                              # since the input is string
                              for feat in sparse_features] + [DenseFeat(feat, 1, )
                                                              for feat in dense_features]

    # The same column list is used for both the linear part and the DNN part.
    linear_feature_columns = fixlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns, )

    # 3.generate input data for model

    train, test = train_test_split(data, test_size=0.2, random_state=2020)

    # The model input is a dict mapping each feature name to its column.
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4.Define Model,train,predict and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )

    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
    print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))

To Reproduce(复现步骤)
直接运行

Operating environment(运行环境):

  • python version [e.g. 3.8]
  • tensorflow version [e.g. 2.5.0]
  • deepctr version [e.g. 0.9.1,]

Additional context
AttributeError Traceback (most recent call last)
in
40
41 # 4.Define Model,train,predict and evaluate
---> 42 model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
43 model.compile("adam", "binary_crossentropy",
44 metrics=['binary_crossentropy'], )

~/anaconda3/lib/python3.8/site-packages/deepctr/models/deepfm.py in DeepFM(linear_feature_columns, dnn_feature_columns, fm_group, dnn_hidden_units, l2_reg_linear, l2_reg_embedding, l2_reg_dnn, seed, dnn_dropout, dnn_activation, dnn_use_bn, task)
44
45 inputs_list = list(features.values())
---> 46
47 linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
48 l2_reg=l2_reg_linear)

~/anaconda3/lib/python3.8/site-packages/deepctr/feature_column.py in get_linear_logit(features, feature_columns, units, use_bias, seed, prefix, l2_reg)
147 raise TypeError("Invalid feature column type,got", type(fc))
148
--> 149 return input_features
150
151

~/anaconda3/lib/python3.8/site-packages/deepctr/feature_column.py in <listcomp>(.0)
147 raise TypeError("Invalid feature column type,got", type(fc))
148
--> 149 return input_features
150
151

~/anaconda3/lib/python3.8/site-packages/deepctr/feature_column.py in input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix, seq_mask_zero, support_dense, support_group)
181 sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
182 [sparse_input, sparse_feat_refine_weight])
--> 183 linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input)
184 elif len(dense_input_list) > 0:
185 dense_input = concat_func(dense_input_list)

~/anaconda3/lib/python3.8/site-packages/deepctr/inputs.py in embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list, mask_feat_list, to_list)
81 if (len(return_feat_list) == 0 or feature_name in return_feat_list):
82 if fc.use_hash:
---> 83 lookup_idx = Hash(fc.vocabulary_size, mask_zero=(feature_name in mask_feat_list), vocabulary_path=fc.vocabulary_path)(
84 sparse_input_dict[feature_name])
85 else:

AttributeError: 'SparseFeat' object has no attribute 'vocabulary_path'

看起来是重装 deepctr 之后,新版本没有被重新加载导致的。