The reproduction results of the DLPFC dataset are inconsistent with those recorded in the paper
Opened this issue · 0 comments
My code:
# DLPFC sample IDs to evaluate; one GraphST run is performed per sample.
samples = [
    "151507", "151508", "151509", "151510",
    "151669", "151670", "151671", "151672",
    "151673", "151674", "151675", "151676",
]
# Number of clusters requested for each sample, in the same order as
# `samples`: 7 for the first four, 5 for the middle four, 7 for the last four.
n_clusters = [7] * 4 + [5] * 4 + [7] * 4
ARIs = []
refinement = True

# Iterate samples and their cluster counts in lockstep instead of relying on
# a hard-coded sample count.
for dataset, n_cluster in zip(samples, n_clusters):
    file_fold = "/media/s5/gj/nodecode/data/DLPFC/{}".format(dataset)
    adata = sc.read_visium(
        file_fold,
        count_file='{}_filtered_feature_bc_matrix.h5'.format(dataset),
        load_images=True,
    )
    adata.var_names_make_unique()

    # define model
    model = GraphST.GraphST(adata, device=device)

    # train model
    adata = model.train()

    # clustering
    # For DLPFC dataset, we use optional refinement step.
    clustering(adata, n_cluster, radius=50, method='mclust', refinement=refinement)

    # add ground_truth
    df_meta = pd.read_csv(
        file_fold + '/gt/{}_truth.txt'.format(dataset),
        sep='\t',
        header=None,
        index_col=0,
    )
    # NOTE(review): the labels are assigned positionally, not by barcode.
    # This assumes the rows of the truth file are in the same order as
    # adata.obs -- confirm, since a mismatch would silently distort the ARI.
    adata.obs['ground_truth'] = df_meta.values

    # filter out NA nodes
    adata = adata[~pd.isnull(adata.obs['ground_truth'])]

    # calculate metric ARI (arguments in labels_true, labels_pred order; the
    # score is symmetric, so the value is the same either way)
    ARI = metrics.adjusted_rand_score(adata.obs['ground_truth'], adata.obs['domain'])
    ARIs.append(round(ARI, 2))

print(ARIs)
# NOTE(review): the mean is computed over the already-rounded per-sample
# values, so it can differ slightly from the mean of the raw ARIs.
print("mean:", round(np.mean(ARIs), 2))
My reproduction results:
[0.43, 0.5, 0.53, 0.47, 0.59, 0.43, 0.61, 0.64, 0.64, 0.62, 0.6, 0.43]
mean: 0.54