benedekrozemberczki/ClusterGCN

ppi dataset

CFF-Dream opened this issue · 0 comments

`
import torch
import time
import torch.nn as nn
import torch.nn.functional as F
import os.path as osp
from torch_geometric.datasets import PPI
from ppi_cluster import ClusterData, ClusterLoader
from torch_geometric.nn import SAGEConv, ChebConv
from sklearn.metrics import f1_score

# Resolve the PPI data directory relative to this script: <script dir>/../data/PPI.
# `__file__` is required here: the bare name `file` is not defined in Python 3
# and raises NameError (the dunder underscores were most likely stripped when
# the snippet was rendered as markdown).
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'PPI')
# Instance built without an explicit split; its `processed_dir` is what the
# partitioning loops pass as `save_dir`.
dataset = PPI(path)
train_dataset = PPI(path, split='train')  # 20 graphs
val_dataset = PPI(path, split='val')  # 2 graphs
test_dataset = PPI(path, split='test')  # 2 graphs

# Announce the partitioning step before it starts.
print('Partioning the graph... (this may take a while)')

# One list of loaders per split, plus a combined list covering every split.
train_dataset_list, val_dataset_list, test_dataset_list = [], [], []
dataset_list = []

# Per-split running graph counters; each one is handed to ClusterData and
# incremented once per graph by the corresponding loop.
train_dataset_index = 0
test_dataset_index = 0
val_dataset_index = 0

# Partition every training graph and wrap each partitioned graph in its own
# loader. The loop body must be indented; at column 0 the script fails with
# IndentationError.
for data in train_dataset:
    # ClusterData/ClusterLoader come from the local `ppi_cluster` module.
    # NOTE(review): the 'train' tag and the running index are presumably used
    # to name the per-graph partition cache under `save_dir` -- confirm there.
    cluster_data = ClusterData(
        data, 'train', train_dataset_index,
        num_parts=2, recursive=False, save_dir=dataset.processed_dir,
    )
    loader = ClusterLoader(
        cluster_data, batch_size=20, shuffle=True, num_workers=0,
    )
    # Track the loader both per split and in the combined list.
    train_dataset_list.append(loader)
    dataset_list.append(loader)
    train_dataset_index += 1

# Partition every test graph and wrap each partitioned graph in its own
# loader. The loop body must be indented; at column 0 the script fails with
# IndentationError.
for data in test_dataset:
    # ClusterData/ClusterLoader come from the local `ppi_cluster` module.
    # NOTE(review): the 'test' tag and the running index are presumably used
    # to name the per-graph partition cache under `save_dir` -- confirm there.
    cluster_data = ClusterData(
        data, 'test', test_dataset_index,
        num_parts=2, recursive=False, save_dir=dataset.processed_dir,
    )
    loader = ClusterLoader(
        cluster_data, batch_size=20, shuffle=True, num_workers=0,
    )
    # Track the loader both per split and in the combined list.
    test_dataset_list.append(loader)
    dataset_list.append(loader)
    test_dataset_index += 1

# Partition every validation graph and wrap each partitioned graph in its own
# loader. The loop body must be indented; at column 0 the script fails with
# IndentationError.
for data in val_dataset:
    # ClusterData/ClusterLoader come from the local `ppi_cluster` module.
    # NOTE(review): the 'val' tag and the running index are presumably used
    # to name the per-graph partition cache under `save_dir` -- confirm there.
    cluster_data = ClusterData(
        data, 'val', val_dataset_index,
        num_parts=2, recursive=False, save_dir=dataset.processed_dir,
    )
    loader = ClusterLoader(
        cluster_data, batch_size=20, shuffle=True, num_workers=0,
    )
    # Track the loader both per split and in the combined list.
    val_dataset_list.append(loader)
    dataset_list.append(loader)
    val_dataset_index += 1

# Reached only after the validation loop above has finished.
print('Done!')
`