Fix for `deepfake_asvspoof_dataset.py`
Closed this issue · 0 comments
chandlerbing65nm commented
Code Changes:
From:
def read_protocol(self):
    """Read the protocol file and return its samples as a ``pd.DataFrame``.

    Each line is classified by its sixth space-separated field: ``"bonafide"``
    lines and ``"spoof"`` lines are collected separately, and lines with any
    other label are dropped. Both groups are passed through
    ``self.split_samples`` and every remaining line is handed to
    ``self.add_line_to_samples``. Spoof rows are added before bonafide rows.

    Returns:
        A DataFrame with the columns ``sample_name``, ``label``, ``path`` and
        ``attack_type``.
    """
    # Column-oriented accumulator; add_line_to_samples appends to these lists.
    samples = {
        "sample_name": [],
        "label": [],
        "path": [],
        "attack_type": [],
    }

    real_samples = []
    fake_samples = []
    with open(Path(self.path) / self.protocol_file_name, "r") as file:
        for line in file:
            # NOTE: indexing [5] raises IndexError for a line with fewer than
            # six fields (for example a blank line).
            label = line.strip().split(" ")[5]

            if label == "bonafide":
                real_samples.append(line)
            elif label == "spoof":
                fake_samples.append(line)

    # split_samples is defined elsewhere; presumably it selects the subset
    # of lines belonging to the current data split -- confirm in the class.
    fake_samples = self.split_samples(fake_samples)
    for line in fake_samples:
        samples = self.add_line_to_samples(samples, line)

    real_samples = self.split_samples(real_samples)
    for line in real_samples:
        samples = self.add_line_to_samples(samples, line)

    return pd.DataFrame(samples)
def add_line_to_samples(self, samples, line):
    """Parse one protocol line and append it to the ``samples`` columns.

    Args:
        samples: Dict of lists with the keys ``sample_name``, ``label``,
            ``path`` and ``attack_type``; it is mutated in place.
        line: One raw line of the protocol file.

    Returns:
        The same ``samples`` dict that was passed in.
    """
    # The tuple unpacking requires exactly eight space-separated fields; any
    # other count raises ValueError.
    _, sample_name, _, _, _, label, _, _ = line.strip().split(" ")
    samples["sample_name"].append(sample_name)
    samples["label"].append(label)
    # attack_type is filled with the same value as label.
    samples["attack_type"].append(label)

    # NOTE: the three appends above happen before the path lookup, so a
    # failure below leaves the column lists with unequal lengths.
    sample_path = self.flac_paths[sample_name]
    assert sample_path.exists()
    samples["path"].append(sample_path)

    return samples
To:
def read_protocol(self):
    """Read the protocol file and return its samples as a ``pd.DataFrame``.

    Each line is classified by its sixth space-separated field: ``"bonafide"``
    lines and ``"spoof"`` lines are collected separately, and lines with any
    other label are dropped. Blank or truncated lines (fewer than six fields)
    are skipped instead of raising ``IndexError``. Both groups are passed
    through ``self.split_samples`` and every remaining line is handed to
    ``self.add_line_to_samples``. Spoof rows are added before bonafide rows.

    Returns:
        A DataFrame with the columns ``sample_name``, ``label``, ``path`` and
        ``attack_type``.
    """
    # Column-oriented accumulator; add_line_to_samples appends to these lists.
    samples = {
        "sample_name": [],
        "label": [],
        "path": [],
        "attack_type": [],
    }

    real_samples = []
    fake_samples = []
    with open(Path(self.path) / self.protocol_file_name, "r") as file:
        for line in file:
            fields = line.strip().split(" ")
            if len(fields) < 6:
                # No label column to read (e.g. a trailing blank line).
                continue
            label = fields[5]

            if label == "bonafide":
                real_samples.append(line)
            elif label == "spoof":
                fake_samples.append(line)

    fake_samples = self.split_samples(fake_samples)
    for line in fake_samples:
        # add_line_to_samples returns None when it skips a line; keep the
        # current accumulator in that case.
        new_samples = self.add_line_to_samples(samples, line)
        if new_samples is not None:
            samples = new_samples

    real_samples = self.split_samples(real_samples)
    for line in real_samples:
        new_samples = self.add_line_to_samples(samples, line)
        if new_samples is not None:
            samples = new_samples

    return pd.DataFrame(samples)
def add_line_to_samples(self, samples, line):
    """Parse one protocol line and append it to the ``samples`` columns.

    The line is validated completely before anything is appended, so a
    skipped line never leaves the column lists with unequal lengths (which
    would make ``pd.DataFrame(samples)`` fail).

    Args:
        samples: Dict of lists with the keys ``sample_name``, ``label``,
            ``path`` and ``attack_type``; it is mutated in place.
        line: One raw line of the protocol file.

    Returns:
        The same ``samples`` dict when the line was added, or ``None`` when
        the line was skipped (too few fields, or no existing audio file for
        the sample name).
    """
    try:
        split_line = line.strip().split(" ")
    except AttributeError as e:
        # Only reachable when ``line`` is not a string.
        print(f"Error: {e}. Couldn't process line '{line}'. Skipping.")
        return None

    if len(split_line) < 8:
        print(f"Error: Not enough elements to unpack from line '{line}'. Skipping.")
        return None

    # The sample name is read from the second field and the label from the
    # sixth; any fields beyond the eighth are ignored.
    sample_name = split_line[1]
    label = split_line[5]

    sample_path = self.flac_paths.get(sample_name)
    if sample_path is None or not sample_path.exists():
        print(f"Error: Sample path for '{sample_name}' does not exist. Skipping.")
        return None

    # All checks passed: append to every column together.
    samples["sample_name"].append(sample_name)
    samples["label"].append(label)
    # attack_type is filled with the same value as label.
    samples["attack_type"].append(label)
    samples["path"].append(sample_path)

    return samples
The dataset directory should be laid out as follows:
.ASVspoof2021
└── DF
    ├── ASVspoof2021.DF.cm.eval.trl.txt
    ├── ASVspoof2021_DF_eval_part00
    │   └── ASVspoof2021_DF_eval
    │       └── flac
    ├── ASVspoof2021_DF_eval_part01
    │   └── ASVspoof2021_DF_eval
    │       └── flac
    ├── ASVspoof2021_DF_eval_part02
    │   └── ASVspoof2021_DF_eval
    │       └── flac
    ├── ASVspoof2021_DF_eval_part03
    │   └── ASVspoof2021_DF_eval
    │       └── flac
    ├── keys
    │   ├── CM
    │   │   ├── CQCC-GMM
    │   │   │   └── score.txt
    │   │   ├── LFCC-GMM
    │   │   │   └── score.txt
    │   │   ├── LFCC-LCNN
    │   │   │   └── score.txt
    │   │   ├── RawNet2
    │   │   │   └── score.txt
    │   │   └── trial_metadata.txt
    │   └── README.txt
    ├── LICENSE.DF.txt
    └── README.DF.txt