facebookresearch/fairseq

"generated an exception: Failed to decode audio" while running asr_prep_json.py on a custom dataset

3sakshij opened this issue · 0 comments

What is your question?

While running the asr_prep_json.py script in fairseq/examples/datasets, I get a "Failed to decode audio" exception for a few of the .wav files. I don't understand why I am getting this error. I searched the existing issues here and in torchaudio but could not find an answer. Kindly let me know how to fix this.

Code

# Must stay the first statement in the module: __future__ imports are only
# accepted there. The module name is the stdlib pseudo-module `__future__`,
# not the third-party `future` package.
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
import concurrent.futures
import json
import multiprocessing
import os
from collections import namedtuple
from itertools import chain

import sentencepiece as spm
from fairseq.data import Dictionary

# Length of one millisecond expressed in seconds. Dividing a duration in
# seconds by this factor yields the same duration in milliseconds.
MILLISECONDS_TO_SECONDS = 1e-3

def process_sample(aud_path, lable, utt_id, sp, tgt_dict):
    """Build the manifest entry for a single utterance.

    Args:
        aud_path: Path of the audio file. It is probed with
            ``torchaudio.info`` and stored verbatim in the entry.
        lable: Transcript text of the utterance (the parameter name is kept
            as spelled so existing callers keep working).
        utt_id: Utterance id; becomes the single key of the returned dict.
        sp: Tokenizer exposing ``EncodeAsPieces(text)``.
        tgt_dict: Dictionary exposing ``encode_line(text, append_eos=False)``
            and returning an iterable whose items provide ``tolist()``.

    Returns:
        ``{utt_id: {"input": {"length_ms", "path"},
        "output": {"text", "token", "tokenid"}}}``.

    Raises:
        Any exception raised by ``torchaudio.info`` when the file cannot be
        opened or decoded is propagated to the caller unchanged.
    """
    # Imported inside the function, as in the original, so that importing
    # this module does not itself require torchaudio.
    import torchaudio

    # The original called an undefined name `t`; the metadata attributes read
    # below (num_frames / sample_rate) are those returned by torchaudio.info.
    info = torchaudio.info(aud_path)

    # num_frames is the per-channel frame count, so the duration in seconds
    # is num_frames / sample_rate. Dividing by the channel count as well
    # would under-report the length of multi-channel audio.
    duration_s = info.num_frames / info.sample_rate
    input_info = {
        "length_ms": int(duration_s / MILLISECONDS_TO_SECONDS),
        "path": aud_path,
    }

    token = " ".join(sp.EncodeAsPieces(lable))
    ids = tgt_dict.encode_line(token, append_eos=False)
    output_info = {
        "text": lable,
        "token": token,
        # Each id is converted with tolist() so it prints as a plain number.
        "tokenid": ", ".join(str(t.tolist()) for t in ids),
    }
    return {utt_id: {"input": input_info, "output": output_info}}

def main():
    """Command-line entry point: build a JSON manifest of labelled audio.

    Steps:
      1. Parse the command line (audio directories, labels file,
         sentencepiece model, fairseq dictionary, audio format, output).
      2. Read the labels file, one ``<utt_id> <transcript>`` pair per line.
      3. Walk the audio directories and keep every file with the requested
         extension whose stem is a known utterance id.
      4. Run ``process_sample`` for every kept file on a thread pool. A
         sample that raises is reported on stdout and left out of the output.
      5. Dump ``{"utts": {...}}`` as JSON to the output file.

    Raises:
        Exception: if the labels file yields no labels.
        ValueError: if a non-blank label line contains no space separator.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--audio-dirs",
        nargs="+",
        default=["-"],
        required=True,
        help="input directories with audio files",
    )
    parser.add_argument(
        "--labels",
        required=True,
        help="aggregated input labels with format per line",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "--spm-model",
        required=True,
        help="sentencepiece model to use for encoding",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "--dictionary",
        required=True,
        help="file to load fairseq dictionary from",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument("--audio-format", choices=["flac", "wav"], default="wav")
    parser.add_argument(
        "--output",
        required=True,
        type=argparse.FileType("w"),
        help="path to save json output",
    )
    args = parser.parse_args()

    sp = spm.SentencePieceProcessor()
    # Only the path of the argparse-opened file is used here; the model is
    # loaded by SentencePiece from that path.
    sp.Load(args.spm_model.name)

    tgt_dict = Dictionary.load(args.dictionary)

    labels = {}
    for line in args.labels:
        # A blank line (e.g. a trailing newline at end of file) carries no
        # label and would otherwise break the two-way unpack below.
        if not line.strip():
            continue
        # Split on the first space only; the transcript keeps everything
        # after it, including the line terminator, exactly as before.
        utt_id, label = line.split(" ", 1)
        labels[utt_id] = label
    if not labels:
        # The parsed attribute is `labels` (an open file), not `labels_path`.
        raise Exception("No labels found in {}".format(args.labels.name))

    Sample = namedtuple("Sample", "aud_path utt_id")
    wanted_ext = "." + args.audio_format
    samples = []
    for path, _, files in chain.from_iterable(
        os.walk(audio_dir) for audio_dir in args.audio_dirs
    ):
        for f in files:
            # Compare the real extension so that a name merely ending in the
            # letters "wav"/"flac" (without the dot) is not picked up.
            utt_id, ext = os.path.splitext(f)
            if ext != wanted_ext:
                continue
            # Audio without a transcript is ignored.
            if utt_id not in labels:
                continue
            samples.append(Sample(os.path.join(path, f), utt_id))

    utts = {}
    num_cpu = multiprocessing.cpu_count()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_cpu) as executor:
        future_to_sample = {
            executor.submit(
                process_sample, s.aud_path, labels[s.utt_id], s.utt_id, sp, tgt_dict
            ): s
            for s in samples
        }
        for future in concurrent.futures.as_completed(future_to_sample):
            sample = future_to_sample[future]
            try:
                data = future.result()
            except Exception as exc:
                # Deliberately best-effort: report the failing sample and
                # carry on so one bad file does not abort the whole run.
                print("generated an exception: ", exc, sample)
            else:
                utts.update(data)
    json.dump({"utts": utts}, args.output, indent=4)

# Run the command-line entry point only when the file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

What have you tried?

I tried printing the audio files that cause this decoding failure, but I still could not work out why I am getting the error.