microsoft/MS-SNSD

Different audio lengths cause a broadcasting error

HamzaFouad opened this issue · 0 comments

If the passed arguments (clean and noise) have different lengths, the call fails with the following error:
ValueError: operands could not be broadcast together with shapes

It can be solved by extending the shorter signal (repeating it) until its length equals that of the longer one, like this:

def snr_mixer(clean, noise, snr):
    """Mix a clean signal with noise after equalizing their lengths.

    The shorter of the two signals is repeated cyclically (whole copies
    followed by a leading partial copy) until it is as long as the other
    one, so the final addition never fails to broadcast. Both signals are
    then scaled to an RMS of ``10 ** (-25 / 20)`` and the noise is rescaled
    according to ``snr`` before being added to the clean signal.

    Args:
        clean: Clean signal samples (array-like; assumed 1-D).
        noise: Noise signal samples (array-like; assumed 1-D).
        snr: Value controlling the noise level relative to the clean
            signal (see the review note at the noise scaling step).

    Returns:
        A tuple ``(clean, noisenewlevel, noisyspeech)`` of equal-length
        arrays: the normalized clean signal, the rescaled noise, and
        their sum.

    Raises:
        ValueError: If ``clean`` or ``noise`` is empty.
    """
    clean = np.asarray(clean)
    noise = np.asarray(noise)
    clean_len = len(clean)
    noise_len = len(noise)
    if clean_len == 0 or noise_len == 0:
        # An empty signal cannot be repeated to match the other one; fail
        # with a clear message instead of a ZeroDivisionError.
        raise ValueError("clean and noise must both be non-empty")

    # np.resize fills the enlarged array with repeated copies of the input,
    # which for 1-D data is exactly "tile, then append the leading
    # remainder".
    if clean_len < noise_len:
        clean = np.resize(clean, noise_len)
    elif noise_len < clean_len:
        noise = np.resize(noise, clean_len)

    # Normalizing to -25 dB FS
    target_rms = 10 ** (-25 / 20)
    rmsclean = (clean**2).mean()**0.5
    clean = clean * (target_rms / rmsclean)
    rmsclean = (clean**2).mean()**0.5

    rmsnoise = (noise**2).mean()**0.5
    noise = noise * (target_rms / rmsnoise)
    rmsnoise = (noise**2).mean()**0.5

    # Set the noise level for a given SNR
    # NOTE(review): because of the square root, the clean/noise RMS ratio
    # after scaling is 10 ** (snr / 40), i.e. snr / 2 in dB. Kept unchanged
    # to preserve existing outputs -- confirm whether this is intended.
    noisescalar = np.sqrt(rmsclean / (10**(snr/20)) / rmsnoise)
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech

# Example call: mixes the two signals with snr=2.
# NOTE(review): audio_org and noise_org are not defined in this snippet --
# presumably the already-loaded clean and noise sample arrays; confirm.
clean, noisenewlevel, noisyspeech = snr_mixer(audio_org, noise_org, 2)