ewan-xu/LibrosaCpp

使用 wav_read_open 读取音频后,通过 librosa::Feature::melspectrogram 得到的结果,与直接使用 librosa.load 读取并通过 librosa.feature.melspectrogram 得到的结果不一致

LoveChina-3000 opened this issue · 0 comments

使用 wav_read_open 读取音频后,通过 librosa::Feature::melspectrogram 得到的结果,与直接使用 librosa.load 读取并通过 librosa.feature.melspectrogram 得到的结果不一致。复现代码如下:

void* h_x = wav_read_open("E:\wav\aa.wav");
int format, channels, sr, bits_per_sample;
unsigned int data_length;
int res = wav_get_header(h_x, &format, &channels, &sr, &bits_per_sample, &data_length);

int samples = data_length * 8 / bits_per_sample /channels ;
std::vector<int16_t> tmp(samples);
res = wav_read_data(h_x, reinterpret_cast<unsigned char*>(tmp.data()), data_length / channels);

td::vector<std::vector> mels = librosa::Feature::melspectrogram(x, sr, n_fft, n_hop, "hann", true, "reflect", 2.f, n_mel, fmin, fmax);

double sums = 0;
float maxi = -INT_MAX, mini = INT_MAX;
for (auto &arr : mels) {
for (auto k : arr) {
sums += k;
maxi = max(maxi, k);
mini = min(mini, k);
}
}
cout << "sums: " << sums << "\n";
cout << "mini: " << mini << "\n";
cout << "maxi: " << maxi << "\n";

python:
# Reference computation with librosa: load the file at its native sample rate
# (sr=None) and compute the mel spectrogram.
wav, sr_ret = librosa.load(audio_path, sr=None)

# Pass the sample rate actually returned by librosa.load; the original used `sr`,
# which this snippet never defines.
# NOTE(review): win_length=400 differs from n_fft=2048 here, while the C++ call
# above has no win_length argument; confirm both sides use the same window
# length, n_fft, hop, n_mels, fmin and fmax before comparing the outputs.
features = librosa.feature.melspectrogram(y=wav, sr=sr_ret, n_fft=2048, n_mels=80, hop_length=160, win_length=400, pad_mode='reflect', htk=False)
print(features)
print(features.shape)
# Summary statistics to compare against the C++ output.
print('sum: ', features.sum())
print('min: ', features.min())
print('max: ', features.max())