Different results on different platforms
IvanProg00 opened this issue · 1 comment
IvanProg00 commented
I've used the base model https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin.
I've tested on macOS Sonoma (M2 Pro) and on Debian 12 (Intel x86-64), and I received different text and different plog values using the same code.
My code:
use std::env;
use hound::WavReader;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
fn main() {
    let args: Vec<String> = env::args().collect();

    // load a context and model
    let ctx = WhisperContext::new_with_params("ggml-base.bin", WhisperContextParameters::default())
        .expect("failed to load model");
    let mut state = ctx.create_state().unwrap();

    // let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
    let mut params = FullParams::new(SamplingStrategy::BeamSearch { beam_size: 5, patience: 1.0 });
    params.set_translate(false);
    params.set_detect_language(false);
    params.set_language(Some("es"));
    params.set_suppress_non_speech_tokens(true);
    params.set_suppress_blank(true);
    params.set_single_segment(true);
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    // tokenize the prompt and pass it as initial tokens
    let prompt = args.get(2).expect("args [file] [prompt] are required");
    let prompt = format!("[system] {} [user] ", prompt);
    let vector = ctx.tokenize(&prompt, prompt.len()).unwrap();
    let tokens = vector.as_slice();
    params.set_tokens(tokens);

    // read the WAV file and convert its i16 samples to f32
    let path = args.get(1).unwrap();
    let mut reader = WavReader::open(path).unwrap();
    let audio = whisper_rs::convert_integer_to_float_audio(
        &reader
            .samples::<i16>()
            .collect::<Result<Vec<_>, hound::Error>>()
            .unwrap(),
    );

    // run the full transcription
    state.full(params, &audio[..]).unwrap();

    println!("===== output =====");
    let phrase = state.full_get_segment_text(0).unwrap();
    println!("phrase: {}", phrase);

    // print every non-special token of the first segment with its log probability
    let eot = ctx.token_eot();
    for i in 0..state.full_n_tokens(0).unwrap() {
        let id = state.full_get_token_id(0, i).unwrap();
        let data = state.full_get_token_data(0, i).unwrap();
        let token_text = state.full_get_token_text(0, i).unwrap();
        let is_special = id >= eot;
        if !is_special {
            println!("token: {}", token_text);
            println!("plog: {}", data.plog);
        }
    }
}
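One thing worth ruling out first: whisper.cpp expects 16 kHz mono 16-bit PCM on this code path, and if the two machines feed it audio with a different spec, the transcripts can diverge for reasons unrelated to the model. A minimal sketch of a sanity check using only hound's WavReader::spec() (the "audio.wav" path is just illustrative, not from the original code):

use hound::WavReader;

fn main() {
    // Check the input before transcription: whisper.cpp expects
    // 16 kHz, mono, 16-bit integer PCM for this conversion path.
    let reader = WavReader::open("audio.wav").expect("failed to open wav"); // hypothetical path
    let spec = reader.spec();
    assert_eq!(spec.sample_rate, 16_000, "expected 16 kHz audio");
    assert_eq!(spec.channels, 1, "expected mono audio");
    assert_eq!(spec.bits_per_sample, 16, "expected 16-bit samples");
    println!("wav spec looks fine: {:?}", spec);
}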
tazz4843 commented
I think whisper.cpp has some decent level of hardware nondeterminism, as well as possibly different compute backends being used on the two machines (e.g. Metal on the M2 vs. AVX on x86), which also influences the result. Going to call this not a bug and, unfortunately, more or less unfixable.
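For anyone else who lands here: one mitigation that may reduce (but not eliminate) cross-platform variation is greedy decoding at temperature 0, which removes sampling randomness, so only the floating-point differences between backends remain. A minimal sketch, assuming your whisper-rs version exposes set_temperature (it mirrors whisper.cpp's whisper_full_params.temperature):

use whisper_rs::{FullParams, SamplingStrategy};

fn main() {
    // Greedy decoding with temperature 0 removes sampling randomness;
    // backend differences (e.g. Metal vs. AVX) can still nudge logits slightly.
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
    // Assumption: this setter is available in the whisper-rs version in use;
    // upstream it maps to whisper_full_params.temperature.
    params.set_temperature(0.0);
    params.set_language(Some("es"));
    params.set_single_segment(true);
    // Pass `params` to state.full() exactly as in the snippet above.
}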