whisper

Use OpenAI Whisper.

install

pip install git+https://github.com/openai/whisper.git 
choco install ffmpeg

model

Path (downloaded models are cached here): C:\users\Ping\.cache\whisper

Size	Parameters	English-only model	Multilingual model	Required VRAM	Relative speed
tiny	39 M	`tiny.en`	`tiny`	~1 GB	~32x
base	74 M	`base.en`	`base`	~1 GB	~16x
small	244 M	`small.en`	`small`	~2 GB	~6x
medium	769 M	`medium.en`	`medium`	~5 GB	~2x
large	1550 M	N/A	`large`	~10 GB	1x

run

whisper input1 input2 --model base
whisper input1 input2 --model medium
whisper input1 input2 --model large

import whisper

model = whisper.load_model("base")

# load audio and pad/trim it to fit 30 seconds
audio = whisper.load_audio("audio.mp3")
audio = whisper.pad_or_trim(audio)

# make log-Mel spectrogram and move to the same device as the model
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# detect the spoken language
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# decode the audio
options = whisper.DecodingOptions()
result = whisper.decode(model, mel, options)

# print the recognized text
print(result.text)

import whisper

model = whisper.load_model("base")
result = model.transcribe("audio.mp3")
print(result["text"])

ffmpeg -i meeting.wav -ss 00:00:00 -to 00:01:00 1min.wav
whisper --model base --language Chinese 1min.wav

wpwupingwp/whipser

whipser

install

model

run