huggingface/transformers.js

Error: cannot release session. invalid session id


Question

I'm trying to get ASR + segmentation to run on a mobile phone (Pixel 6A, 6 GB RAM). This time on Brave mobile ;-)

ASR alone works fine, but I have a question about also getting the speaker recognition to run (segmentation + verification).

In the example implementation a Promise.all is used to run both ASR and segmentation in parallel. For my implementation I've tried to run them one after the other, hoping that this would mean less memory is needed (see the sketch after this list). E.g.:

  • Create ASR instance
    -- Get text and chunks from audio

  • Dispose of ASR instance

  • Create segmentation instance
    -- Get segments from audio

  • Dispose of segmentation instance

  • Create verification instance
    -- Run verification on chunks of audio from each segment

  • Dispose of verification instance
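For illustration, here is a minimal sketch of that consecutive flow. The ASR model id and the option values are assumptions for the example, not my exact code:

import { pipeline } from '@huggingface/transformers';

// Hypothetical id for illustration; I use the tiny-whisper.en_timestamped model
const ASR_MODEL_ID = 'onnx-community/whisper-tiny.en_timestamped';

async function transcribeSequentially(audio) {
    // 1. ASR: create, run, then dispose before loading anything else
    let asr = await pipeline('automatic-speech-recognition', ASR_MODEL_ID);
    const { text, chunks } = await asr(audio, { return_timestamps: 'word' });
    await asr.dispose(); // should release the underlying ONNX session
    asr = null;

    // 2. Segmentation and verification would follow the same
    //    create -> run -> dispose pattern (see the class further down)
    return { text, chunks };
}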

I don't know if it's related, but I noticed the error below:

(screenshot of the console showing: Error: cannot release session. invalid session id)

My questions are:

  • Is it a valid assumption that doing things consecutively will allow this cascade to run on devices with less memory? Or was there a good reason that a Promise.all was used?
  • What does the error mean?
  • Is running them consecutively part of why the error occurs?
  • Can I use quantized versions of the segmentation and verification models in order to save memory? Currently the ASR model (tiny-whisper.en_timestamped) is 114 MB, while the segmentation and verification models seem to be 512 MB together. (See the sketch below for what I mean.)
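By "quantized" I mean something like the following sketch. Whether these repos actually ship q8 weights is part of my question:

import { AutoModelForAudioFrameClassification } from '@huggingface/transformers';

// Request 8-bit quantized weights at load time.
// In transformers.js v3 this is the `dtype` option; in v2 it was `quantized: true`.
const segmentation_model = await AutoModelForAudioFrameClassification.from_pretrained(
    'onnx-community/pyannote-segmentation-3.0',
    { device: 'wasm', dtype: 'q8' },
);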

I haven't split up loading the segmentation and verification instances yet, as I thought I'd get your opinion first.

import { AutoProcessor, AutoModel, AutoModelForAudioFrameClassification } from '@huggingface/transformers';

class SegmentationSingleton {

    static segmentation_model_id = 'onnx-community/pyannote-segmentation-3.0';
    static segmentation_instance = null;
    static segmentation_processor = null;
    static loaded_segmentation = false;

    static verification_model_id = 'Xenova/wavlm-base-plus-sv';
    //static verification_model_id = 'onnx-community/wespeaker-voxceleb-resnet34-LM';
    static verification_instance = null;
    static verification_processor = null;

    static instance_exists() {
        return this.segmentation_instance != null;
    }

    // Null out a single static property by name, e.g. set_to_null('segmentation_instance')
    static set_to_null(var_to_null = null) {
        if (typeof var_to_null === 'string' && typeof this[var_to_null] !== 'undefined') {
            this[var_to_null] = null;
        }
    }

    static async getInstance(progress_callback = null, preferences = {}) {

        if (self.is_mobile) {
            console.log("mobile, so setting quantized to true for segmentation AI's");
            preferences['quantized'] = true;
        }

        this.loaded_segmentation = true;

        console.log('SegmentationSingleton: creating segmentation instances');

        // ??= stores the pending promise, so repeated calls reuse the same load
        this.segmentation_processor ??= AutoProcessor.from_pretrained(this.segmentation_model_id, {
            ...preferences,
            progress_callback,
        });

        this.segmentation_instance ??= AutoModelForAudioFrameClassification.from_pretrained(this.segmentation_model_id, {
            // NOTE: WebGPU is not currently supported for this model
            // See https://github.com/microsoft/onnxruntime/issues/21386
            device: 'wasm',
            dtype: 'q8', // was: 'fp32'
            ...preferences,
            progress_callback,
        });

        // The similarity thresholds depend on which verification model is used
        if (this.verification_model_id.endsWith('wespeaker-voxceleb-resnet34-LM')) {
            self.similarity_threshold = 0.5;
            self.perfect_similarity_threshold = 0.7;
        }
        else {
            self.similarity_threshold = 0.95;
            self.perfect_similarity_threshold = 0.98;
        }

        this.verification_processor ??= AutoProcessor.from_pretrained(this.verification_model_id, {
            device: 'wasm', // also tried: webgpu
            dtype: 'fp32',  // also tried: q8
            ...preferences,
            progress_callback,
        });

        this.verification_instance ??= AutoModel.from_pretrained(this.verification_model_id, {
            device: 'wasm',
            dtype: 'fp32',
            ...preferences,
            progress_callback,
        });

        return Promise.all([
            this.segmentation_processor,
            this.segmentation_instance,
            this.verification_processor,
            this.verification_instance,
        ]);
    }
}
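For completeness, the disposal side would look roughly like this. It's a sketch; note that ??= stores the pending promise rather than the resolved model, and my guess is that disposing a session twice could be what triggers the "invalid session id" error:

// Sketch: guarded disposal, added as a static method on SegmentationSingleton
static async dispose_instances() {
    for (const key of ['segmentation_instance', 'verification_instance']) {
        if (this[key] != null) {
            const model = await this[key]; // ??= stored a promise, so await it first
            await model.dispose();         // releases the onnxruntime-web session
            this[key] = null;              // guard against disposing twice
        }
    }
    this.segmentation_processor = null;
    this.verification_processor = null;
    this.loaded_segmentation = false;
}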

From trying it (and seeing a barrage of errors), I'm concluding that the segmentation model is dependent on ASR being loaded.

Maybe I could still split the verification model and reduce memory a little that way?
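For reference, the split I have in mind would be a separate singleton, roughly like this (a sketch mirroring the class above, using the same imports):

// Sketch: verification in its own singleton, so it can be loaded and
// disposed independently of the segmentation model.
class VerificationSingleton {
    static model_id = 'Xenova/wavlm-base-plus-sv';
    static processor = null;
    static model = null;

    static async getInstance(progress_callback = null, preferences = {}) {
        this.processor ??= AutoProcessor.from_pretrained(this.model_id, {
            ...preferences,
            progress_callback,
        });
        this.model ??= AutoModel.from_pretrained(this.model_id, {
            device: 'wasm',
            dtype: 'fp32',
            ...preferences,
            progress_callback,
        });
        return Promise.all([this.processor, this.model]);
    }
}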