import { MicVAD, utils } from '@ricky0123/vad-web';
import * as ort from 'onnxruntime-web';
import { getMicrophoneSampleRate } from '@/utils/audio';
import { trackStopSegmentRecording } from '@/services/analytics';

// Map each ONNX Runtime WebAssembly binary (plain, SIMD, threaded and
// SIMD+threaded variants) to a root-absolute URL path of the same name.
// NOTE(review): presumably these files are copied to the site root by the
// build/static-assets setup — confirm against the bundler configuration.
ort.env.wasm.wasmPaths = {
  'ort-wasm-simd-threaded.wasm': '/ort-wasm-simd-threaded.wasm',
  'ort-wasm-simd.wasm': '/ort-wasm-simd.wasm',
  'ort-wasm.wasm': '/ort-wasm.wasm',
  'ort-wasm-threaded.wasm': '/ort-wasm-threaded.wasm',
};

// Host name and port of the page that loaded this module; `port` is the empty
// string when the URL carries no explicit port.
const { hostname, port } = window.location;

// `<protocol>//<hostname>[:<port>]` of the current page. The VAD worklet and
// model URLs below are built on top of it.
const baseURL = [window.location.protocol, '//', hostname, port ? `:${port}` : ''].join('');

// Duration, in seconds, of the silence padding applied to each speech segment (200 ms).
const silenceDuration = 0.2;

/**
 * Creates a microphone voice-activity detector bound to `stream`.
 *
 * Every finished speech segment is prefixed with silence, has its tail muted,
 * is WAV-encoded with `utils.encodeWAV` and is handed to `onSpeechEnd` as an
 * `audio/wav` blob.
 *
 * @param stream - Microphone stream passed through to `MicVAD.new`.
 * @param onSpeechEnd - Receives the WAV blob of each finished speech segment.
 * @param setIsSpeaking - Called with `true` when speech starts and `false`
 *   when it ends.
 * @param trackSettings - Settings of the captured audio track; only
 *   `channelCount` is read.
 * @returns The promise returned by `MicVAD.new`.
 */
const createVAD = async (
  stream: MediaStream,
  onSpeechEnd: (blob: Blob) => Promise<void>,
  setIsSpeaking: (arg0: boolean) => void,
  trackSettings: MediaTrackSettings,
) => {
  // `channelCount` is optional in MediaTrackSettings. A missing, zero or
  // otherwise invalid value used to turn the padding length into NaN/Infinity,
  // which made the typed-array calls below throw; fall back to one channel.
  const { channelCount } = trackSettings;
  const channels =
    typeof channelCount === 'number' && channelCount > 0 ? channelCount : 1;

  // Sample rate used to convert `silenceDuration` into a sample count.
  // NOTE(review): vad-web documents the audio passed to `onSpeechEnd` as
  // 16 kHz — confirm if the library version changes.
  const vadSampleRate = 16000;

  // Whole number of padding samples; typed-array lengths and offsets must be
  // integers, so floor the result for channel counts that do not divide evenly.
  const silenceSamples = Math.floor((vadSampleRate * silenceDuration) / channels);

  return MicVAD.new({
    workletURL: `${baseURL}/vad.worklet.bundle.min.js`,
    modelURL: `${baseURL}/silero_vad.onnx`,
    submitUserSpeechOnPause: true,
    onSpeechStart: () => setIsSpeaking(true),
    onSpeechEnd: async (audio) => {
      setIsSpeaking(false);
      trackStopSegmentRecording();

      // Leading padding: the buffer starts zero-filled and the speech is
      // copied in after `silenceSamples` samples.
      const audioWithSilence = new Float32Array(audio.length + silenceSamples);
      audioWithSilence.set(audio, silenceSamples);

      // Mute the tail of the buffer, i.e. its last `2 * silenceSamples`
      // samples. Clamp the start at 0: for segments shorter than the padding
      // a negative start would be interpreted by `fill` as an offset from the
      // end and mute an unrelated region instead.
      const muteFrom = Math.max(0, audio.length - silenceSamples);
      audioWithSilence.fill(0, muteFrom);

      const wavBuffer = utils.encodeWAV(audioWithSilence);
      const blob = new Blob([wavBuffer], { type: 'audio/wav' });

      // Await the consumer so this callback's promise reflects its outcome
      // instead of leaving a floating promise behind.
      await onSpeechEnd(blob);
    },
    stream,
    minSpeechFrames: 1,
    redemptionFrames: 12,
    positiveSpeechThreshold: 0.7,
    negativeSpeechThreshold: 0.55,
  });
};

export default createVAD;
