import * as speechsdk from 'microsoft-cognitiveservices-speech-sdk'
import { toast } from 'react-toastify'

import { DEFAULT_LANGUAGE, Language } from '../constants'
import {
    TranscribedContent,
    UpdateMediaDeviceState,
    UpdateRecordingState,
} from '../services/event-bus/events'
import { servicesService } from '../services/http/services.service'
import { transcriptionsService } from '../services/http/transcriptions.service'
import { Transcript } from '../services/models/Visit.model'
import { audio } from './audio'
import { screenLock } from './screenLock'
import { timer } from './timer'

// True on development hosts; used to show extra diagnostics such as the
// "connected" toast when the Azure session starts.
const isDev = ['localhost', 'dev.fluent.health'].includes(
    window.location.hostname
)

// Upper bound on consecutive reconnection attempts after a cancellation error.
const MAX_RETRIES = 3

/**
 * Streams microphone audio to Azure Speech-to-Text for one visit.
 *
 * Partial and final results are published through `TranscribedContent`;
 * final results are additionally persisted via `addTranscriptToVisit`.
 *
 * Two modes are supported:
 * - `languageSource` set: speech is recognized in `languageSource` and
 *   translated into `languageTarget` (TranslationRecognizer).
 * - `languageSource` unset: speech is recognized in `languageTarget`
 *   (SpeechRecognizer), with no translation.
 */
export class AzureSTT {
    visitId: string | null = null
    deviceId?: string
    tokenObj: {
        token: string
        region: string
    } | null = null
    recognizer:
        | speechsdk.TranslationRecognizer
        | speechsdk.SpeechRecognizer
        | null = null
    languageSource?: Language
    languageTarget?: Language

    // Number of reconnection attempts since the last successful session start.
    private retryCount: number = 0

    /**
     * @param visitId Visit the transcripts are attached to.
     * @param deviceId Microphone device id; the default microphone is used
     *   when omitted.
     * @param languageSource Spoken language; when set, translation mode is
     *   used.
     * @param languageTarget Output language; falls back to
     *   `DEFAULT_LANGUAGE` when omitted.
     */
    constructor(
        visitId: string,
        deviceId?: string,
        languageSource?: Language,
        languageTarget?: Language
    ) {
        this.visitId = visitId
        this.deviceId = deviceId
        this.languageSource = languageSource
        this.languageTarget = languageTarget ?? DEFAULT_LANGUAGE
    }

    /**
     * Acquires the audio stream, fetches an authorization token if none is
     * set, creates the recognizer and starts continuous recognition.
     *
     * Rejects if acquiring the stream or fetching the token fails.
     */
    async start() {
        await audio.getStream(this.deviceId)

        if (!this.tokenObj) {
            // Signal "not available yet" while the token is being fetched;
            // `sessionStarted` flips this back to available.
            UpdateMediaDeviceState.emit({ isAvailable: false })
            this.tokenObj = await servicesService.getAzureSSTToken()
        }

        const recognizer = this.createRecognizer(this.tokenObj)
        this.recognizer = recognizer

        recognizer.recognizing = (_: any, e: any) => {
            // Emit partial transcript content for component consumption
            TranscribedContent.emit({
                visitId: this.visitId!,
                transcript: {
                    ...this.buildTranscript(e.result),
                    isPartial: true,
                },
            })
        }

        recognizer.recognized = (_: any, e: any) => {
            const { reason } = e.result
            if (
                reason !== speechsdk.ResultReason.RecognizedSpeech &&
                reason !== speechsdk.ResultReason.TranslatedSpeech
            ) {
                return
            }

            const transcript = this.buildTranscript(e.result)

            // Emit transcript content for component consumption
            TranscribedContent.emit({
                visitId: this.visitId!,
                transcript,
            })

            // Save transcript to database (failures are logged by the helper)
            void addTranscriptToVisit(this.visitId!, transcript)
        }

        recognizer.sessionStarted = () => {
            // Emit media device state
            UpdateMediaDeviceState.emit({ isAvailable: true })

            if (isDev) {
                toast.success('Connected to Azure transcription service')
            }

            // Reset retry count on successful reconnection
            this.retryCount = 0
        }

        recognizer.canceled = (_: any, e: any) => {
            // Ignore late events from a recognizer that has already been
            // stopped or replaced, so they cannot tear down its successor
            // or trigger a second reconnection for the same failure.
            if (this.recognizer !== recognizer) {
                return
            }

            if (e.reason === speechsdk.CancellationReason.Error) {
                console.error('AzureSTT session cancellation error details:')
                console.error({
                    error: e,
                })
                void this.handleReconnection()
            } else {
                recognizer.stopContinuousRecognitionAsync()
            }
        }

        recognizer.sessionStopped = () => {
            // Only act on the recognizer that is still current (see above).
            if (this.recognizer === recognizer) {
                recognizer.stopContinuousRecognitionAsync()
            }
        }

        // Start streaming audio for transcription; log failures reported
        // through the SDK's error callback.
        recognizer.startContinuousRecognitionAsync(
            undefined,
            (error: string) => {
                console.error('Failed to start Azure STT recognition:', error)
            }
        )
    }

    /**
     * Builds the recognizer for the configured mode: a TranslationRecognizer
     * when `languageSource` is set, otherwise a SpeechRecognizer.
     */
    private createRecognizer(tokenObj: {
        token: string
        region: string
    }): speechsdk.TranslationRecognizer | speechsdk.SpeechRecognizer {
        const audioConfig = this.deviceId
            ? speechsdk.AudioConfig.fromMicrophoneInput(this.deviceId)
            : speechsdk.AudioConfig.fromDefaultMicrophoneInput()

        // Multiple language translation while transcribing
        if (this.languageSource) {
            const translationConfig =
                speechsdk.SpeechTranslationConfig.fromAuthorizationToken(
                    tokenObj.token,
                    tokenObj.region
                )
            translationConfig.speechRecognitionLanguage = this.languageSource
            translationConfig.addTargetLanguage(
                this.languageTarget ?? DEFAULT_LANGUAGE
            )

            return new speechsdk.TranslationRecognizer(
                translationConfig,
                audioConfig
            )
        }

        // Single language transcription
        const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(
            tokenObj.token,
            tokenObj.region
        )
        speechConfig.speechRecognitionLanguage =
            this.languageTarget ?? DEFAULT_LANGUAGE

        return new speechsdk.SpeechRecognizer(speechConfig, audioConfig)
    }

    /**
     * Converts a recognition result into a `Transcript`, stamped with the
     * current time. The `translation` field is populated only when the
     * result carries a non-empty translation for its first target language.
     */
    private buildTranscript(result: {
        text: string
        translations?: {
            languages?: string[]
            get(key: string): string | undefined
        }
    }): Transcript {
        const translatedLanguage = result.translations?.languages?.[0]
        const translatedContent = translatedLanguage
            ? result.translations?.get(translatedLanguage)
            : undefined

        return {
            content: result.text,
            timestamp: new Date(),
            translation: translatedContent
                ? {
                      languageTarget: this.languageTarget ?? DEFAULT_LANGUAGE,
                      languageSource: this.languageSource ?? DEFAULT_LANGUAGE,
                      content: translatedContent,
                  }
                : undefined,
        }
    }

    /**
     * Tears down the current session and restarts it after a linearly
     * increasing delay (1s, 2s, 3s). After `MAX_RETRIES` attempts it gives
     * up, notifies the user and emits the "not recording" state.
     *
     * NOTE(review): an external `stop()` issued during the delay does not
     * cancel the pending restart — confirm whether callers can hit this.
     */
    private async handleReconnection() {
        if (this.retryCount >= MAX_RETRIES) {
            console.error(
                'Max retries reached. Unable to reconnect to Azure STT.'
            )
            toast.error(
                'Unable to reconnect to transcription service. Please try again later.'
            )
            this.stop()

            // Emit recording state to stop recording state
            UpdateRecordingState.emit({ isRecording: false })

            // Emit media device state to disable loading state
            UpdateMediaDeviceState.emit({ isAvailable: true })
            return
        }

        this.retryCount++
        const delaySeconds = this.retryCount // 1s, 2s, 3s
        console.error(
            `Attempting to reconnect to Azure STT (Attempt ${this.retryCount}/${MAX_RETRIES}) after ${delaySeconds}s delay`
        )

        try {
            this.stop()
            // Add delay before retry
            await new Promise((resolve) =>
                setTimeout(resolve, delaySeconds * 1000)
            )
            // Must be awaited: otherwise a rejected start() bypasses the
            // catch below and surfaces as an unhandled rejection.
            await this.start()
        } catch (error) {
            console.error('Reconnection attempt failed:', error)
            await this.handleReconnection() // Try again
        }
    }

    /**
     * Stops recognition and releases everything the session holds: the
     * recognizer (stopped, then closed), the cached token, the screen lock
     * and the audio stream. Safe to call when no recognizer is active.
     */
    stop() {
        const recognizer = this.recognizer
        this.recognizer = null
        this.tokenObj = null

        if (recognizer) {
            // Close once the stop request settles so the recognizer's
            // underlying resources are released either way.
            recognizer.stopContinuousRecognitionAsync(
                () => recognizer.close(),
                (error: string) => {
                    console.error(
                        'Error stopping Azure STT recognizer:',
                        error
                    )
                    recognizer.close()
                }
            )
        }

        screenLock.unlock()
        audio.stopStream()
    }

    /**
     * Supplies an authorization token so the next `start()` skips fetching
     * one. Note that `stop()` clears the stored token.
     */
    setToken(tokenObj: { token: string; region: string }) {
        this.tokenObj = tokenObj
    }
}

/**
 * Persists a transcript for a visit together with the current timer value.
 *
 * Does nothing when the visit id, transcript content or timestamp is missing.
 * Save failures are logged and swallowed, so the returned promise does not
 * reject because of them.
 *
 * @param visitId Visit the transcript belongs to.
 * @param transcript Transcript to store.
 */
const addTranscriptToVisit = async (
    visitId: string,
    transcript: Transcript
): Promise<void> => {
    const isSavable = Boolean(
        transcript.content && transcript.timestamp && visitId
    )
    if (!isSavable) {
        return
    }

    const elapsedSeconds = timer.getSeconds()
    const logSaveFailure = (error: unknown) => {
        console.error('Error saving transcript to database:', error)
    }

    return transcriptionsService
        .createTranscript(visitId, transcript, elapsedSeconds)
        .catch(logSaveFailure)
}
