slowlydev · kyujin-cho · Jun 30, 2024 · Jun 30, 2024 · Jun 30, 2024 · Jul 6, 2024
@@ -9,6 +9,7 @@ use tracing::info;
 
 use crate::{LiveEvent, LiveState};
 
+mod audio;
 mod cors;
 mod drivers;
 mod health;
@@ -58,6 +59,7 @@ pub async fn init(
     let app = Router::new()
         .route("/api/sse", get(live::sse_handler))
         .route("/api/health", get(health::check))
+        .route("/api/audio", get(audio::get_audio))
         .route("/api/drivers", get(drivers::get_drivers))
         .layer(cors)
         .layer(governor)

@@ -0,0 +1,29 @@
+use axum::{extract::Query, http::StatusCode, response::IntoResponse};
+use serde::Deserialize;
+use std::env;
+use tracing::error;
+
+/// Query-string parameters accepted by [`get_audio`].
+#[derive(Deserialize)]
+pub struct Params {
+    /// Client-supplied value that [`get_audio`] appends after the upstream
+    /// `https://livetiming.formula1.com/static/` prefix.
+    path: String,
+}
+
+/// Proxies a file from the Formula 1 live-timing static host to the client.
+///
+/// The `path` query parameter is appended to
+/// `https://livetiming.formula1.com/static/` and the upstream body is returned
+/// unchanged.
+///
+/// # Errors
+///
+/// - `501 Not Implemented` when `ENABLE_AUDIO_FETCH` cannot be read from the
+///   environment (unset or not valid Unicode).
+/// - `400 Bad Request` when the resulting URL is invalid or resolves outside
+///   of `/static/`.
+/// - `502 Bad Gateway` when the upstream answers with a non-success status.
+/// - `500 Internal Server Error` when the request or the body download fails.
+pub async fn get_audio(Query(params): Query<Params>) -> Result<impl IntoResponse, StatusCode> {
+    // The proxy is opt-in: only the presence of the variable matters, not its value.
+    if env::var("ENABLE_AUDIO_FETCH").is_err() {
+        return Err(StatusCode::NOT_IMPLEMENTED);
+    }
+
+    let audio_url = format!("https://livetiming.formula1.com/static/{}", params.path);
+
+    // `path` is client-controlled. URL parsing normalises dot segments such as
+    // `..`, so validating the *parsed* path keeps the proxy confined to `/static/`.
+    let url = match reqwest::Url::parse(&audio_url) {
+        Ok(url) if url.path().starts_with("/static/") => url,
+        _ => {
+            error!("Rejected audio path outside of /static/: {}", params.path);
+            return Err(StatusCode::BAD_REQUEST);
+        }
+    };
+
+    let Ok(response) = reqwest::get(url).await else {
+        error!("Failed to retrieve audio data from {}", audio_url);
+        return Err(StatusCode::INTERNAL_SERVER_ERROR);
+    };
+
+    // Without this check an upstream error page would be relayed as `200 OK`.
+    let upstream_status = response.status();
+    if !upstream_status.is_success() {
+        error!("Upstream responded with {} for {}", upstream_status, audio_url);
+        return Err(StatusCode::BAD_GATEWAY);
+    }
+
+    let Ok(bytes) = response.bytes().await else {
+        error!("Failed to decode response from {}", audio_url);
+        return Err(StatusCode::INTERNAL_SERVER_ERROR);
+    };
+
+    // `Bytes` is itself a response body, so no copy into a `Vec<u8>` is needed.
+    Ok(bytes)
+}
@@ -5,6 +5,16 @@ await import("./src/env.mjs");
 /** @type {import("next").NextConfig} */
 const config = {
 	reactStrictMode: false,
+	webpack: (
+		config,
+	) => {
+        config.resolve.alias = {
+            ...config.resolve.alias,
+            "sharp$": false,
+            "onnxruntime-node$": false,
+        }
+		return config
+	},	
 	output: process.env.BUILD_STANDALONE === "1" ? "standalone" : undefined,
 	env: {
 		version: pack.version,

@@ -12,6 +12,7 @@
 	"dependencies": {
 		"@fireworks-js/react": "2.10.8",
 		"@headlessui/react": "2.2.0",
+		"@xenova/transformers": "2.7.0",
 		"clsx": "2.1.1",
 		"framer-motion": "11.11.17",
 		"geist": "1.3.1",

@@ -18,13 +18,19 @@ import Toggle from "@/components/Toggle";
 import Footer from "@/components/Footer";
 import Slider from "@/components/Slider";
 import Input from "@/components/Input";
+import Select from "@/components/Select";
+
 
 import { useSettingsStore } from "@/stores/useSettingsStore";
+import { models, useTranscriptionStore } from "@/stores/useTranscriptionStore";
 
 import { env } from "@/env.mjs";
 
 export default function SettingsPage() {
 	const settings = useSettingsStore();
+
+	const transcription = useTranscriptionStore();
+
 	return (
 		<div className="container mx-auto max-w-screen-lg px-4">
 			<h1 className="my-4 text-3xl">Settings</h1>
@@ -117,6 +123,20 @@ export default function SettingsPage() {
 				Reset delay
 			</Button>
 
+			<h2 className="my-4 text-2xl">Enable Radio Transcription</h2>
+
+			<p className="mb-4">Only available when the corresponding feature is enabled from server.</p>
+
+			<div className="flex gap-2">
+				<Toggle enabled={transcription.enabled} setEnabled={(v) => transcription.setEnabled(v)} />
+				<p className="text-zinc-500">Enable Radio Transcription</p>
+			</div>
+
+			<div className="flex gap-2">
+				<Select placeholder="Model" options={models} selected={transcription.model ?? models[0].value} setSelected={(v) => transcription.setModel(v ?? models[0].value)} />
+				<p className="text-zinc-500">Transcription Mode</p>
+			</div>
+
 			<Footer />
 		</div>
 	);

@@ -0,0 +1,118 @@
+/* eslint-disable camelcase */
+// from https://github.com/xenova/whisper-web/blob/main/src/worker.js
+import { pipeline, env } from "@xenova/transformers";
+
+// Disable local models
+env.allowLocalModels = false;
+
+// Define model factories
+// Ensures only one model is created of each type
+class PipelineFactory {
+	static task = null;
+	static model = null;
+	static quantized = null;
+	static instance = null;
+
+	constructor(model, quantized) {
+		this.model = model;
+		this.quantized = quantized;
+	}
+
+	static async getInstance(progress_callback = null) {
+		if (this.instance === null) {
+			this.instance = pipeline(this.task, this.model, {
+				quantized: this.quantized,
+				progress_callback,
+				// For medium models, we need to load the `no_attentions` revision to avoid running out of memory
+				revision: this.model.includes("/whisper-medium") ? "no_attentions" : "main",
+			});
+		}
+
+		return this.instance;
+	}
+}
+
+self.addEventListener("message", async (event) => {
+	const message = event.data;
+
+	// Do some work...
+	// TODO use message data
+	let transcript;
+	try {
+		transcript = await transcribe(
+			message.audio,
+			message.model,
+			message.multilingual,
+			message.quantized,
+			message.subtask,
+			message.language,
+		);
+	} catch (e) {
+		console.warn("Error while transcribing: " + e);
+		transcript = {
+			text: "",
+			chunks: [],
+		};
+	}
+
+	// Send the result back to the main thread
+	self.postMessage({
+		status: "complete",
+		task: "automatic-speech-recognition",
+		key: message.key,
+		data: transcript,
+	});
+});
+
+/**
+ * Pipeline factory preconfigured for the "automatic-speech-recognition" task.
+ * `model` and `quantized` start out as `null` and are assigned by `transcribe`
+ * before the pipeline is requested.
+ */
+class AutomaticSpeechRecognitionPipelineFactory extends PipelineFactory {
+	static task = "automatic-speech-recognition";
+	static model = null;
+	static quantized = null;
+}
+
+const transcribe = async (audio, model, multilingual, quantized, subtask, language) => {
+	const isDistilWhisper = model.startsWith("distil-whisper/");
+
+	let modelName = model;
+	if (!isDistilWhisper && !multilingual) {
+		modelName += ".en";
+	}
+
+	const p = AutomaticSpeechRecognitionPipelineFactory;
+	if (p.model !== modelName || p.quantized !== quantized) {
+		// Invalidate model if different
+		p.model = modelName;
+		p.quantized = quantized;
+
+		if (p.instance !== null) {
+			(await p.getInstance()).dispose();
+			p.instance = null;
+		}
+	}
+
+	// Load transcriber model
+	let transcriber = await p.getInstance((data) => {
+		self.postMessage(data);
+	});
+
+	// Actually run transcription
+	let output = await transcriber(audio, {
+		// Greedy
+		top_k: 0,
+		do_sample: false,
+
+		// Sliding window
+		chunk_length_s: isDistilWhisper ? 20 : 30,
+		stride_length_s: isDistilWhisper ? 3 : 5,
+
+		// Language and task
+		language: language,
+		task: subtask,
+
+		// Return timestamps
+		return_timestamps: true,
+		force_full_sequences: false,
+	});
+
+	return output;
+};
@@ -1,6 +1,7 @@
-import { useRef, useState } from "react";
+import { useMemo, useRef, useState } from "react";
 import { motion } from "framer-motion";
 import { utc } from "moment";
+import clsx from "clsx";
 
 import { useSettingsStore } from "@/stores/useSettingsStore";
 
@@ -9,22 +10,36 @@ import PlayControls from "./PlayControls";
 import AudioProgress from "./AudioProgress";
 
 import { Driver, RadioCapture } from "@/types/state.type";
-import clsx from "clsx";
 
 type Props = {
 	driver: Driver;
 	capture: RadioCapture;
 	basePath: string;
+	transcription?: string;
 };
 
-export default function TeamRadioMessage({ driver, capture, basePath }: Props) {
+export default function TeamRadioMessage({ driver, capture, basePath, transcription }: Props) {
 	const audioRef = useRef<HTMLAudioElement | null>(null);
 	const intervalRef = useRef<NodeJS.Timeout | null>(null);
 
 	const [playing, setPlaying] = useState<boolean>(false);
 	const [duration, setDuration] = useState<number>(10);
 	const [progress, setProgress] = useState<number>(0);
 
+	const transcriptionElement = useMemo(() => {
+		if (transcription === undefined) {
+			return <></>;
+		} else if (transcription === "") {
+			return <SkeletonTranscription />;
+		} else {
+			return (
+				<p className="font-small text-sm" style={{ whiteSpace: "pre-wrap" }}>
+					{transcription}
+				</p>
+			);
+		}
+	}, [transcription]);
+
 	const loadMeta = () => {
 		if (!audioRef.current) return;
 		setDuration(audioRef.current.duration);
@@ -106,6 +121,13 @@ export default function TeamRadioMessage({ driver, capture, basePath }: Props) {
 					/>
 				</div>
 			</div>
+			<div className="gap-1">{transcriptionElement}</div>
 		</motion.li>
 	);
 }
+
+/** Pulsing placeholder bar for a transcription. */
+function SkeletonTranscription() {
+	return <div className={clsx("h-6 animate-pulse rounded-md bg-zinc-800", "!h-8 w-80")} />;
+}