Spaces: Running on Zero
File size: 19,661 Bytes
from app.logger_config import logger as logging
import gradio as gr
from pathlib import Path
import os
from app.utils import (
remove_active_task_flag_file,
remove_chunk_folder,
task_fake,
is_active_task,
is_active_stream,
task
)
# from app.utils import (
# raise_error,
# READ_SIZE,
# generate_coturn_config,
# read_and_stream_audio,
# stop_streaming,
# task,
# task_fake
# )
DEFAULT_CONFIG = {
"task_type": "Transcription",
"lang_source": "French",
"lang_target": "English",
"chunk_secs": 1.0,
"left_context_secs": 20.0,
"right_context_secs": 0.5,
"streaming_policy": "alignatt",
"alignatt_thr": 8,
"waitk_lagging": 2,
"exclude_sink_frames": 8,
"xatt_scores_layer": -2,
"hallucinations_detector": True,
}
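# NOTE: these 12 keys mirror the 12 config widgets handled by to_updates();
# reset_to_defaults() pushes them back to the interface.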
EXAMPLE_CONFIGS = {
"data/english_meeting.wav": {
"task_type": "Transcription", "lang_source": "English", "lang_target": "English",
"chunk_secs": 1.0, "left_context_secs": 20.0, "right_context_secs": 0.5,
"streaming_policy": "waitk", "alignatt_thr": 8, "waitk_lagging": 2,
"exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": True
},
"data/french_news.wav": {
"task_type": "Transcription", "lang_source": "French", "lang_target": "French",
"chunk_secs": 1.0, "left_context_secs": 15.0, "right_context_secs": 0.5,
"streaming_policy": "alignatt", "alignatt_thr": 8.0, "waitk_lagging": 3,
"exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": True
},
"data/spanish_podcast.wav": {
"task_type": "Translation", "lang_source": "Spanish", "lang_target": "English",
"chunk_secs": 1.5, "left_context_secs": 25.0, "right_context_secs": 0.4,
"streaming_policy": "waitk", "alignatt_thr": 7, "waitk_lagging": 1,
"exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": False
}
}
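# NOTE: presets are matched by file name only (see apply_preset_if_example below), so the
# "data/" prefix is informative and does not have to match the uploaded file's path.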
# ========== UTILITY FUNCTIONS ==========
def to_updates(cfg):
"""Map dict -> gr.update list dans l'ordre des sorties."""
return [
gr.update(value=cfg["task_type"]),
gr.update(value=cfg["lang_source"]),
gr.update(
value=cfg["lang_target"],
visible=(cfg["task_type"] == "Translation")
),
gr.update(value=cfg["chunk_secs"]),
gr.update(value=cfg["left_context_secs"]),
gr.update(value=cfg["right_context_secs"]),
gr.update(value=cfg["streaming_policy"]),
gr.update(value=cfg["alignatt_thr"]),
gr.update(value=cfg["waitk_lagging"]),
gr.update(value=cfg["exclude_sink_frames"]),
gr.update(value=cfg["xatt_scores_layer"]),
gr.update(value=cfg["hallucinations_detector"]),
]
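# Illustrative note (component names are assumptions; the actual widgets live in the UI module):
# the 12 updates returned above must be bound, in this exact order, to the outputs list
# [task_type, lang_source, lang_target, chunk_secs, left_context_secs, right_context_secs,
#  streaming_policy, alignatt_thr, waitk_lagging, exclude_sink_frames, xatt_scores_layer,
#  hallucinations_detector] wherever to_updates() feeds a Gradio event.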
def apply_preset_if_example(filepath, auto_apply):
"""Si fichier = exemple ET auto_apply=True -> applique preset. Sinon, ne rien changer."""
if not filepath or not auto_apply:
updates = [gr.update() for _ in range(12)]
updates.append(gr.update())
return tuple(updates)
    # Compare only the file name, not the full path
file_name = Path(filepath).name
    # Look up EXAMPLE_CONFIGS by file name
cfg = next(
(config for path, config in EXAMPLE_CONFIGS.items() if Path(path).name == file_name),
None
)
if not cfg:
updates = [gr.update() for _ in range(12)]
updates.append(gr.update())
return tuple(updates)
updates = to_updates(cfg)
updates.append(gr.update(value=f"Preset applied for: {file_name}"))
return tuple(updates)
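# Example (illustrative): loading "english_meeting.wav" with auto_apply=True applies the
# "data/english_meeting.wav" preset and sets the summary to "Preset applied for: english_meeting.wav".
# Any other file, or auto_apply=False, returns 13 neutral gr.update() objects (no change).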
def reset_to_defaults():
"""Réinitialise tous les champs aux valeurs par défaut."""
updates = to_updates(DEFAULT_CONFIG) # 12 champs
# Ajout du résumé (13e sortie)
updates.append(gr.update(value="Defaults restored."))
return tuple(updates)
def summarize_config(
task, src, tgt,
chunk, left, right,
policy, thr, lag, sink, xatt, halluc
):
txt = f"🧠 **Task:** {task}\n🌐 **Source language:** {src}"
if task == "Translation":
txt += f"\n🎯 **Target language:** {tgt}"
txt += (
f"\n\n### ⚙️ Advanced Parameters:\n"
f"- chunk_secs = {chunk}\n"
f"- left_context_secs = {left}\n"
f"- right_context_secs = {right}\n"
f"- decoding.streaming_policy = {policy}\n"
f"- decoding.alignatt_thr = {thr}\n"
f"- decoding.waitk_lagging = {lag}\n"
f"- decoding.exclude_sink_frames = {sink}\n"
f"- decoding.xatt_scores_layer = {xatt}\n"
f"- decoding.hallucinations_detector = {halluc}"
)
return txt
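# Example (illustrative): summarize_config("Transcription", "French", "English",
# 1.0, 20.0, 0.5, "alignatt", 8, 2, 8, -2, True) returns Markdown starting with
# "🧠 **Task:** Transcription"; the target-language line is only emitted for "Translation".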
def handle_additional_outputs(webrtc_stream, msg):
"""
Updates UI elements based on streaming state.
Improvements:
- Uses centralized state logic to avoid code duplication.
- Handles default values to reduce 'if/else' complexity.
- Secures reading of dictionary keys.
"""
# 1. Default state initialization (Neutral or State Conservation Mode)
# By default, return gr.update() which means "do nothing"
# This avoids specifying the state of every button each time
start_btn = gr.update()
stop_btn = gr.update()
start_task_btn = gr.update()
go_to_task_btn = gr.update()
audio_step = gr.update()
slider = gr.update()
walkthrough = gr.update()
status_msg = gr.update(visible=False, value="")
# Safety: if msg is not a valid dictionary
if not isinstance(msg, dict):
return (start_btn, stop_btn, start_task_btn, go_to_task_btn, audio_step, slider, walkthrough, status_msg)
session_hash = msg.get("session_hash_code", "")
# --- CASE 1: ERROR ---
if msg.get("errored"):
error_val = msg.get("value", "Unknown error")
logging.error(f"[stream_ui] Client-side error: {error_val}")
start_btn = gr.update(visible=True)
stop_btn = gr.update(visible=False)
start_task_btn = gr.update(visible=False)
go_to_task_btn = gr.update(visible=False)
audio_step = gr.update(interactive=True)
slider = gr.update(visible=False, value=0)
status_msg = gr.update(value=f"⚠️ **Error:** {error_val}", visible=True)
# --- CASE 2: MANUAL STOP ---
    # Note: the backend currently sends "stoped" (one "p"); "stopped" is also accepted in case it gets fixed
elif msg.get("stoped") or msg.get("stopped"):
start_btn = gr.update(visible=True)
stop_btn = gr.update(visible=False)
start_task_btn = gr.update(visible=False)
go_to_task_btn = gr.update(visible=False)
audio_step = gr.update(interactive=True)
slider = gr.update(visible=True, value=0)
status_msg = gr.update(value="ℹ️ Stream stopped by user.", visible=True)
# --- CASE 3: PROGRESS ---
elif msg.get("progressed"):
progress = float(msg.get("value", 0))
# Common logic for progress (active or finished)
start_btn = gr.update(visible=False) # Hide Start during stream
stop_btn = gr.update(visible=True) # Show Stop during stream
audio_step = gr.update(interactive=False) # Lock input
slider = gr.update(visible=True, value=progress)
# Sub-case: Streaming finished (100%)
if progress >= 100.0:
start_btn = gr.update(visible=True)
stop_btn = gr.update(visible=False)
start_task_btn = gr.update(visible=False)
go_to_task_btn = gr.update(visible=True)
audio_step = gr.update(interactive=True)
# Status message remains hidden and empty (default values)
# Sub-case: Streaming in progress (<100%)
else:
go_to_task_btn = gr.update(visible=True)
            # start_task_button: if a task is already active, leave it untouched
            # (neutral gr.update); otherwise make it visible
if (not is_active_task(session_hash)) and is_active_stream(session_hash):
start_task_btn = gr.update(visible=True)
# No status message during normal progress
# --- SINGLE RETURN ---
# Order must match EXACTLY your outputs=[...] list in Gradio
return (
start_btn, # 1. start_stream_button
stop_btn, # 2. stop_stream_button
start_task_btn, # 3. start_task_button
go_to_task_btn, # 4. go_to_task
audio_step, # 5. audio_source_step
slider, # 6. status_slider
walkthrough, # 7. walkthrough
status_msg # 8. status_message (Markdown/HTML)
)
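# Expected `msg` payloads handled above (shapes inferred from the keys this function reads;
# the streaming backend may attach additional fields):
#   {"session_hash_code": "...", "errored": True, "value": "<error text>"}
#   {"session_hash_code": "...", "stoped": True}                 # or "stopped"
#   {"session_hash_code": "...", "progressed": True, "value": 42.0}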
def on_file_load(filepath):
"""
    Update the active audio path, or reset it when no file is loaded.
    """
    # If a file is loaded (upload, microphone, or example),
    # audio_path will not be None.
is_visible = filepath is not None
return filepath, gr.update(visible=is_visible)
def get_custom_theme():
    # === Custom theme (neon studio) ===
theme = gr.themes.Base(
primary_hue="blue",
secondary_hue="indigo",
).set(
body_background_fill="#F7F8FA",
body_text_color="#222222",
block_border_color="#D0D3D9",
button_primary_background_fill="#3B82F6",
button_primary_background_fill_hover="#2563EB",
button_primary_text_color="#FFFFFF",
)
css_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "assets", "custom_style.css")
with open(css_path, encoding="utf-8") as f:
css_style = f.read()
return theme, css_style
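# Usage sketch (assumption: `demo` is the gr.Blocks app defined in the UI module):
# theme, css_style = get_custom_theme()
# with gr.Blocks(theme=theme, css=css_style) as demo:
#     ...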
# ========== TASK ==========
def start_task_asr_ast(
session_hash_code,
task_type, lang_source, lang_target,
chunk_secs, left_context_secs, right_context_secs,
streaming_policy, alignatt_thr, waitk_lagging,
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
):
"""
Manages streaming of transcription (ASR) or translation (AST) results.
Orchestrates real-time UI updates (text, status, buttons).
"""
accumulated_text = ""
# Call task generator (backend)
task_generator = task_fake(
session_hash_code,
task_type, lang_source, lang_target,
chunk_secs, left_context_secs, right_context_secs,
streaming_policy, alignatt_thr, waitk_lagging,
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
)
try:
# Loop over partial results
# result_data: can be transcribed text OR an info message depending on status
for result_data, status, debug_info in task_generator:
# 1. Default states for this iteration ('In Progress' mode)
# By default, lock config and allow stopping
start_btn = gr.update(visible=False)
stop_btn = gr.update(visible=True)
config_step = gr.update(interactive=False)
# Status message and main text depend on return type
status_msg = gr.update(visible=True)
main_output = accumulated_text
# --- CASE 1: SUCCESS (New text segment) ---
if status == "success":
# result_data is the new text chunk here
partial_text = result_data
# Update accumulator
accumulated_text += partial_text
main_output = accumulated_text
# Status message displays chunk info (e.g., timestamps)
status_msg = gr.update(visible=True, value=debug_info, elem_classes=[status])
# --- CASE 2: WARNING / INFO (System message) ---
elif status in ["warning", "info"]:
# result_data is the error or info message here
# Do not touch accumulated_text
status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
# --- CASE 3: DONE / ERROR---
elif status in ["done", "error"]:
logging.error(f"[ui] error ")
# Re-enable controls
is_streaming = is_active_stream(session_hash_code)
start_btn = gr.update(visible=is_streaming) # Show Start only if audio stream is active
stop_btn = gr.update(visible=False)
config_step = gr.update(interactive=True)
# result_data is the completion message
status_msg = gr.update(visible=True, value=result_data, elem_classes=[status])
# 2. Single dispatch to UI
# Expected order: [task_output, status_message_task, start_task_button, stop_task_button, config_step]
yield (
main_output,
status_msg,
start_btn,
stop_btn,
config_step
)
except Exception as e:
# --- ERROR HANDLING (GPU ABORT / RUNTIME ERROR) ---
error_msg = str(e)
logging.error(f"Task Error for {session_hash_code}: {error_msg}", exc_info=True)
remove_active_task_flag_file(session_hash_code)
remove_chunk_folder(session_hash_code)
# Detect specific Hugging Face / GPU errors
if "GPU task aborted" in error_msg or "CUDA out of memory" in error_msg or "Device" in error_msg:
display_msg = f"🛑 **System Error:** GPU Task Aborted. The model may have run out of memory. ({error_msg})"
else:
display_msg = f"⚠️ **Task Error:** {error_msg}"
# Update UI to reflect the crash
yield (
accumulated_text, # Keep what we managed to generate so far
gr.update(visible=True, value=display_msg, elem_classes=["error"]), # Show error in status box
gr.update(visible=True), # Re-enable Start button to allow retry
gr.update(visible=False), # Hide Stop button
gr.update(interactive=True) # Unlock config
)
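# Generator contract consumed above: task_fake yields (result_data, status, debug_info) with
# status in {"success", "warning", "info", "done", "error"}. Each yield of this wrapper feeds
# the Gradio outputs [task_output, status_message_task, start_task_button, stop_task_button,
# config_step], in that order.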
def stop_task_fn(session_hash_code):
remove_active_task_flag_file(session_hash_code)
yield "Task stopped by user."
# --------------------------------------------------------
def raise_error(message="Une erreur est survenue."):
raise gr.Error(message)