Usar Faster Whisper en macOS

1. Abrir la terminal y activar ambiente

source whisper-env/bin/activate

2. Abrir script

nano faster_transcribe.py

3.1. Editar script si se requiere transcripción desde audio (tamaño del modelo (Ln 5) y/o ruta y nombre del audio (Ln 8))

from faster_whisper import WhisperModel
import os

# Load the model
model = WhisperModel("large-v3", device="auto", compute_type="int8")

# Path to your audio file
audio_path = "/Users/nombreUsuario/Desktop/Audio.m4a"

# Transcribe
segments, info = model.transcribe(audio_path)

# Print language
print("Detected language:", info.language)

# Build transcript text: one "[start - end] text" line per segment,
# echoed to the terminal as it is collected
transcript_lines = []
for segment in segments:
    line = f"[{segment.start:.2f} - {segment.end:.2f}] {segment.text}"
    print(line)
    transcript_lines.append(line)

# Save to a .txt file in the same folder as the audio file
base_filename = os.path.splitext(os.path.basename(audio_path))[0]
output_path = os.path.join(os.path.dirname(audio_path), f"{base_filename}_transcript.txt")

with open(output_path, "w", encoding="utf-8") as f:
    f.write("\n".join(transcript_lines))
print(f"\n✅ Transcript saved to: {output_path}")

3.2. Editar script si se requiere transcripción desde video (tamaño del modelo (Ln 6) y/o ruta y nombre del video (Ln 9))

from faster_whisper import WhisperModel
import subprocess
import os

# Load the model
model = WhisperModel("large-v3", device="auto", compute_type="int8")

# Path to your video file
video_path = "/Users/nombreUsuario/Desktop/Video.mp4"

# Temporary audio file written next to the video (.wav for compatibility)
audio_path = os.path.splitext(video_path)[0] + "_temp_audio.wav"

try:
    # Extract the audio track with ffmpeg; check=True raises
    # subprocess.CalledProcessError if ffmpeg exits with an error.
    subprocess.run([
        "ffmpeg",
        "-y",                    # overwrite the output if it already exists (global option, so it goes first)
        "-i", video_path,
        "-vn",                   # no video
        "-acodec", "pcm_s16le",  # compatible audio format
        "-ar", "16000",          # sample rate recommended for Whisper
        "-ac", "1",              # single channel (mono)
        audio_path,
    ], check=True)

    # Transcribe the extracted audio
    segments, info = model.transcribe(audio_path)

    # Show the detected language
    print("Detected language:", info.language)

    # Build transcript text: one "[start - end] text" line per segment.
    # The loop stays inside the try so the temporary audio is still on disk
    # while the segments are being read.
    transcript_lines = []
    for segment in segments:
        line = f"[{segment.start:.2f} - {segment.end:.2f}] {segment.text}"
        print(line)
        transcript_lines.append(line)
finally:
    # Remove the temporary audio file even if extraction or transcription
    # failed; it may not exist if ffmpeg never produced it.
    if os.path.exists(audio_path):
        os.remove(audio_path)

# Save the transcript as .txt next to the original video
base_filename = os.path.splitext(os.path.basename(video_path))[0]
output_path = os.path.join(os.path.dirname(video_path), f"{base_filename}_transcript.txt")
with open(output_path, "w", encoding="utf-8") as f:
    f.write("\n".join(transcript_lines))

print(f"\n✅ Transcript saved to: {output_path}")

4. Guardar

  • CTRL + O, Enter

  • CTRL + X para salir de Nano

5. Ejecutar script

python faster_transcribe.py

6. Obtención de resultado

  • Transcripción en terminal

  • Archivo .txt guardado en la misma carpeta que el audio o video de origen (el escritorio, con las rutas de ejemplo)

Last updated