Using Faster Whisper on macOS

1. Open the terminal and activate the environment

source whisper-env/bin/activate
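
If the environment does not exist yet, it can be created first. A minimal setup, assuming Python 3 is installed (the whisper-env folder name matches the command above):

python3 -m venv whisper-env
source whisper-env/bin/activate
pip install faster-whisper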

2. Open the script

nano faster_transcribe.py

3.1. If transcribing an audio file, edit the script as follows (model size on Line 5 and/or audio file path and name on Line 8)

from faster_whisper import WhisperModel
import os

# Load the model
model = WhisperModel("large-v3", device="auto", compute_type="int8")

# Path to your audio file
audio_path = "/Users/yourUsername/Desktop/Audio.m4a"

# Transcribe
segments, info = model.transcribe(audio_path)

# Print language
print("Detected language:", info.language)

# Build transcript text
transcript_lines = []
for segment in segments:
    line = f"[{segment.start:.2f} - {segment.end:.2f}] {segment.text}"
    print(line)
    transcript_lines.append(line)
    
# Save to a .txt file in the same folder as the audio file
base_filename = os.path.splitext(os.path.basename(audio_path))[0]
output_path = os.path.join(os.path.dirname(audio_path), f"{base_filename}_transcript.txt")

with open(output_path, "w", encoding="utf-8") as f:
    f.write("\n".join(transcript_lines))
print(f"\nâś… Transcript saved to: {output_path}")

3.2. If transcribing a video file, edit the script as follows (model size on Line 6 and/or video file path and name on Line 9)

from faster_whisper import WhisperModel
import subprocess
import os

# Load the model
model = WhisperModel("large-v3", device="auto", compute_type="int8")

# Path to your video file
video_path = "/Users/yourUsername/Desktop/Video.mp4"

# Extract audio as a temporary file (.wav for compatibility)
audio_path = os.path.splitext(video_path)[0] + "_temp_audio.wav"
subprocess.run([
    "ffmpeg", "-y",          # -y: overwrite the output if it already exists
    "-i", video_path,
    "-vn",                   # no video
    "-acodec", "pcm_s16le",  # compatible audio format
    "-ar", "16000",          # sample rate recommended by Whisper
    "-ac", "1",              # single channel (mono)
    audio_path
], check=True)

# Transcribe the extracted audio
segments, info = model.transcribe(audio_path)

# Display detected language
print("Detected language:", info.language)

# Build transcript text
transcript_lines = []
for segment in segments:
    line = f"[{segment.start:.2f} - {segment.end:.2f}] {segment.text}"
    print(line)
    transcript_lines.append(line)

# Save transcript as .txt next to the original video
base_filename = os.path.splitext(os.path.basename(video_path))[0]
output_path = os.path.join(os.path.dirname(video_path), f"{base_filename}_transcript.txt")
with open(output_path, "w", encoding="utf-8") as f:
    f.write("\n".join(transcript_lines))

print(f"\nâś… Transcript saved to: {output_path}")

# Delete the temporary audio file (remove this line to keep it)
os.remove(audio_path)
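
Note that this script assumes ffmpeg is available on the PATH; on macOS it can be installed with Homebrew (brew install ffmpeg). As a sketch, a small guard near the top of the script fails early with a clear message instead of a FileNotFoundError from subprocess:

import shutil

# Abort early if ffmpeg is not installed or not on the PATH
if shutil.which("ffmpeg") is None:
    raise SystemExit("ffmpeg not found; install it first, e.g. brew install ffmpeg")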

4. Save

  • Press CTRL + O, then Enter

  • Press CTRL + X to exit Nano

5. Run the script

python faster_transcribe.py
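
On the first run, faster-whisper downloads the selected model from the Hugging Face Hub (a few gigabytes for large-v3), so the initial start can take a while; later runs load it from the local cache.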

6. Output

  • The transcription will be printed in the terminal

  • A .txt file will be saved in the same folder as the source file (the Desktop in these examples)
