You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
131 lines
3.4 KiB
Bash
131 lines
3.4 KiB
Bash
#!/bin/bash

# Transcribes audio and video files with whisper.cpp (`whisper-cli`).
# The input is first converted to a temporary WAV file with ffmpeg; the
# transcript files are written to the current working directory.

# Configuration
REPO_DIR=~/code/whisper.cpp
WHISPER_CLI="$REPO_DIR/build/bin/whisper-cli"
LANGUAGE="en"
OUTPUT_FORMAT="txt"

# Scratch directory for the intermediate WAV file. The EXIT trap removes it
# on every exit path, including the early exits that never reach the
# explicit clean-up at the end of the script.
TEMP_DIR=$(mktemp -d) || {
  echo "Error: no se pudo crear el directorio temporal" >&2
  exit 1
}
trap 'rm -rf -- "$TEMP_DIR"' EXIT

# Model paths, derived from REPO_DIR so the script is not tied to one
# user's home directory.
MODEL_LARGE="$REPO_DIR/models/ggml-large-v3-turbo.bin"
MODEL_MEDIUM="$REPO_DIR/models/ggml-medium.bin"
# NOTE: the "small" choice points at the ggml *base* model file.
MODEL_SMALL="$REPO_DIR/models/ggml-base.bin"
MODEL_PATH="$MODEL_LARGE" # default model

# Print the command-line help and terminate the script.
# Outputs: the usage text, on stderr (all visible callers are error paths).
# Exits:   always, with status 1.
usage() {
  cat >&2 <<'EOF'
Uso: whisper [-u] [-t archivo] [-m modelo] [-l idioma] [-f formato]
Opciones:
 -u Actualiza y recompila whispercpp
 -t archivo Transcribe el archivo de video
 -m modelo Especifica el modelo (large, medium, small)
 -l idioma Especifica el idioma (por defecto: en)
 -f formato Especifica el formato de salida (txt, srt, json, vtt, lrc, csv)
EOF
  exit 1
}

# Pull the latest whisper.cpp sources and rebuild them (option -u).
# Globals: REPO_DIR (read)
# Returns: 0 when every step succeeded; otherwise the status of the first
#          step (cd, git pull, make) that failed.
update_whisper() {
  echo "Actualizando whispercpp..."
  cd "$REPO_DIR" || return
  git pull || return
  make || return
  echo "Actualización completa."
}

# Parse the command-line options.
# Globals:   FILE, MODEL_PATH, LANGUAGE, OUTPUT_FORMAT (written);
#            MODEL_LARGE, MODEL_MEDIUM, MODEL_SMALL (read)
# Arguments: the script's arguments ("$@")
# Exits:     0 after a successful -u update, 1 after a failed one; an unknown
#            option or a missing option argument hands over to usage.
parse_args() {
  # OPTIND is local so that the function can be called more than once.
  local opt OPTIND=1
  while getopts ":ut:m:l:f:" opt; do
    case "$opt" in
      u)
        # Only report success when the pull and the build really worked.
        if update_whisper; then
          exit 0
        fi
        echo "Error: no se pudo actualizar whispercpp." >&2
        exit 1
        ;;
      t)
        FILE="$OPTARG"
        ;;
      m)
        case "$OPTARG" in
          large)
            MODEL_PATH="$MODEL_LARGE"
            ;;
          medium)
            MODEL_PATH="$MODEL_MEDIUM"
            ;;
          small)
            MODEL_PATH="$MODEL_SMALL"
            ;;
          *)
            echo "Modelo no válido: $OPTARG. Usando el modelo por defecto (large)." >&2
            # Make the message true even if an earlier -m picked another model.
            MODEL_PATH="$MODEL_LARGE"
            ;;
        esac
        ;;
      l)
        LANGUAGE="$OPTARG"
        ;;
      f)
        OUTPUT_FORMAT="$OPTARG"
        ;;
      \?)
        echo "Opción inválida: -$OPTARG" >&2
        usage
        ;;
      :)
        echo "La opción -$OPTARG requiere un argumento." >&2
        usage
        ;;
    esac
  done
}

parse_args "$@"

# Pre-flight checks. Each failing check removes the scratch directory (when
# one has been created) before leaving, so early exits do not leak it.

# An input file (-t) is required; without one, show the help text and stop.
if [[ -z "$FILE" ]]; then
  [[ -n "${TEMP_DIR:-}" ]] && rm -rf -- "$TEMP_DIR"
  usage
fi

# whisper-cli must be present and executable at the configured path.
if ! command -v "$WHISPER_CLI" &>/dev/null; then
  echo "Error: whisper-cli no se encuentra en $WHISPER_CLI" >&2
  [[ -n "${TEMP_DIR:-}" ]] && rm -rf -- "$TEMP_DIR"
  exit 1
fi

# The selected model file must exist; checking here reports the problem
# before any conversion work is done.
if [[ ! -f "$MODEL_PATH" ]]; then
  echo "Error: no se encuentra el modelo en $MODEL_PATH" >&2
  [[ -n "${TEMP_DIR:-}" ]] && rm -rf -- "$TEMP_DIR"
  exit 1
fi

# Convert the input to WAV: 16-bit PCM (pcm_s16le), mono, 16 kHz.
# The WAV file lives in the scratch directory and is named after the input
# file (a trailing ".mp4" is stripped).
WAV_FILE="$TEMP_DIR/$(basename -- "$FILE" .mp4).wav"

# -y is a global option, so it goes before the input; -nostdin keeps ffmpeg
# from reading the script's standard input.
if ! ffmpeg -nostdin -y -i "$FILE" -acodec pcm_s16le -ac 1 -ar 16000 "$WAV_FILE"; then
  echo "Error al convertir a WAV" >&2
  rm -rf -- "$TEMP_DIR"
  exit 1
fi

# Output naming: "<input name without .mp4>_<language>", placed in the
# current working directory.
OUTPUT_BASE="$(basename -- "$FILE" .mp4)_${LANGUAGE}"
OUTPUT_DIR="$(pwd)"
OUTPUT_FILE="$OUTPUT_DIR/$OUTPUT_BASE"

# Translate the requested output format into the matching whisper-cli flag.
case "$OUTPUT_FORMAT" in
  txt) FORMAT_FLAG="-otxt" ;;
  srt) FORMAT_FLAG="-osrt" ;;
  json) FORMAT_FLAG="-oj" ;;
  vtt) FORMAT_FLAG="-ovtt" ;;
  lrc) FORMAT_FLAG="-olrc" ;;
  csv) FORMAT_FLAG="-ocsv" ;;
  *)
    echo "Formato de salida no soportado: $OUTPUT_FORMAT" >&2
    rm -rf -- "$TEMP_DIR"
    exit 1
    ;;
esac

# Transcribe. whisper-cli writes the formatted result to "$OUTPUT_FILE.<ext>"
# (-of); its standard output is shown and also saved as *_timestamped.txt.
"$WHISPER_CLI" -m "$MODEL_PATH" -l "$LANGUAGE" "$FORMAT_FLAG" -of "$OUTPUT_FILE" "$WAV_FILE" \
  | tee "${OUTPUT_FILE}_timestamped.txt"
# The pipeline's own status is tee's; keep whisper-cli's status instead.
WHISPER_STATUS=${PIPESTATUS[0]}

# Remove temporary files.
rm -rf -- "$TEMP_DIR"

if ((WHISPER_STATUS != 0)); then
  echo "Error: whisper-cli terminó con código $WHISPER_STATUS" >&2
  exit "$WHISPER_STATUS"
fi

echo "Transcripción completada. Archivos generados: ${OUTPUT_FILE}_timestamped.txt y ${OUTPUT_FILE}.${OUTPUT_FORMAT}."