You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
145 lines
3.9 KiB
Bash
145 lines
3.9 KiB
Bash
#!/bin/bash
|
|
# Script that uses `whisper.cpp` to transcribe audio and video files automatically and writes the results to the same directory as the input file
|
|
|
|
# Configuration: location of the whisper.cpp checkout and of its CLI binary.
REPO_DIR="/Users/borjarobert/code/whisper.cpp"
WHISPER_CLI="$REPO_DIR/build/bin/whisper-cli"

# Scratch directory that holds the intermediate WAV file. Abort right away if
# it cannot be created; otherwise later paths would be built from an empty
# prefix.
TEMP_DIR=$(mktemp -d) || {
  echo "Error: no se pudo crear el directorio temporal" >&2
  exit 1
}
# Remove the scratch directory on every exit path, including the early exits
# (usage, -u, validation errors) that do not clean up explicitly.
trap 'rm -rf -- "$TEMP_DIR"' EXIT

# Defaults, overridable with -l and -f.
LANGUAGE="auto"
OUTPUT_FORMAT="txt"

# Model paths, all inside the checkout's models/ directory.
MODEL_LARGE="$REPO_DIR/models/ggml-large-v3-turbo.bin"
MODEL_MEDIUM="$REPO_DIR/models/ggml-medium.bin"
MODEL_SMALL="$REPO_DIR/models/ggml-base.bin" # note: points at the "base" model file
MODEL_PATH="$MODEL_LARGE" # Default model
|
|
|
|
#######################################
# Print the command-line help and terminate the script.
# Outputs: the usage text on stdout.
# Exits:   always, with status 1 (this function never returns).
#######################################
usage() {
  # The advertised default language is "auto", matching the script's LANGUAGE
  # default, and -t is described as accepting audio as well as video because
  # both kinds of extension are accepted by the script.
  printf '%s\n' \
    "Uso: whisper [-u] [-t archivo] [-m modelo] [-l idioma] [-f formato]" \
    "Opciones:" \
    " -u Actualiza y recompila whispercpp" \
    " -t archivo Transcribe el archivo de audio o vídeo" \
    " -m modelo Especifica el modelo (large, medium, small)" \
    " -l idioma Especifica el idioma (por defecto: auto)" \
    " -f formato Especifica el formato de salida (txt, srt, json, vtt, lrc, csv)"
  exit 1
}
|
|
|
|
#######################################
# Parse the command-line options into the script's global settings.
# Globals:
#   read:    REPO_DIR, TEMP_DIR, MODEL_LARGE, MODEL_MEDIUM, MODEL_SMALL
#   written: FILE, MODEL_PATH, LANGUAGE, OUTPUT_FORMAT
# Arguments: the script's command-line arguments ("$@").
# Exits:
#   0 after a successful -u update; 1 when the update fails; an invalid
#   option or a missing option argument ends the script through usage.
#######################################
parse_args() {
  # OPTIND is kept local so the function can be called more than once.
  local opt OPTIND=1

  while getopts ":ut:m:l:f:" opt; do
    case "$opt" in
      u)
        echo "Actualizando whispercpp..."
        # The update path never uses the scratch directory; drop it before
        # leaving the script.
        rm -rf "$TEMP_DIR"
        cd "$REPO_DIR" || exit 1
        # Only report success when both the pull and the build succeeded.
        if ! git pull || ! make; then
          echo "Error: no se pudo actualizar whispercpp" >&2
          exit 1
        fi
        echo "Actualización completa."
        exit 0
        ;;
      t)
        FILE="$OPTARG"
        ;;
      m)
        case "$OPTARG" in
          large)
            MODEL_PATH="$MODEL_LARGE"
            ;;
          medium)
            MODEL_PATH="$MODEL_MEDIUM"
            ;;
          small)
            MODEL_PATH="$MODEL_SMALL"
            ;;
          *)
            echo "Modelo no válido: $OPTARG. Usando el modelo por defecto (large)." >&2
            # Make the message true even if an earlier -m picked another model.
            MODEL_PATH="$MODEL_LARGE"
            ;;
        esac
        ;;
      l)
        LANGUAGE="$OPTARG"
        ;;
      f)
        OUTPUT_FORMAT="$OPTARG"
        ;;
      \?)
        echo "Opción inválida: -$OPTARG" >&2
        rm -rf "$TEMP_DIR"
        usage
        ;;
      :)
        echo "La opción -$OPTARG requiere un argumento." >&2
        rm -rf "$TEMP_DIR"
        usage
        ;;
    esac
  done
}

# Argument handling
parse_args "$@"
|
|
|
|
# Make sure a file to transcribe was provided; usage ends the script, so the
# scratch directory is removed first.
if [[ -z "$FILE" ]]; then
  rm -rf "$TEMP_DIR"
  usage
fi

# Lower-cased text after the last dot of the path, compared against the list
# of accepted audio/video extensions.
EXTENSION=$(printf '%s' "${FILE##*.}" | tr '[:upper:]' '[:lower:]')
VALID_EXTENSIONS="mp3 wav aac ogg flac m4a wma opus aiff alac ape wv amr ac3 dts mka mp4 mkv mov avi webm m4v wmv flv mpg mpeg vob ts mts m2ts 3gp ogv"

# Compare against each entry exactly. A substring search over the whole list
# would also accept values that span two entries, such as "mp3 wav".
extension_ok=false
for known_extension in $VALID_EXTENSIONS; do # word splitting is intentional
  if [[ "$known_extension" == "$EXTENSION" ]]; then
    extension_ok=true
    break
  fi
done

if [[ "$extension_ok" != true ]]; then
  echo "Error: formato no soportado (.${EXTENSION})" >&2
  rm -rf "$TEMP_DIR"
  exit 1
fi
|
|
|
|
# Check that the whisper-cli binary is available before doing any work.
if ! command -v "$WHISPER_CLI" >/dev/null 2>&1; then
  echo "Error: whisper-cli no se encuentra en $WHISPER_CLI" >&2
  # Do not leave the scratch directory behind on this early exit.
  rm -rf "$TEMP_DIR"
  exit 1
fi
|
|
|
|
# Convert the input to a WAV file inside the scratch directory. The WAV keeps
# the input's base name with its last extension replaced by ".wav".
FILENAME="$(basename "$FILE")"
NAME="${FILENAME%.*}"
WAV_FILE="${TEMP_DIR}/${NAME}.wav"

# ffmpeg is asked for pcm_s16le audio, 1 channel, 16000 Hz sample rate; -y
# overwrites an existing output without prompting.
if ! ffmpeg -i "$FILE" -acodec pcm_s16le -ac 1 -ar 16000 "$WAV_FILE" -y; then
  echo "Error al convertir a WAV"
  rm -rf "$TEMP_DIR"
  exit 1
fi
|
|
|
|
# Build the output path prefix: results go next to the input file and are
# named "<input base name>_<language>"; the extension is appended later.
OUTPUT_DIR="$(dirname "$FILE")"
OUTPUT_BASE="${NAME}_${LANGUAGE}"
OUTPUT_FILE="${OUTPUT_DIR}/${OUTPUT_BASE}"
|
|
|
|
# Transcribe: map the requested output format to the matching whisper-cli
# output flag, then run the tool once.
case "$OUTPUT_FORMAT" in
  txt) format_flag="-otxt" ;;
  srt) format_flag="-osrt" ;;
  json) format_flag="-oj" ;;
  vtt) format_flag="-ovtt" ;;
  lrc) format_flag="-olrc" ;;
  csv) format_flag="-ocsv" ;;
  *)
    echo "Formato de salida no soportado: $OUTPUT_FORMAT"
    rm -rf "$TEMP_DIR"
    exit 1
    ;;
esac

# whisper-cli writes "<OUTPUT_FILE>.<format>" itself (-of); its stdout is also
# copied to "<OUTPUT_FILE>_timestamped.txt". The binary path is quoted so a
# path containing spaces still works.
"$WHISPER_CLI" -m "$MODEL_PATH" -l "$LANGUAGE" "$format_flag" -of "$OUTPUT_FILE" "$WAV_FILE" \
  | tee "${OUTPUT_FILE}_timestamped.txt"
# The pipeline's own status is tee's; read whisper-cli's status explicitly so
# a failed transcription is not reported as a success.
whisper_status=${PIPESTATUS[0]}
if [[ "$whisper_status" -ne 0 ]]; then
  echo "Error: la transcripción ha fallado (código $whisper_status)" >&2
  rm -rf "$TEMP_DIR"
  exit 1
fi
|
|
|
|
# Remove the scratch directory and everything in it.
rm -rf "$TEMP_DIR"

# Tell the user which files were produced.
printf '%s\n' "Transcripción completada. Archivos generados: ${OUTPUT_FILE}_timestamped.txt y ${OUTPUT_FILE}.${OUTPUT_FORMAT}."
|