|
|
|
@ -2,10 +2,10 @@
|
|
|
|
# Script that uses `whispercpp` to transcribe audio and video files automatically and outputs the results in the same directory
|
|
|
|
# Script that uses `whispercpp` to transcribe audio and video files automatically and outputs the results in the same directory
|
|
|
|
|
|
|
|
|
|
|
|
# Configuración
|
|
|
|
# Configuración
|
|
|
|
REPO_DIR=~/code/whisper.cpp
|
|
|
|
REPO_DIR="/Users/borjarobert/code/whisper.cpp"
|
|
|
|
WHISPER_CLI="$REPO_DIR/build/bin/whisper-cli"
|
|
|
|
WHISPER_CLI="$REPO_DIR/build/bin/whisper-cli"
|
|
|
|
TEMP_DIR=$(mktemp -d)
|
|
|
|
TEMP_DIR=$(mktemp -d)
|
|
|
|
LANGUAGE="en"
|
|
|
|
LANGUAGE="auto"
|
|
|
|
OUTPUT_FORMAT="txt"
|
|
|
|
OUTPUT_FORMAT="txt"
|
|
|
|
|
|
|
|
|
|
|
|
# Rutas de los modelos
|
|
|
|
# Rutas de los modelos
|
|
|
|
@ -78,6 +78,15 @@ if [ -z "$FILE" ]; then
|
|
|
|
usage
|
|
|
|
usage
|
|
|
|
fi
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Validate the input file's extension before any conversion work is done.
# Reads FILE; sets EXTENSION (the text after the last dot, lower-cased) and
# VALID_EXTENSIONS (space-separated list of accepted audio/video extensions).
EXTENSION="${FILE##*.}"
# Lower-case with tr instead of ${EXTENSION,,}: the ",," case-modification
# expansion requires bash 4+ and aborts with "bad substitution" on older
# bash versions (e.g. 3.2).
EXTENSION="$(printf '%s' "$EXTENSION" | tr '[:upper:]' '[:lower:]')"
VALID_EXTENSIONS="mp3 wav aac ogg flac m4a wma opus aiff alac ape wv amr ac3 dts mka mp4 mkv mov avi webm m4v wmv flv mpg mpeg vob ts mts m2ts 3gp ogv"

# Both sides are padded with spaces so that only whole entries of the list
# match (the quoted right-hand side of =~ is compared literally).
if [[ ! " $VALID_EXTENSIONS " =~ " $EXTENSION " ]]; then
  echo "Error: formato no soportado (.${EXTENSION})"
  # Remove the temporary directory created by mktemp earlier in the script,
  # so a rejected file does not leave it behind (same cleanup as the ffmpeg
  # failure path).
  rm -rf "$TEMP_DIR"
  exit 1
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Verificar si whisper-cli está disponible
|
|
|
|
# Verificar si whisper-cli está disponible
|
|
|
|
if ! command -v "$WHISPER_CLI" &>/dev/null; then
|
|
|
|
if ! command -v "$WHISPER_CLI" &>/dev/null; then
|
|
|
|
echo "Error: whisper-cli no se encuentra en $WHISPER_CLI"
|
|
|
|
echo "Error: whisper-cli no se encuentra en $WHISPER_CLI"
|
|
|
|
@ -85,7 +94,10 @@ if ! command -v "$WHISPER_CLI" &>/dev/null; then
|
|
|
|
fi
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Convertir archivo a WAV
|
|
|
|
# Convertir archivo a WAV
|
|
|
|
WAV_FILE="$TEMP_DIR/$(basename "$FILE" .mp4).wav"
|
|
|
|
# WAV_FILE="$TEMP_DIR/$(basename "$FILE" .mp4).wav"
|
|
|
|
|
|
|
|
FILENAME="$(basename "$FILE")"
|
|
|
|
|
|
|
|
NAME="${FILENAME%.*}"
|
|
|
|
|
|
|
|
WAV_FILE="$TEMP_DIR/$NAME.wav"
|
|
|
|
ffmpeg -i "$FILE" -acodec pcm_s16le -ac 1 -ar 16000 "$WAV_FILE" -y || {
|
|
|
|
ffmpeg -i "$FILE" -acodec pcm_s16le -ac 1 -ar 16000 "$WAV_FILE" -y || {
|
|
|
|
echo "Error al convertir a WAV"
|
|
|
|
echo "Error al convertir a WAV"
|
|
|
|
rm -rf "$TEMP_DIR"
|
|
|
|
rm -rf "$TEMP_DIR"
|
|
|
|
@ -93,8 +105,10 @@ ffmpeg -i "$FILE" -acodec pcm_s16le -ac 1 -ar 16000 "$WAV_FILE" -y || {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Preparar nombres de salida
|
|
|
|
# Preparar nombres de salida
|
|
|
|
OUTPUT_BASE="$(basename "$FILE" .mp4)_${LANGUAGE}"
|
|
|
|
# OUTPUT_BASE="$(basename "$FILE" .mp4)_${LANGUAGE}"
|
|
|
|
OUTPUT_DIR="$(pwd)" # Directorio actual
|
|
|
|
# OUTPUT_DIR="$(pwd)" # Directorio actual
|
|
|
|
|
|
|
|
OUTPUT_BASE="${NAME}_${LANGUAGE}"
|
|
|
|
|
|
|
|
OUTPUT_DIR="$(dirname "$FILE")"
|
|
|
|
OUTPUT_FILE="$OUTPUT_DIR/$OUTPUT_BASE"
|
|
|
|
OUTPUT_FILE="$OUTPUT_DIR/$OUTPUT_BASE"
|
|
|
|
|
|
|
|
|
|
|
|
# Transcribir
|
|
|
|
# Transcribir
|
|
|
|
|