whi2html, marker para convertir PDFs a texto y más o menos eso

main
borja 4 months ago
parent 7a9dfa0332
commit 084a2967ef

@ -9,11 +9,16 @@
# Executables to watch. The single entry is the full path of the Stats.app
# binary. NOTE(review): presumably each entry is passed to check_process as a
# `pgrep -f` pattern — confirm against the loop that consumes this array.
processes=(
"/Applications/Stats.app/Contents/MacOS/Stats"
)
# # Function to check if a process is running
# check_process() {
# local process=$1
# pgrep -f "$process" >/dev/null
# }
# Count the running processes whose full command line matches a pattern.
# Arguments:
#   $1 - pattern handed to `pgrep -f` (matched against the full command line)
# Outputs:
#   The number of matching processes on stdout, exactly as printed by `wc -l`
#   (some platforms pad the number with leading whitespace).
# Note: this function prints a count; it does not itself return true/false.
# A caller that needs "at least 2 instances running" must compare the number.
check_process() {
    local process=$1
    # A single scan is enough: the count is the only result that is used.
    pgrep -f "$process" | wc -l
}
# Check each process and restart Stats.app if necessary

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_chunk_convert

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_gui

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_server

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_single

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/streamlit/bin/streamlit

@ -0,0 +1 @@
/Users/borjarobert/.local/pipx/venvs/streamlit/bin/streamlit.cmd

@ -0,0 +1,97 @@
#!/usr/bin/env bun
// Import the required modules
import { readFile, writeFile } from 'fs/promises';
import { argv } from 'process';
// Characters with special meaning in HTML, mapped to their entity form.
const HTML_ENTITIES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escape text so it can be interpolated into HTML safely.
 * @param {string} value - Raw text.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHTML(value) {
  return String(value).replace(/[&<>"']/g, (char) => HTML_ENTITIES[char]);
}

/**
 * Split a "[timestamp] text" line at its FIRST closing bracket.
 * @param {string} line - One non-empty line of the input file.
 * @returns {{timestamp: string, text: string} | null} The two trimmed parts,
 *   or null when the line contains no "]".
 */
function parseTranscriptLine(line) {
  const closing = line.indexOf(']');
  if (closing === -1) {
    return null;
  }
  return {
    timestamp: line.slice(0, closing).replace('[', '').trim(),
    // Everything after the first "]" is text, including any later brackets.
    text: line.slice(closing + 1).trim(),
  };
}

/**
 * Derive the output path for a given input path.
 * A trailing ".txt" (any case) is swapped for ".html"; any other name gets
 * ".html" appended, so the result can never equal the input path.
 * @param {string} inputFile - Path of the input file.
 * @returns {string} Path of the HTML file to write.
 */
function toOutputPath(inputFile) {
  return /\.txt$/i.test(inputFile)
    ? `${inputFile.slice(0, -'.txt'.length)}.html`
    : `${inputFile}.html`;
}

/**
 * Render one parsed line as a table row, escaping both cells.
 * @param {{timestamp: string, text: string}} entry - Parsed line.
 * @returns {string} The `<tr>` markup for the entry.
 */
function renderRow({ timestamp, text }) {
  return `
      <tr>
        <td class="timestamp">${escapeHTML(timestamp)}</td>
        <td class="text">${escapeHTML(text)}</td>
      </tr>`;
}

/**
 * Convert a text file of "[timestamp] text" lines into an HTML table.
 *
 * Blank lines and lines without a "]" are skipped. The page is written next
 * to the input (see toOutputPath) and the output path is logged.
 *
 * @param {string} inputFile - Path of the text file to convert.
 * @returns {Promise<void>} Resolves when the file is written. Read/write
 *   failures are logged with console.error and are not rethrown.
 */
async function generateHTML(inputFile) {
  try {
    const data = await readFile(inputFile, 'utf-8');

    const rows = data
      .split('\n')
      .filter((line) => line.trim() !== '')
      .map((line) => parseTranscriptLine(line))
      .filter((entry) => entry !== null)
      .map((entry) => renderRow(entry))
      .join('');

    const htmlContent = `<!DOCTYPE html>
<html lang="es">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>${escapeHTML(inputFile)}</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      font-size: 12px;
      margin: 16px;
    }
    table {
      width: 100%;
      border-collapse: collapse;
    }
    th, td {
      padding: 8px;
      text-align: left;
    }
    tr:nth-child(even) {
      background-color: #f2f2f2;
    }
    .timestamp {
      font-family: monospace;
      font-size: 0.7em;
      color: gray;
      width: 30ch;
    }
    .text {
      display: inline-block;
      /* width: calc(100% - 120px); */
    }
  </style>
</head>
<body>
  <table>
    <tbody>${rows}
    </tbody>
  </table>
</body>
</html>
`;

    const outputFile = toOutputPath(inputFile);
    await writeFile(outputFile, htmlContent);
    console.log(`HTML file created: ${outputFile}`);
  } catch (error) {
    console.error('Error:', error);
  }
}
// CLI entry point: the first user-supplied argument names the text file to convert.
const inputFile = argv[2];
if (inputFile) {
  generateHTML(inputFile);
} else {
  // No (or an empty) argument: report it and do nothing else.
  console.error('Please provide a text file as an argument.');
}
Loading…
Cancel
Save