whi2html, marker para convertir PDFs a texto y más o menos eso
parent
7a9dfa0332
commit
084a2967ef
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker
|
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_chunk_convert
|
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_gui
|
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_server
|
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/marker-pdf/bin/marker_single
|
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/streamlit/bin/streamlit
|
@ -0,0 +1 @@
|
||||
/Users/borjarobert/.local/pipx/venvs/streamlit/bin/streamlit.cmd
|
@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
// Import the required modules
|
||||
import { readFile, writeFile } from 'fs/promises';
|
||||
import { argv } from 'process';
|
||||
|
||||
// Function to generate HTML from the input text
|
||||
// Replacements for the characters that are significant in HTML text and attribute values.
const HTML_ESCAPES = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escape a value so it can be embedded in HTML without being parsed as markup.
 *
 * @param {unknown} value - Value to embed; it is converted with String().
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeHTML(value) {
  return String(value).replace(/[&<>"']/g, (char) => HTML_ESCAPES[char]);
}

/**
 * Split one input line of the form "[timestamp] text" into its two parts.
 *
 * The split happens at the FIRST closing bracket only, so any further "]"
 * characters stay part of the text instead of truncating it.
 *
 * @param {string} line - A single non-empty line of the input file.
 * @returns {{timestamp: string, text: string} | null} The parsed parts, or
 *   null when the line contains no closing bracket.
 */
function parseTimestampedLine(line) {
  const closeIndex = line.indexOf(']');
  if (closeIndex === -1) {
    return null;
  }
  return {
    // Only the first "[" is removed, then surrounding whitespace is trimmed.
    timestamp: line.slice(0, closeIndex).replace('[', '').trim(),
    text: line.slice(closeIndex + 1).trim(),
  };
}

/**
 * Derive the output path for a given input path.
 *
 * A trailing ".txt" extension is swapped for ".html"; any other name gets
 * ".html" appended, so the result never equals the input path and the source
 * file cannot be overwritten.
 *
 * @param {string} inputFile - Path of the input file.
 * @returns {string} Path of the HTML file to write.
 */
function toOutputPath(inputFile) {
  return `${inputFile.replace(/\.txt$/i, '')}.html`;
}

/**
 * Render one parsed line as a table row with escaped cell contents.
 *
 * @param {{timestamp: string, text: string}} entry - Parsed line.
 * @returns {string} The <tr> markup for the entry.
 */
function renderRow({ timestamp, text }) {
  return `        <tr>
          <td class="timestamp">${escapeHTML(timestamp)}</td>
          <td class="text">${escapeHTML(text)}</td>
        </tr>
`;
}

/**
 * Convert a text file of "[timestamp] text" lines into an HTML table and
 * write it next to the input file.
 *
 * Blank lines and lines without a closing bracket are skipped. The input path
 * is used as the page title. Errors (for example an unreadable input file)
 * are logged with console.error and not rethrown, so the returned promise
 * always resolves.
 *
 * @param {string} inputFile - Path of the text file to convert.
 * @returns {Promise<void>} Resolves once the file is written or the error is logged.
 */
async function generateHTML(inputFile) {
  try {
    const data = await readFile(inputFile, 'utf-8');

    // Keep non-blank lines, parse them, drop the unparseable ones, render rows.
    const rows = data
      .split('\n')
      .filter((line) => line.trim() !== '')
      .map(parseTimestampedLine)
      .filter((entry) => entry !== null)
      .map(renderRow)
      .join('');

    const htmlContent = `<!DOCTYPE html>
<html lang="es">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>${escapeHTML(inputFile)}</title>
    <style>
      body {
        font-family: Arial, sans-serif;
        font-size: 12px;
        margin: 16px;
      }
      table {
        width: 100%;
        border-collapse: collapse;
      }
      th, td {
        padding: 8px;
        text-align: left;
      }
      tr:nth-child(even) {
        background-color: #f2f2f2;
      }
      .timestamp {
        font-family: monospace;
        font-size: 0.7em;
        color: gray;
        width: 30ch;
      }
      .text {
        display: inline-block;
        /* width: calc(100% - 120px); */
      }
    </style>
  </head>
  <body>
    <table>
      <tbody>
${rows}      </tbody>
    </table>
  </body>
</html>
`;

    const outputFile = toOutputPath(inputFile);
    await writeFile(outputFile, htmlContent);
    console.log(`HTML file created: ${outputFile}`);
  } catch (error) {
    console.error('Error:', error);
  }
}
|
||||
|
||||
// Script entry point: argv[0] is the runtime and argv[1] the script path, so
// the first user-supplied argument (the text file to convert) sits at index 2.
const [, , inputFile] = argv;

if (inputFile) {
  generateHTML(inputFile);
} else {
  // No (or empty) path given: report the usage problem instead of converting.
  console.error('Please provide a text file as an argument.');
}
|
Loading…
Reference in New Issue