Spaces:
Build error
Build error
Update public/index.html
Browse files- public/index.html +118 -515
public/index.html
CHANGED
|
@@ -49,542 +49,145 @@
|
|
| 49 |
border-radius: 8px;
|
| 50 |
margin: 20px 0;
|
| 51 |
}
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
}
|
| 58 |
-
|
| 59 |
-
.option-item {
|
| 60 |
-
display: flex;
|
| 61 |
-
align-items: center;
|
| 62 |
-
gap: 10px;
|
| 63 |
-
}
|
| 64 |
-
|
| 65 |
-
.button-group {
|
| 66 |
-
display: flex;
|
| 67 |
-
gap: 10px;
|
| 68 |
-
margin: 15px 0;
|
| 69 |
-
}
|
| 70 |
-
|
| 71 |
-
button {
|
| 72 |
-
background: var(--primary-color);
|
| 73 |
-
color: white;
|
| 74 |
-
padding: 8px 16px;
|
| 75 |
-
border: none;
|
| 76 |
-
border-radius: 4px;
|
| 77 |
-
cursor: pointer;
|
| 78 |
-
transition: background 0.2s;
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
-
button:hover {
|
| 82 |
-
background: #0056b3;
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
-
.results-container {
|
| 86 |
-
margin-top: 30px;
|
| 87 |
-
}
|
| 88 |
-
|
| 89 |
-
.results-tabs {
|
| 90 |
-
display: flex;
|
| 91 |
-
gap: 10px;
|
| 92 |
-
margin-bottom: 15px;
|
| 93 |
-
}
|
| 94 |
-
|
| 95 |
-
.tab {
|
| 96 |
-
padding: 8px 16px;
|
| 97 |
-
cursor: pointer;
|
| 98 |
-
border: 1px solid var(--border-color);
|
| 99 |
-
border-radius: 4px;
|
| 100 |
-
transition: all 0.2s;
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
-
.tab.active {
|
| 104 |
-
background: var(--primary-color);
|
| 105 |
-
color: white;
|
| 106 |
-
}
|
| 107 |
-
|
| 108 |
-
.result-panel {
|
| 109 |
-
border: 1px solid var(--border-color);
|
| 110 |
-
border-radius: 4px;
|
| 111 |
-
overflow: hidden;
|
| 112 |
-
}
|
| 113 |
-
|
| 114 |
-
.result-header {
|
| 115 |
-
display: flex;
|
| 116 |
-
justify-content: space-between;
|
| 117 |
-
align-items: center;
|
| 118 |
-
padding: 10px;
|
| 119 |
-
background: var(--background-color);
|
| 120 |
-
border-bottom: 1px solid var(--border-color);
|
| 121 |
-
}
|
| 122 |
-
|
| 123 |
-
.result-content {
|
| 124 |
-
padding: 15px;
|
| 125 |
-
overflow: auto;
|
| 126 |
-
max-height: 500px;
|
| 127 |
-
}
|
| 128 |
-
|
| 129 |
-
.stats-grid {
|
| 130 |
-
display: grid;
|
| 131 |
-
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 132 |
-
gap: 15px;
|
| 133 |
-
margin: 20px 0;
|
| 134 |
-
}
|
| 135 |
-
|
| 136 |
-
.stat-item {
|
| 137 |
-
background: white;
|
| 138 |
-
padding: 15px;
|
| 139 |
-
border-radius: 4px;
|
| 140 |
-
border: 1px solid var(--border-color);
|
| 141 |
-
}
|
| 142 |
-
|
| 143 |
-
.stat-value {
|
| 144 |
-
font-size: 1.2em;
|
| 145 |
-
font-weight: bold;
|
| 146 |
-
color: var(--primary-color);
|
| 147 |
-
}
|
| 148 |
-
|
| 149 |
-
.copy-feedback {
|
| 150 |
-
position: fixed;
|
| 151 |
-
bottom: 20px;
|
| 152 |
-
right: 20px;
|
| 153 |
-
background: var(--success-color);
|
| 154 |
-
color: white;
|
| 155 |
-
padding: 10px 20px;
|
| 156 |
-
border-radius: 4px;
|
| 157 |
-
display: none;
|
| 158 |
-
}
|
| 159 |
-
|
| 160 |
-
.operation-status {
|
| 161 |
-
margin: 20px 0;
|
| 162 |
-
padding: 15px;
|
| 163 |
-
border: 1px solid var(--border-color);
|
| 164 |
-
border-radius: 4px;
|
| 165 |
-
}
|
| 166 |
-
|
| 167 |
-
.status-grid {
|
| 168 |
-
display: grid;
|
| 169 |
-
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 170 |
-
gap: 10px;
|
| 171 |
-
margin-top: 10px;
|
| 172 |
-
}
|
| 173 |
-
|
| 174 |
-
.status-item {
|
| 175 |
-
display: flex;
|
| 176 |
-
align-items: center;
|
| 177 |
-
gap: 8px;
|
| 178 |
-
padding: 8px;
|
| 179 |
-
border-radius: 4px;
|
| 180 |
-
background: var(--background-color);
|
| 181 |
}
|
| 182 |
|
| 183 |
-
.
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
align-items: center;
|
| 189 |
-
justify-content: center;
|
| 190 |
-
color: white;
|
| 191 |
-
font-size: 12px;
|
| 192 |
}
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
}
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
| 200 |
}
|
| 201 |
|
| 202 |
-
.
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
|
|
|
|
|
|
| 206 |
}
|
| 207 |
-
|
| 208 |
-
pre {
|
| 209 |
-
margin: 0;
|
| 210 |
-
border-radius: 4px;
|
| 211 |
-
}
|
| 212 |
-
|
| 213 |
-
code {
|
| 214 |
-
font-family: 'Monaco', 'Menlo', monospace;
|
| 215 |
-
font-size: 14px;
|
| 216 |
-
}
|
| 217 |
-
/* Add to the existing style section */
|
| 218 |
-
.extraction-container {
|
| 219 |
-
margin: 20px 0;
|
| 220 |
-
padding: 20px;
|
| 221 |
-
background: var(--background-color);
|
| 222 |
-
border-radius: 8px;
|
| 223 |
-
}
|
| 224 |
-
|
| 225 |
-
.extraction-container textarea {
|
| 226 |
-
height: 100px;
|
| 227 |
-
margin-bottom: 10px;
|
| 228 |
-
}
|
| 229 |
-
|
| 230 |
-
#executeExtraction:disabled {
|
| 231 |
-
background: var(--secondary-color);
|
| 232 |
-
cursor: not-allowed;
|
| 233 |
-
}
|
| 234 |
-
</style>
|
| 235 |
</head>
|
| 236 |
<body>
|
| 237 |
<div class="container">
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
</div>
|
| 255 |
-
<div class="option-item">
|
| 256 |
-
<input type="checkbox" id="removeScripts" name="removeScripts" checked>
|
| 257 |
-
<label for="removeScripts">Remove scripts</label>
|
| 258 |
-
</div>
|
| 259 |
-
<div class="option-item">
|
| 260 |
-
<input type="checkbox" id="removeStyles" name="removeStyles" checked>
|
| 261 |
-
<label for="removeStyles">Remove styles</label>
|
| 262 |
-
</div>
|
| 263 |
-
<div class="option-item">
|
| 264 |
-
<input type="checkbox" id="handleRepeatingElements" name="handleRepeatingElements" checked>
|
| 265 |
-
<label for="handleRepeatingElements">Handle repeating elements</label>
|
| 266 |
-
</div>
|
| 267 |
-
<div class="option-item">
|
| 268 |
-
<input type="checkbox" id="truncateText" name="truncateText" checked>
|
| 269 |
-
<label for="truncateText">Truncate text</label>
|
| 270 |
-
</div>
|
| 271 |
-
<div class="option-item">
|
| 272 |
-
<label for="truncateLength">Max text length:</label>
|
| 273 |
-
<input type="number" id="truncateLength" name="truncateLength" value="100" min="10" max="1000">
|
| 274 |
-
</div>
|
| 275 |
-
<div class="option-item">
|
| 276 |
-
<input type="checkbox" id="minifyHtml" name="minifyHtml" checked>
|
| 277 |
-
<label for="minifyHtml">Minify HTML</label>
|
| 278 |
-
</div>
|
| 279 |
-
<div class="option-item">
|
| 280 |
-
<input type="checkbox" id="removeMedia" name="removeMedia" checked>
|
| 281 |
-
<label for="removeMedia">Remove media</label>
|
| 282 |
-
</div>
|
| 283 |
-
</div>
|
| 284 |
-
</div>
|
| 285 |
-
|
| 286 |
-
<div class="extraction-container">
|
| 287 |
-
<h3>Data Extraction</h3>
|
| 288 |
-
<textarea
|
| 289 |
-
id="extractionQuery"
|
| 290 |
-
placeholder="Enter your extraction query (e.g., 'extract product title and price')"
|
| 291 |
-
></textarea>
|
| 292 |
-
<div class="button-group">
|
| 293 |
-
<button type="button" id="generateScript">Generate Extraction Script</button>
|
| 294 |
-
<button type="button" id="executeExtraction" disabled>Execute Extraction</button>
|
| 295 |
-
</div>
|
| 296 |
-
</div>
|
| 297 |
-
|
| 298 |
-
<div class="button-group">
|
| 299 |
-
<input type="file" accept=".html,.htm" id="fileInput">
|
| 300 |
-
<button type="submit">Process HTML</button>
|
| 301 |
-
</div>
|
| 302 |
-
</form>
|
| 303 |
-
|
| 304 |
-
<div id="operationStatus" class="operation-status" style="display: none;">
|
| 305 |
-
<h3>Operation Status</h3>
|
| 306 |
-
<div class="status-grid"></div>
|
| 307 |
-
</div>
|
| 308 |
-
|
| 309 |
-
<div id="stats" class="stats-grid" style="display: none;"></div>
|
| 310 |
-
|
| 311 |
-
<div class="results-container" style="display: none;">
|
| 312 |
-
<div class="results-tabs">
|
| 313 |
-
<div class="tab active" data-view="html">Compressed HTML</div>
|
| 314 |
-
<div class="tab" data-view="json">JSON Structure</div>
|
| 315 |
-
<div class="tab" data-view="extraction">Extraction Results</div>
|
| 316 |
-
</div>
|
| 317 |
-
|
| 318 |
-
<div class="result-panel" id="htmlView">
|
| 319 |
-
<div class="result-header">
|
| 320 |
-
<h3>HTML Output</h3>
|
| 321 |
<div class="button-group">
|
| 322 |
-
|
| 323 |
-
|
| 324 |
</div>
|
| 325 |
-
</div>
|
| 326 |
-
<div class="result-content">
|
| 327 |
-
<pre><code class="language-html" id="htmlOutput"></code></pre>
|
| 328 |
-
</div>
|
| 329 |
</div>
|
| 330 |
-
|
| 331 |
-
<div class="
|
| 332 |
-
|
| 333 |
-
<
|
| 334 |
-
<div class="button-group">
|
| 335 |
-
<button onclick="copyResult('json')">Copy</button>
|
| 336 |
-
<button onclick="downloadResult('json')">Download</button>
|
| 337 |
-
</div>
|
| 338 |
-
</div>
|
| 339 |
-
<div class="result-content">
|
| 340 |
-
<pre><code class="language-json" id="jsonOutput"></code></pre>
|
| 341 |
-
</div>
|
| 342 |
</div>
|
| 343 |
-
|
| 344 |
-
<div class="
|
| 345 |
-
|
| 346 |
-
<h3>Extraction Results</h3>
|
| 347 |
-
<div class="button-group">
|
| 348 |
-
<button onclick="copyResult('extraction')">Copy</button>
|
| 349 |
-
<button onclick="downloadResult('extraction')">Download</button>
|
| 350 |
-
</div>
|
| 351 |
-
</div>
|
| 352 |
-
<div class="result-content">
|
| 353 |
<pre><code class="language-json" id="extractionOutput"></code></pre>
|
| 354 |
-
</div>
|
| 355 |
</div>
|
| 356 |
-
</div>
|
| 357 |
-
|
| 358 |
-
<div class="copy-feedback">Copied to clipboard!</div>
|
| 359 |
</div>
|
| 360 |
-
|
| 361 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
|
| 362 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
|
| 363 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
|
| 364 |
<script>
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
});
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
'x-api-key': API_KEY,
|
| 418 |
-
'Content-Type': 'application/json',
|
| 419 |
-
},
|
| 420 |
-
body: JSON.stringify({
|
| 421 |
-
html: htmlContent,
|
| 422 |
-
user_input: userInput
|
| 423 |
-
})
|
| 424 |
-
});
|
| 425 |
-
|
| 426 |
-
const data = await response.json();
|
| 427 |
-
|
| 428 |
-
if (data.status === 'success') {
|
| 429 |
-
currentCheerioScript = data.cheerio_script;
|
| 430 |
-
document.getElementById('executeExtraction').disabled = false;
|
| 431 |
-
|
| 432 |
-
// Show the script in the JSON view
|
| 433 |
-
document.getElementById('jsonOutput').textContent = JSON.stringify({
|
| 434 |
-
cheerio_script: currentCheerioScript
|
| 435 |
-
}, null, 2);
|
| 436 |
-
Prism.highlightAll();
|
| 437 |
-
} else {
|
| 438 |
-
alert('Failed to generate extraction script');
|
| 439 |
-
}
|
| 440 |
-
} catch (error) {
|
| 441 |
-
alert('Error generating script: ' + error.message);
|
| 442 |
-
}
|
| 443 |
-
}
|
| 444 |
-
|
| 445 |
-
// Execute extraction
|
| 446 |
-
async function executeExtraction() {
|
| 447 |
-
if (!currentCheerioScript) {
|
| 448 |
-
alert('Please generate an extraction script first');
|
| 449 |
-
return;
|
| 450 |
-
}
|
| 451 |
-
|
| 452 |
-
const htmlContent = document.getElementById('htmlOutput').textContent;
|
| 453 |
-
|
| 454 |
-
try {
|
| 455 |
-
const response = await fetch('/extract', {
|
| 456 |
-
method: 'POST',
|
| 457 |
-
headers: {
|
| 458 |
-
'Content-Type': 'application/json'
|
| 459 |
-
},
|
| 460 |
-
body: JSON.stringify({
|
| 461 |
-
html: htmlContent,
|
| 462 |
-
script: currentCheerioScript
|
| 463 |
-
})
|
| 464 |
-
});
|
| 465 |
-
|
| 466 |
-
const data = await response.json();
|
| 467 |
-
|
| 468 |
-
document.getElementById('extractionOutput').textContent =
|
| 469 |
-
JSON.stringify(data, null, 2);
|
| 470 |
-
Prism.highlightAll();
|
| 471 |
-
|
| 472 |
-
// Switch to extraction view
|
| 473 |
-
document.querySelector('[data-view="extraction"]').click();
|
| 474 |
-
} catch (error) {
|
| 475 |
-
alert('Error executing extraction: ' + error.message);
|
| 476 |
-
}
|
| 477 |
-
}
|
| 478 |
-
|
| 479 |
-
// Form submission
|
| 480 |
-
form.addEventListener('submit', async (e) => {
|
| 481 |
-
e.preventDefault();
|
| 482 |
-
|
| 483 |
-
const formData = new FormData(form);
|
| 484 |
-
|
| 485 |
-
// Add checkbox states
|
| 486 |
-
document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
|
| 487 |
-
formData.set(checkbox.name, checkbox.checked);
|
| 488 |
});
|
| 489 |
-
|
| 490 |
-
try {
|
| 491 |
-
const response = await fetch('/process', {
|
| 492 |
-
method: 'POST',
|
| 493 |
-
body: formData,
|
| 494 |
-
});
|
| 495 |
-
|
| 496 |
-
const data = await response.json();
|
| 497 |
-
|
| 498 |
-
if (data.error) {
|
| 499 |
-
alert(data.error);
|
| 500 |
-
return;
|
| 501 |
-
}
|
| 502 |
-
|
| 503 |
-
// Display operation status
|
| 504 |
-
const statusContainer = document.querySelector('#operationStatus');
|
| 505 |
-
const statusGrid = statusContainer.querySelector('.status-grid');
|
| 506 |
-
statusContainer.style.display = 'block';
|
| 507 |
-
|
| 508 |
-
statusGrid.innerHTML = Object.entries(data.operationStatus)
|
| 509 |
-
.map(([operation, status]) => `
|
| 510 |
-
<div class="status-item">
|
| 511 |
-
<div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
|
| 512 |
-
${status.success ? '✓' : '✗'}
|
| 513 |
-
</div>
|
| 514 |
-
<div>
|
| 515 |
-
<div>${formatLabel(operation)}</div>
|
| 516 |
-
${status.error ? `<div class="status-message">Error: ${status.error}</div>` : ''}
|
| 517 |
-
</div>
|
| 518 |
-
</div>
|
| 519 |
-
`).join('');
|
| 520 |
-
|
| 521 |
-
// Display stats
|
| 522 |
-
statsContainer.style.display = 'grid';
|
| 523 |
-
statsContainer.innerHTML = Object.entries(data.stats)
|
| 524 |
-
.map(([key, value]) => `
|
| 525 |
-
<div class="stat-item">
|
| 526 |
-
<div class="stat-label">${formatLabel(key)}</div>
|
| 527 |
-
<div class="stat-value">${value}</div>
|
| 528 |
-
</div>
|
| 529 |
-
`).join('');
|
| 530 |
-
|
| 531 |
-
// Show results container
|
| 532 |
-
resultsContainer.style.display = 'block';
|
| 533 |
-
|
| 534 |
-
// Update outputs with syntax highlighting
|
| 535 |
-
document.getElementById('htmlOutput').textContent = data.result.html;
|
| 536 |
-
document.getElementById('jsonOutput').textContent = data.result.json;
|
| 537 |
-
|
| 538 |
-
// Trigger Prism highlighting
|
| 539 |
-
Prism.highlightAll();
|
| 540 |
-
} catch (err) {
|
| 541 |
-
alert('Error processing HTML: ' + err.message);
|
| 542 |
-
}
|
| 543 |
-
});
|
| 544 |
-
|
| 545 |
-
// Event listeners for extraction
|
| 546 |
-
document.getElementById('generateScript').addEventListener('click', generateCheerioScript);
|
| 547 |
-
document.getElementById('executeExtraction').addEventListener('click', executeExtraction);
|
| 548 |
-
|
| 549 |
-
// Utility functions
|
| 550 |
-
function formatLabel(key) {
|
| 551 |
-
return key
|
| 552 |
-
.replace(/([A-Z])/g, ' $1')
|
| 553 |
-
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
| 554 |
-
.toLowerCase()
|
| 555 |
-
.replace(/^./, str => str.toUpperCase())
|
| 556 |
-
.replace('Html', 'HTML');
|
| 557 |
-
}
|
| 558 |
-
|
| 559 |
-
async function copyResult(type) {
|
| 560 |
-
const content = document.getElementById(`${type}Output`).textContent;
|
| 561 |
-
try {
|
| 562 |
-
await navigator.clipboard.writeText(content);
|
| 563 |
-
showCopyFeedback();
|
| 564 |
-
} catch (err) {
|
| 565 |
-
alert('Failed to copy to clipboard');
|
| 566 |
-
}
|
| 567 |
-
}
|
| 568 |
-
|
| 569 |
-
function downloadResult(type) {
|
| 570 |
-
const content = document.getElementById(`${type}Output`).textContent;
|
| 571 |
-
const blob = new Blob([content], { type: 'text/plain' });
|
| 572 |
-
const url = URL.createObjectURL(blob);
|
| 573 |
-
const a = document.createElement('a');
|
| 574 |
-
a.href = url;
|
| 575 |
-
a.download = `compressed.${type}`;
|
| 576 |
-
document.body.appendChild(a);
|
| 577 |
-
a.click();
|
| 578 |
-
document.body.removeChild(a);
|
| 579 |
-
URL.revokeObjectURL(url);
|
| 580 |
-
}
|
| 581 |
-
|
| 582 |
-
function showCopyFeedback() {
|
| 583 |
-
copyFeedback.style.display = 'block';
|
| 584 |
-
setTimeout(() => {
|
| 585 |
-
copyFeedback.style.display = 'none';
|
| 586 |
-
}, 2000);
|
| 587 |
-
}
|
| 588 |
</script>
|
| 589 |
-
|
| 590 |
</html>
|
|
|
|
| 49 |
border-radius: 8px;
|
| 50 |
margin: 20px 0;
|
| 51 |
}
|
| 52 |
+
.input-section {
|
| 53 |
+
margin-bottom: 30px;
|
| 54 |
+
padding: 20px;
|
| 55 |
+
border: 1px solid var(--border-color);
|
| 56 |
+
border-radius: 8px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
}
|
| 58 |
|
| 59 |
+
.script-section {
|
| 60 |
+
margin: 20px 0;
|
| 61 |
+
padding: 20px;
|
| 62 |
+
background: var(--background-color);
|
| 63 |
+
border-radius: 8px;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
}
|
| 65 |
|
| 66 |
+
#userInput {
|
| 67 |
+
width: 100%;
|
| 68 |
+
padding: 12px;
|
| 69 |
+
margin-bottom: 15px;
|
| 70 |
+
border: 1px solid var(--border-color);
|
| 71 |
+
border-radius: 4px;
|
| 72 |
}
|
| 73 |
|
| 74 |
+
#scriptOutput {
|
| 75 |
+
background: white;
|
| 76 |
+
padding: 15px;
|
| 77 |
+
border-radius: 4px;
|
| 78 |
+
margin-top: 15px;
|
| 79 |
}
|
| 80 |
|
| 81 |
+
.extraction-results {
|
| 82 |
+
margin-top: 20px;
|
| 83 |
+
padding: 15px;
|
| 84 |
+
background: white;
|
| 85 |
+
border-radius: 4px;
|
| 86 |
+
border: 1px solid var(--border-color);
|
| 87 |
}
|
| 88 |
+
</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
</head>
|
| 90 |
<body>
|
| 91 |
<div class="container">
|
| 92 |
+
<h1>HTML Data Extractor</h1>
|
| 93 |
+
|
| 94 |
+
<div class="input-section">
|
| 95 |
+
<h2>Input HTML</h2>
|
| 96 |
+
<textarea
|
| 97 |
+
id="htmlInput"
|
| 98 |
+
placeholder="Paste your HTML here..."
|
| 99 |
+
></textarea>
|
| 100 |
+
|
| 101 |
+
<h3>Extraction Query</h3>
|
| 102 |
+
<input
|
| 103 |
+
type="text"
|
| 104 |
+
id="userInput"
|
| 105 |
+
placeholder="Describe what data you want to extract..."
|
| 106 |
+
>
|
| 107 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
<div class="button-group">
|
| 109 |
+
<button id="generateScript">Generate Script</button>
|
| 110 |
+
<button id="extractData" disabled>Extract Data</button>
|
| 111 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
</div>
|
| 113 |
+
|
| 114 |
+
<div class="script-section" style="display: none;">
|
| 115 |
+
<h2>Generated Cheerio Script</h2>
|
| 116 |
+
<pre><code class="language-javascript" id="scriptOutput"></code></pre>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
</div>
|
| 118 |
+
|
| 119 |
+
<div class="extraction-results" style="display: none;">
|
| 120 |
+
<h2>Extracted Data</h2>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
<pre><code class="language-json" id="extractionOutput"></code></pre>
|
|
|
|
| 122 |
</div>
|
|
|
|
|
|
|
|
|
|
| 123 |
</div>
|
| 124 |
+
|
|
|
|
|
|
|
|
|
|
| 125 |
<script>
|
| 126 |
+
const generateScriptBtn = document.getElementById('generateScript');
|
| 127 |
+
const extractDataBtn = document.getElementById('extractData');
|
| 128 |
+
const htmlInput = document.getElementById('htmlInput');
|
| 129 |
+
const userInput = document.getElementById('userInput');
|
| 130 |
+
const scriptSection = document.querySelector('.script-section');
|
| 131 |
+
const extractionResults = document.querySelector('.extraction-results');
|
| 132 |
+
|
| 133 |
+
let currentScript = '';
|
| 134 |
+
|
| 135 |
+
generateScriptBtn.addEventListener('click', async () => {
|
| 136 |
+
try {
|
| 137 |
+
const response = await fetch('https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script', {
|
| 138 |
+
method: 'POST',
|
| 139 |
+
headers: {
|
| 140 |
+
'accept': 'application/json',
|
| 141 |
+
'x-api-key': 'ae54a922-ed3a-4634-be4a-4e4dd470800a',
|
| 142 |
+
'Content-Type': 'application/json',
|
| 143 |
+
},
|
| 144 |
+
body: JSON.stringify({
|
| 145 |
+
html: htmlInput.value,
|
| 146 |
+
user_input: userInput.value
|
| 147 |
+
})
|
| 148 |
+
});
|
| 149 |
+
|
| 150 |
+
const data = await response.json();
|
| 151 |
+
|
| 152 |
+
if (data.status === 'success') {
|
| 153 |
+
currentScript = data.cheerio_script;
|
| 154 |
+
document.getElementById('scriptOutput').textContent = currentScript;
|
| 155 |
+
scriptSection.style.display = 'block';
|
| 156 |
+
extractDataBtn.disabled = false;
|
| 157 |
+
Prism.highlightAll();
|
| 158 |
+
}
|
| 159 |
+
} catch (error) {
|
| 160 |
+
alert('Error generating script: ' + error.message);
|
| 161 |
+
}
|
| 162 |
});
|
| 163 |
+
|
| 164 |
+
extractDataBtn.addEventListener('click', async () => {
|
| 165 |
+
try {
|
| 166 |
+
const response = await fetch('/extract', {
|
| 167 |
+
method: 'POST',
|
| 168 |
+
headers: {
|
| 169 |
+
'Content-Type': 'application/json'
|
| 170 |
+
},
|
| 171 |
+
body: JSON.stringify({
|
| 172 |
+
html: htmlInput.value,
|
| 173 |
+
script: currentScript
|
| 174 |
+
})
|
| 175 |
+
});
|
| 176 |
+
|
| 177 |
+
const data = await response.json();
|
| 178 |
+
|
| 179 |
+
if (data.success) {
|
| 180 |
+
document.getElementById('extractionOutput').textContent =
|
| 181 |
+
JSON.stringify(data.data, null, 2);
|
| 182 |
+
extractionResults.style.display = 'block';
|
| 183 |
+
Prism.highlightAll();
|
| 184 |
+
} else {
|
| 185 |
+
alert('Extraction failed: ' + data.error);
|
| 186 |
+
}
|
| 187 |
+
} catch (error) {
|
| 188 |
+
alert('Error during extraction: ' + error.message);
|
| 189 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
</script>
|
| 192 |
+
</body>
|
| 193 |
</html>
|