djuna committed on
Commit
1a81b28
·
verified ·
1 Parent(s): e662215

feat: fetch directly from hf

Browse files
Files changed (1) hide show
  1. index.html +83 -1
index.html CHANGED
@@ -276,7 +276,7 @@
276
 
277
  <div class="card">
278
  <h2>Configuration</h2>
279
- <div class="config-group">
280
  <div class="checkbox-wrapper">
281
  <input type="checkbox" id="addGenerationPrompt">
282
  <label for="addGenerationPrompt">Add Generation Prompt</label>
@@ -286,6 +286,10 @@
286
  <input type="text" id="eosToken" placeholder="EOS Token (e.g., </s>)" style="flex-grow: 1;">
287
  </div>
288
  </div>
 
 
 
 
289
 
290
  <h2>Template</h2>
291
  <textarea id="template" placeholder="Enter your template here..."></textarea>
@@ -433,6 +437,84 @@
433
 
434
  // Initialize
435
  updateMessageDisplay();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  </script>
437
  </body>
438
  </html>
 
276
 
277
  <div class="card">
278
  <h2>Configuration</h2>
279
+ <div class="config-group">
280
  <div class="checkbox-wrapper">
281
  <input type="checkbox" id="addGenerationPrompt">
282
  <label for="addGenerationPrompt">Add Generation Prompt</label>
 
286
  <input type="text" id="eosToken" placeholder="EOS Token (e.g., </s>)" style="flex-grow: 1;">
287
  </div>
288
  </div>
289
+ <div class="input-group">
290
+ <input type="text" id="repoUrl" placeholder="Enter Hugging Face Repo URL" style="flex-grow: 1">
291
+ <button onclick="handleFetchConfig()">Fetch Config</button>
292
+ </div>
293
 
294
  <h2>Template</h2>
295
  <textarea id="template" placeholder="Enter your template here..."></textarea>
 
437
 
438
  // Initialize
439
  updateMessageDisplay();
440
+
441
+ // New Functions for fetching tokenizer config
442
/**
 * Extract the owner and repository name from a Hugging Face model reference.
 *
 * Accepted forms (surrounding whitespace is ignored):
 *   - "owner/repo" (an optional trailing slash is tolerated)
 *   - "huggingface.co/owner/repo" or "hf.co/owner/repo", optionally prefixed
 *     with "http://" / "https://" and/or "www.", and optionally followed by a
 *     trailing slash, a sub-path such as "/tree/main", a query string or a
 *     fragment, so a URL copied from any page of the repo works.
 *
 * @param {string} url - Raw text typed or pasted by the user.
 * @returns {{user: string, repo: string} | null} The parsed parts, or null
 *   when the input is not a recognisable model repository reference.
 */
function parseRepoUrl(url) {
    if (typeof url !== 'string') {
        return null;
    }
    const input = url.trim();

    // Path segments may not contain "/", whitespace, "?" or "#", so query
    // strings and fragments never leak into the repository name.
    const hfUrlRegex = /^(?:https?:\/\/)?(?:www\.)?(?:huggingface\.co|hf\.co)\/([^/\s?#]+)\/([^/\s?#]+)(?:[/?#].*)?$/i;
    const shortRegex = /^([^/\s?#]+)\/([^/\s?#]+)\/?$/;
    const hostOnlyRegex = /^(?:www\.)?(?:huggingface\.co|hf\.co)$/i;
    // URL prefixes that introduce a different repo type, not a model owner.
    const nonModelPrefixes = new Set(['datasets', 'spaces']);

    const urlMatch = input.match(hfUrlRegex);
    const match = urlMatch ?? input.match(shortRegex);
    if (!match) {
        return null;
    }
    const [, user, repo] = match;

    if (urlMatch) {
        // e.g. "huggingface.co/datasets/owner/name" is not a model repo.
        if (nonModelPrefixes.has(user.toLowerCase())) {
            return null;
        }
    } else if (hostOnlyRegex.test(user)) {
        // "huggingface.co/owner" is an incomplete URL, not "owner/repo".
        return null;
    }

    return { user, repo };
}
456
+
457
/**
 * Download and parse a repository's `tokenizer_config.json` from the
 * Hugging Face Hub.
 *
 * Failures (network error, non-2xx status, invalid JSON) are logged, passed
 * to `displayError`, and reported to the caller as `null` rather than thrown.
 *
 * @param {string} user - Repository owner (user or organisation).
 * @param {string} repo - Repository name.
 * @param {string} [revision='main'] - Branch, tag or commit to read from.
 * @returns {Promise<object | null>} The parsed config, or null on failure.
 */
async function fetchTokenizerConfig(user, repo, revision = 'main') {
    // Encode each path segment so characters such as "#", "?" or spaces in
    // user-supplied text cannot change which URL is actually requested.
    const [owner, name, rev] = [user, repo, revision].map((segment) =>
        encodeURIComponent(String(segment))
    );
    const apiUrl = `https://huggingface.co/${owner}/${name}/raw/${rev}/tokenizer_config.json`;

    try {
        const response = await fetch(apiUrl);
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        return await response.json();
    } catch (error) {
        console.error("Error fetching tokenizer config:", error);
        displayError(`Failed to fetch tokenizer config: ${error.message}`);
        return null;
    }
}
471
+
472
/**
 * Report an error to the user and to the developer console.
 *
 * The message is always logged. In a browser it is also shown in an alert
 * element inserted directly after the row containing the `repoUrl` input
 * (created on first use), and cleared again after a short delay so a stale
 * error does not linger next to a later successful fetch. When there is no
 * DOM, or the `repoUrl` input is missing, only the log is written.
 *
 * @param {string} message - Human-readable description of the problem.
 * @returns {void}
 */
function displayError(message) {
    console.error(message);

    if (typeof document === 'undefined') {
        return;
    }
    const inputRow = document.getElementById('repoUrl')?.parentElement;
    if (!inputRow) {
        return;
    }

    let errorBox = document.getElementById('repoUrlError');
    if (!errorBox) {
        errorBox = document.createElement('div');
        errorBox.id = 'repoUrlError';
        // role="alert" makes assistive technology announce the message.
        errorBox.setAttribute('role', 'alert');
        errorBox.style.color = '#b00020';
        errorBox.style.marginTop = '0.5em';
        inputRow.insertAdjacentElement('afterend', errorBox);
    }

    // textContent, not innerHTML: the message may embed remote/user text.
    errorBox.textContent = message;

    // Restart the auto-dismiss countdown for every new message.
    clearTimeout(displayError.dismissTimer);
    displayError.dismissTimer = setTimeout(() => {
        errorBox.textContent = '';
    }, 10000);
}
475
+
476
/**
 * Copy the relevant parts of a tokenizer config into the form: the BOS and
 * EOS token inputs and the chat-template textarea. Missing or unusable
 * values clear the corresponding field.
 *
 * The values are used exactly as parsed from JSON. They are deliberately NOT
 * passed through `innerHTML`: special tokens such as `<s>` or `</s>` look
 * like HTML tags and would be silently dropped from the template, and
 * parsing remote text as markup is unsafe.
 *
 * @param {object | null | undefined} config - Parsed `tokenizer_config.json`.
 *   `bos_token` / `eos_token` may be a string or an object with a string
 *   `content` property; `chat_template` may be a string or an array of
 *   `{ name, template }` entries (the entry named "default" is preferred,
 *   otherwise the first one is used).
 * @returns {void}
 */
function populateConfigFields(config) {
    // Tokens are either plain strings or objects carrying the text in `content`.
    const tokenText = (token) => {
        if (typeof token === 'string') {
            return token;
        }
        return typeof token?.content === 'string' ? token.content : "";
    };

    // Templates are either one string or a list of named templates.
    const templateText = (template) => {
        if (typeof template === 'string') {
            return template;
        }
        if (Array.isArray(template)) {
            const chosen = template.find((entry) => entry?.name === 'default') ?? template[0];
            return typeof chosen?.template === 'string' ? chosen.template : "";
        }
        return "";
    };

    document.getElementById('bosToken').value = tokenText(config?.bos_token);
    document.getElementById('eosToken').value = tokenText(config?.eos_token);
    document.getElementById('template').value = templateText(config?.chat_template);
}
495
+
496
+
497
/**
 * Click handler for the "Fetch Config" button.
 *
 * Reads the repository reference from the `repoUrl` input, resolves it with
 * `parseRepoUrl`, downloads the tokenizer config with `fetchTokenizerConfig`
 * and, when one is returned, fills the form via `populateConfigFields`.
 * While the request is in flight the input's parent element carries the
 * `loading` class; it is always removed again, even if a step throws.
 * Problems are reported through `displayError`; the returned promise does
 * not reject for them, since the inline `onclick` caller cannot handle it.
 *
 * @returns {Promise<void>}
 */
async function handleFetchConfig() {
    const repoInput = document.getElementById('repoUrl');
    // Trim so stray whitespace from copy/paste does not invalidate the input.
    const repoInfo = parseRepoUrl(repoInput.value.trim());

    if (!repoInfo) {
        displayError("Invalid Hugging Face repository URL format.");
        return;
    }

    const inputRow = repoInput.parentElement;
    inputRow.classList.add('loading');

    try {
        const { user, repo } = repoInfo;
        const config = await fetchTokenizerConfig(user, repo);

        if (config) {
            console.log("Tokenizer Config:", JSON.stringify(config, null, 2));
            populateConfigFields(config);
        }
    } catch (error) {
        console.error("Error loading tokenizer config:", error);
        displayError(`Failed to load tokenizer config: ${error.message}`);
    } finally {
        // Never leave the UI stuck in its loading state.
        inputRow.classList.remove('loading');
    }
}
518
  </script>
519
  </body>
520
  </html>