mrfakename committed on
Commit
9581133
·
verified ·
1 Parent(s): 8ec5779

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +5 -7
  2. app.py +129 -0
  3. requirements.txt +4 -0
  4. templates/index.html +385 -0
  5. templates/no_repo.html +13 -0
README.md CHANGED
@@ -1,12 +1,10 @@
1
  ---
2
- title: Hf Search
3
- emoji: 🏆
4
- colorFrom: blue
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.13.2
8
  app_file: app.py
9
  pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: HF Search
3
+ emoji: 🔍
4
+ colorFrom: gray
5
+ colorTo: gray
6
  sdk: gradio
7
  sdk_version: 5.13.2
8
  app_file: app.py
9
  pinned: false
10
+ ---
 
 
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from html import escape
2
+ import os
3
+ import shutil
4
+ import time
5
+ from datetime import datetime, timedelta
6
+ import bleach
7
+ import requests
8
+ from flask import Flask, request, jsonify, render_template
9
+ from whoosh.index import create_in, open_dir
10
+ from whoosh.fields import Schema, TEXT, ID, BOOLEAN
11
+ from whoosh.qparser import QueryParser
12
+
13
# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)

# Folder holding one Whoosh index sub-directory per repository, and the age
# after which a repository's index is considered stale.
BASE_INDEX_DIR = "discussion_indices"
CACHE_DURATION = timedelta(hours=24)

# Each discussion is stored as a single document; every field is stored so
# that search hits can be rendered without going back to the API.
schema = Schema(
    discussion_id=ID(stored=True),
    title=TEXT(stored=True),
    content=TEXT(stored=True),
    author=TEXT(stored=True),
    is_pr=BOOLEAN(stored=True),
    is_open=BOOLEAN(stored=True),
)
27
+
28
def get_repo_index_dir(repo_name):
    """Return the directory that holds the search index for *repo_name*.

    The repository name comes from the HTTP request and the returned path is
    later handed to ``shutil.rmtree`` by ``index_discussions``, so it must
    never resolve to ``BASE_INDEX_DIR`` itself or to anything outside it.
    Every character other than letters, digits, ``.``, ``_`` and ``-``
    (path separators included) is replaced with ``_``; ordinary
    ``owner/name`` style names map to the same directory as before.

    Args:
        repo_name: Repository identifier as received from the client.

    Returns:
        Path of the per-repository index directory under ``BASE_INDEX_DIR``.

    Raises:
        ValueError: If the sanitised name is empty or made only of dots
            (for example ``.`` or ``..``).
    """
    safe_name = ''.join(
        ch if ch.isalnum() or ch in '._-' else '_' for ch in repo_name
    )
    # '', '.' and '..' would make os.path.join() return the base directory or
    # its parent rather than a dedicated sub-directory.
    if not safe_name.strip('.'):
        raise ValueError(f'Invalid repository name: {repo_name!r}')
    return os.path.join(BASE_INDEX_DIR, safe_name)
32
+
33
def get_repo_last_indexed_file(repo_name):
    """Return the path of the timestamp file kept inside the repo's index directory."""
    index_dir = get_repo_index_dir(repo_name)
    return os.path.join(index_dir, 'last_indexed.txt')
35
+
36
def needs_reindex(repo_name):
    """Tell whether the index for *repo_name* is missing or out of date.

    Args:
        repo_name: Repository identifier used to locate the timestamp file.

    Returns:
        ``True`` when the timestamp file is absent, does not contain a
        number, or records a time more than ``CACHE_DURATION`` ago;
        ``False`` otherwise.
    """
    last_indexed_file = get_repo_last_indexed_file(repo_name)
    try:
        with open(last_indexed_file, 'r') as f:
            last_indexed = float(f.read().strip())
    except (FileNotFoundError, NotADirectoryError, ValueError):
        # No timestamp yet, or an empty/corrupt one (e.g. an interrupted
        # write): rebuild instead of failing every search with an error.
        return True

    # Compare raw epoch seconds; subtracting naive local datetimes gives the
    # wrong age when a daylight-saving change falls inside the interval.
    age_seconds = time.time() - last_indexed
    # Written as "not (age <= limit)" so that a NaN timestamp counts as stale.
    return not age_seconds <= CACHE_DURATION.total_seconds()
45
+
46
def index_discussions(repo_name):
    """Rebuild the on-disk search index for *repo_name* from the Hugging Face API.

    All discussions and their comments are downloaded first; only when every
    request has succeeded is the previous index directory removed and a new
    one written, so a network failure leaves the existing index untouched.
    On success the current time is recorded in the repo's timestamp file.

    Args:
        repo_name: Repository path as used in the API URL.

    Raises:
        requests.RequestException: On timeouts, connection errors or
            non-2xx responses from the API.
        KeyError: If a response lacks one of the expected fields.
    """
    base_url = f'https://huggingface.co/api/{repo_name}/discussions'

    # NOTE(review): only the first listing response is read; confirm whether
    # the API paginates this endpoint.
    documents = []
    for discussion in _fetch_json(base_url)['discussions']:
        details = _fetch_json(f'{base_url}/{discussion["num"]}')

        # Combine all comments into one content string
        content = [
            f'{event["author"]["name"]}: {event["data"]["latest"]["raw"]}'
            for event in details['events']
            if event['type'] == 'comment'
        ]
        documents.append({
            'discussion_id': str(discussion["num"]),
            'title': discussion["title"],
            'content': f"Title: {discussion['title']}\n\n" + '\n'.join(content),
            'author': discussion["author"]["name"],
            'is_pr': discussion["isPullRequest"],
            'is_open': discussion["status"] == "open",
        })

    # Clear and recreate index directory (only now that the data is in hand)
    index_dir = get_repo_index_dir(repo_name)
    if os.path.exists(index_dir):
        shutil.rmtree(index_dir)
    os.makedirs(index_dir, exist_ok=True)

    ix = create_in(index_dir, schema)
    # As a context manager the writer commits on success and cancels (and so
    # releases its lock) if adding a document raises.
    with ix.writer() as writer:
        for document in documents:
            writer.add_document(**document)

    # Update last indexed timestamp
    with open(get_repo_last_indexed_file(repo_name), 'w') as f:
        f.write(str(time.time()))


def _fetch_json(url, timeout=30):
    """GET *url* and return its decoded JSON body, raising on HTTP errors or timeouts."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
85
+
86
@app.route('/')
def index():
    """Serve the search page, or an explanatory page when no ``repo`` is given."""
    params = request.args
    repo_name = params.get('repo')
    query = params.get('query')
    if repo_name:
        return render_template('index.html', repo_name=repo_name, query=query)
    return render_template('no_repo.html')
93
+
94
@app.route('/search', methods=['POST'])
def search():
    """Search a repository's discussions and return the hits as JSON.

    Expects a JSON object with string fields ``repo`` and ``query``. The
    repository's index is (re)built first when it is missing or stale.

    Returns:
        ``{'results': [...]}`` on success; ``{'error': ...}`` with status 400
        for a missing/invalid ``repo`` or ``query``, or 502 when the
        discussions could not be fetched from the upstream API.
    """
    # silent=True yields None instead of raising for a missing, malformed or
    # wrongly-typed body; anything that is not a JSON object is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    query = data.get('query')
    repo_name = data.get('repo')
    if not repo_name:
        return jsonify({'error': 'No repository provided'}), 400

    if not query:
        return jsonify({'error': 'No query provided'}), 400

    if not isinstance(repo_name, str) or not isinstance(query, str):
        return jsonify({'error': 'Repository and query must be strings'}), 400

    # repo_name is interpolated into an upstream URL and a filesystem path, so
    # only accept '/'-separated segments of [A-Za-z0-9._-] that are not made
    # purely of dots (which rules out '', '.' and '..').
    allowed = set(
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-'
    )
    if not all(
        segment.strip('.') and set(segment) <= allowed
        for segment in repo_name.split('/')
    ):
        return jsonify({'error': 'Invalid repository name'}), 400

    # Check if we need to reindex
    if needs_reindex(repo_name):
        try:
            index_discussions(repo_name)
        except (requests.RequestException, KeyError, ValueError):
            # Network failure, or a response without the expected fields
            # (for instance an unknown repository).
            return jsonify(
                {'error': 'Could not fetch discussions for this repository'}
            ), 502

    # Search the index
    ix = open_dir(get_repo_index_dir(repo_name))
    with ix.searcher() as searcher:
        query_parser = QueryParser("content", ix.schema)
        q = query_parser.parse(query)
        results = searcher.search(q)

        # Format results while the searcher is still open. Text fields are
        # HTML-escaped here because the page inserts them as markup.
        # NOTE(review): only <b> survives the sanitiser below; confirm that
        # the highlighter in use emits <b> rather than another tag.
        formatted_results = [{
            'discussion_id': escape(result['discussion_id']),
            'title': escape(result['title']),
            'author': escape(result['author']),
            'excerpt': bleach.clean(result.highlights("content"), tags=['b'], strip=True),
            'is_pr': result['is_pr'],
            'is_open': result['is_open'],
        } for result in results]

    return jsonify({'results': formatted_results})
127
+
128
if __name__ == '__main__':
    # Run Flask's built-in server, listening on every interface, port 7860.
    app.run(host='0.0.0.0', port=7860)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ bleach
2
+ whoosh
3
+ flask
4
+ requests
templates/index.html ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+
4
+ <head>
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>HF Discussion Search</title>
7
+ <style>
8
+ :root {
9
+ --bg-primary: #ffffff;
10
+ --bg-secondary: #f3f4f6;
11
+ --text-primary: #111827;
12
+ --text-secondary: #4b5563;
13
+ --border-color: #e5e7eb;
14
+ --accent-color: #2563eb;
15
+ --accent-hover: #1d4ed8;
16
+ --focus-ring: #3b82f680;
17
+ }
18
+
19
+ @media (prefers-color-scheme: dark) {
20
+ :root {
21
+ --bg-primary: #1f2937;
22
+ --bg-secondary: #111827;
23
+ --text-primary: #f9fafb;
24
+ --text-secondary: #9ca3af;
25
+ --border-color: #374151;
26
+ --accent-color: #3b82f6;
27
+ --accent-hover: #2563eb;
28
+ }
29
+ }
30
+
31
+ * {
32
+ box-sizing: border-box;
33
+ font-family: system-ui, -apple-system, sans-serif;
34
+ }
35
+
36
+ /* Custom scrollbar styles */
37
+ ::-webkit-scrollbar {
38
+ width: 8px;
39
+ }
40
+
41
+ ::-webkit-scrollbar-track {
42
+ background: var(--bg-secondary);
43
+ }
44
+
45
+ ::-webkit-scrollbar-thumb {
46
+ background: var(--text-secondary);
47
+ border-radius: 4px;
48
+ }
49
+
50
+ ::-webkit-scrollbar-thumb:hover {
51
+ background: var(--text-primary);
52
+ }
53
+
54
+ /* Firefox scrollbar */
55
+ * {
56
+ scrollbar-width: thin;
57
+ scrollbar-color: var(--text-secondary) var(--bg-secondary);
58
+ }
59
+
60
+ body {
61
+ margin: 0;
62
+ padding: 20px;
63
+ background: var(--bg-primary);
64
+ color: var(--text-primary);
65
+ min-height: 100vh;
66
+ }
67
+
68
+ .container {
69
+ max-width: 800px;
70
+ margin: 0 auto;
71
+ }
72
+
73
+ h1 {
74
+ font-size: 24px;
75
+ margin: 0 0 20px 0;
76
+ color: var(--text-primary);
77
+ }
78
+
79
+ .search-container {
80
+ position: relative;
81
+ margin-bottom: 30px;
82
+ }
83
+
84
+ .search-input {
85
+ width: 100%;
86
+ padding: 12px 16px 12px 40px;
87
+ border: 1px solid var(--border-color);
88
+ border-radius: 8px;
89
+ background: var(--bg-secondary);
90
+ color: var(--text-primary);
91
+ font-size: 16px;
92
+ transition: border-color 0.2s, box-shadow 0.2s;
93
+ }
94
+
95
+ .search-icon {
96
+ position: absolute;
97
+ left: 12px;
98
+ top: 50%;
99
+ transform: translateY(-50%);
100
+ color: var(--text-secondary);
101
+ width: 20px;
102
+ height: 20px;
103
+ }
104
+
105
+ .search-input:focus {
106
+ outline: none;
107
+ border-color: var(--accent-color);
108
+ box-shadow: 0 0 0 3px var(--focus-ring);
109
+ }
110
+
111
+ .results {
112
+ display: grid;
113
+ gap: 16px;
114
+ }
115
+
116
+ .result-card {
117
+ padding: 16px;
118
+ background: var(--bg-secondary);
119
+ border: 1px solid var(--border-color);
120
+ border-radius: 8px;
121
+ transition: transform 0.2s;
122
+ position: relative;
123
+ display: flex;
124
+ gap: 16px;
125
+ }
126
+
127
+ .type-icon-container {
128
+ flex-shrink: 0;
129
+ width: 24px;
130
+ height: 24px;
131
+ color: var(--text-secondary);
132
+ }
133
+
134
+ .result-content {
135
+ flex-grow: 1;
136
+ min-width: 0;
137
+ }
138
+
139
+ .result-card.closed {
140
+ opacity: 0.75;
141
+ }
142
+
143
+ .result-card:hover {
144
+ transform: translateY(-2px);
145
+ }
146
+
147
+ .result-title {
148
+ font-size: 18px;
149
+ font-weight: 600;
150
+ margin-bottom: 8px;
151
+ color: var(--accent-color);
152
+ }
153
+
154
+ .result-author {
155
+ font-size: 14px;
156
+ color: var(--text-secondary);
157
+ margin-bottom: 12px;
158
+ }
159
+
160
+ .result-excerpt {
161
+ font-size: 14px;
162
+ line-height: 1.5;
163
+ color: var(--text-primary);
164
+ }
165
+
166
/* Search-term highlights. The /search endpoint's sanitiser only allows <b>
   tags in excerpts, so <b> is styled alongside <mark>. */
.result-excerpt mark,
.result-excerpt b {
    background: var(--accent-color);
    color: white;
    padding: 0 2px;
    border-radius: 2px;
}
172
+
173
+ .loader {
174
+ display: none;
175
+ justify-content: center;
176
+ margin: 40px 0;
177
+ }
178
+
179
+ .loader::after {
180
+ content: "";
181
+ width: 30px;
182
+ height: 30px;
183
+ border: 3px solid var(--border-color);
184
+ border-radius: 50%;
185
+ border-top-color: var(--accent-color);
186
+ animation: spin 1s linear infinite;
187
+ }
188
+
189
+ @keyframes spin {
190
+ to {
191
+ transform: rotate(360deg);
192
+ }
193
+ }
194
+
195
+ .no-results {
196
+ text-align: center;
197
+ color: var(--text-secondary);
198
+ padding: 40px 0;
199
+ }
200
+
201
+ .status-indicator {
202
+ position: absolute;
203
+ top: 16px;
204
+ right: 16px;
205
+ display: flex;
206
+ align-items: center;
207
+ gap: 4px;
208
+ font-size: 12px;
209
+ padding: 4px 8px;
210
+ border-radius: 12px;
211
+ }
212
+
213
+ .status-open {
214
+ background: #22c55e20;
215
+ color: #22c55e;
216
+ }
217
+
218
+ .status-closed {
219
+ background: #ef444420;
220
+ color: #ef4444;
221
+ }
222
+
223
+ .filter-controls {
224
+ display: flex;
225
+ gap: 8px;
226
+ margin-bottom: 16px;
227
+ }
228
+
229
+ .filter-button {
230
+ padding: 6px 12px;
231
+ border: 1px solid var(--border-color);
232
+ border-radius: 6px;
233
+ background: var(--bg-secondary);
234
+ color: var(--text-secondary);
235
+ cursor: pointer;
236
+ }
237
+
238
+ .filter-button.active {
239
+ background: var(--accent-color);
240
+ color: white;
241
+ border-color: var(--accent-color);
242
+ }
243
+
244
+ .type-icon {
245
+ width: 24px;
246
+ height: 24px;
247
+ }
248
+ </style>
249
+ </head>
250
+
251
+ <body>
252
+ <div class="container">
253
+ <div class="search-container">
254
+ <svg class="search-icon" viewBox="0 0 20 20" fill="currentColor">
255
+ <path fill-rule="evenodd" d="M8 4a4 4 0 100 8 4 4 0 000-8zM2 8a6 6 0 1110.89 3.476l4.817 4.817a1 1 0 01-1.414 1.414l-4.816-4.816A6 6 0 012 8z" clip-rule="evenodd" />
256
+ </svg>
257
+ <input type="text" id="searchInput" class="search-input" placeholder="Search discussions..." autofocus>
258
+ </div>
259
+ <div class="filter-controls">
260
+ <button class="filter-button active" data-filter="all">All</button>
261
+ <button class="filter-button" data-filter="open">Open</button>
262
+ <button class="filter-button" data-filter="closed">Closed</button>
263
+ </div>
264
+ <div id="loader" class="loader"></div>
265
+ <div id="results" class="results"></div>
266
+ </div>
267
+
268
+ <script>
269
+ const searchInput = document.getElementById('searchInput');
270
+ const loader = document.getElementById('loader');
271
+ const results = document.getElementById('results');
272
+ const filterButtons = document.querySelectorAll('.filter-button');
273
+ let currentFilter = 'all';
274
+ let searchTimeout;
275
+ let currentResults = [];
276
+
277
// Return the inline SVG markup for a result's type: the pull-request glyph
// when isPR is truthy, the discussion (speech bubble) glyph otherwise.
function getTypeIcon(isPR) {
    if (isPR) {
        return `<svg class="type-icon" viewBox="0 0 16 16" fill="currentColor">
<path fill-rule="evenodd" d="M7.177 3.073L9.573.677A.25.25 0 0110 .854v4.792a.25.25 0 01-.427.177L7.177 3.427a.25.25 0 010-.354zM3.75 2.5a.75.75 0 100 1.5.75.75 0 000-1.5zm-2.25.75a2.25 2.25 0 113 2.122v5.256a2.251 2.251 0 11-1.5 0V5.372A2.25 2.25 0 011.5 3.25zM11 2.5h-1V4h1a1 1 0 011 1v5.628a2.251 2.251 0 101.5 0V5A2.5 2.5 0 0011 2.5zm1 10.25a.75.75 0 111.5 0 .75.75 0 01-1.5 0zM3.75 12a.75.75 0 100 1.5.75.75 0 000-1.5z"/>
</svg>`;
    }
    return `<svg class="type-icon" viewBox="0 0 16 16" fill="currentColor">
<path fill-rule="evenodd" d="M1.5 2.75a.25.25 0 01.25-.25h8.5a.25.25 0 01.25.25v5.5a.25.25 0 01-.25.25h-3.5a.75.75 0 00-.53.22L3.5 11.44V9.25a.75.75 0 00-.75-.75h-1a.25.25 0 01-.25-.25v-5.5z"/>
</svg>`;
}
286
+
287
+ function renderResults(results) {
288
+ const filteredResults = results.filter(result => {
289
+ if (currentFilter === 'all') return true;
290
+ if (currentFilter === 'open') return result.is_open;
291
+ if (currentFilter === 'closed') return !result.is_open;
292
+ });
293
+
294
+ if (filteredResults.length === 0) {
295
+ if (currentFilter === 'all') {
296
+ return '<div class="no-results">No results found</div>';
297
+ } else {
298
+ return `<div class="no-results">No ${currentFilter} discussions found</div>`;
299
+ }
300
+ }
301
+
302
+ return filteredResults
303
+ .map(result => `
304
+ <a href="https://huggingface.co/{{ repo_name }}/discussions/${result.discussion_id}"
305
+ class="result-card${!result.is_open ? ' closed' : ''}"
306
+ target="_blank" style="text-decoration: none;">
307
+ <div class="type-icon-container">
308
+ ${getTypeIcon(result.is_pr)}
309
+ </div>
310
+ <div class="result-content">
311
+ <div class="result-title">${result.title}</div>
312
+ <div class="result-author">by ${result.author}</div>
313
+ <div class="result-excerpt">${result.excerpt}</div>
314
+ <div class="status-indicator ${result.is_open ? 'status-open' : 'status-closed'}">
315
+ ${result.is_open ? 'Open' : 'Closed'}
316
+ </div>
317
+ </div>
318
+ </a>
319
+ `).join('');
320
+ }
321
+
322
// Clicking a filter button makes it the only active one, records its
// data-filter value and re-renders the results already held in memory.
for (const clickedButton of filterButtons) {
    clickedButton.addEventListener('click', () => {
        for (const other of filterButtons) {
            other.classList.remove('active');
        }
        clickedButton.classList.add('active');
        currentFilter = clickedButton.dataset.filter;
        results.innerHTML = renderResults(currentResults);
    });
}
330
+
331
+ async function performSearch(query) {
332
+ loader.style.display = 'flex';
333
+ results.innerHTML = '';
334
+
335
+ try {
336
+ const response = await fetch('/search', {
337
+ method: 'POST',
338
+ headers: {
339
+ 'Content-Type': 'application/json',
340
+ },
341
+ body: JSON.stringify({
342
+ query: query,
343
+ repo: '{{ repo_name }}'
344
+ })
345
+ });
346
+
347
+ const data = await response.json();
348
+ currentResults = data.results;
349
+ results.innerHTML = renderResults(currentResults);
350
+ } catch (error) {
351
+ results.innerHTML = '<div class="no-results">An error occurred while searching</div>';
352
+ } finally {
353
+ loader.style.display = 'none';
354
+ }
355
+ }
356
+
357
// Debounced search-as-you-type: wait 300 ms after the last keystroke and only
// search for queries of at least two characters.
searchInput.addEventListener('input', (e) => {
    clearTimeout(searchTimeout);
    const query = e.target.value.trim();

    if (query.length === 0) {
        // Forget the previous hits too; otherwise clicking a filter button
        // would re-render them under an empty search box.
        currentResults = [];
        results.innerHTML = '';
        return;
    }

    if (query.length < 2) return;

    searchTimeout = setTimeout(() => {
        performSearch(query);
    }, 300);
});
372
+
373
+ // Pre-fill search input and trigger search if query parameter exists
374
window.addEventListener('DOMContentLoaded', () => {
    // A ?query=... parameter in the page URL seeds the input and runs the
    // search straight away.
    const initialQuery = new URLSearchParams(window.location.search).get('query');
    if (!initialQuery) {
        return;
    }
    searchInput.value = initialQuery;
    performSearch(initialQuery);
});
382
+ </script>
383
+ </body>
384
+
385
+ </html>
templates/no_repo.html ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>No repository provided</title>
7
+ </head>
8
+ <body>
9
+ <h1>No repository provided</h1>
10
+ <p>If you are trying to access the Search API, please provide a repository name in the URL.</p>
11
+ <p>This is not intended to be used as a public/standalone service. Please see <a href="https://github.com/fakerybakery/hf-tools">the GitHub repository</a> for more information on how to use this tool.</p>
12
+ </body>
13
+ </html>