import gradio as gr
import requests
import re
import time
from datetime import datetime
from typing import List, Dict, Tuple, Optional


class GGUFModelFinder:
    def __init__(self):
        self.popular_gguf_creators = [
            "TheBloke", "bartowski", "mradermacher", "microsoft",
            "QuantFactory", "lmstudio-ai", "huggingface", "mlabonne",
            "NousResearch", "MaziyarPanahi"
        ]
        self.api_base = "https://huggingface.co/api"
        self.headers = {
            "User-Agent": "GGUF-Model-Finder/1.0"
        }

    def clean_model_name(self, model_name: str) -> str:
        """Clean and normalize a model name for better searching."""
        cleaned = model_name.strip()

        # Remove author/organization prefix if present
        if "/" in cleaned:
            cleaned = cleaned.split("/")[-1]

        # Remove common quantization/variant suffixes
        suffixes_to_remove = [
            "-GGUF", "-gguf", "-GPTQ", "-gptq", "-AWQ", "-awq",
            "-HF", "-hf", "-chat", "-instruct", "-base",
            "-v1", "-v2", "-v3", "-uncensored", "-finetune"
        ]
        for suffix in suffixes_to_remove:
            if cleaned.lower().endswith(suffix.lower()):
                cleaned = cleaned[:-len(suffix)]

        return cleaned.strip()

    def search_models(self, query: str, author: Optional[str] = None,
                      limit: int = 20) -> List[Dict]:
        """Search for models using the Hugging Face API."""
        try:
            search_url = f"{self.api_base}/models"
            params = {
                "search": query,
                "filter": "gguf",
                "limit": limit,
                "sort": "downloads"
            }
            if author:
                params["author"] = author

            response = requests.get(search_url, params=params,
                                    headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return []
        except Exception as e:
            print(f"Error searching models: {e}")
            return []

    def search_gguf_variants(self, model_name: str) -> List[Dict]:
        """Search for GGUF variants of a given model."""
        cleaned_name = self.clean_model_name(model_name)
        all_results = []

        # Search with different query variations
        search_terms = [
            cleaned_name,
            f"{cleaned_name} GGUF",
            f"{cleaned_name}-GGUF",
            f"{cleaned_name}_GGUF"
        ]

        # Search through popular GGUF creators
        for creator in self.popular_gguf_creators:
            for term in search_terms:
                results = self.search_models(term, author=creator, limit=10)
                all_results.extend(results)
                time.sleep(0.1)  # Rate limiting

        # Also search generally, without an author filter
        for term in search_terms:
            results = self.search_models(term, limit=15)
            all_results.extend(results)
            time.sleep(0.1)

        # Remove duplicates and keep only relevant GGUF repos
        seen_ids = set()
        filtered_results = []
        for model in all_results:
            model_id = model.get('id', '')
            if model_id not in seen_ids and 'gguf' in model_id.lower():
                seen_ids.add(model_id)
                # Check whether the model name is relevant
                model_name_clean = self.clean_model_name(model_id)
                if self.is_relevant_match(cleaned_name, model_name_clean):
                    filtered_results.append(model)

        # Sort by downloads (descending) and return the top 20 results
        filtered_results.sort(key=lambda x: x.get('downloads', 0), reverse=True)
        return filtered_results[:20]

    def is_relevant_match(self, original: str, candidate: str) -> bool:
        """Check whether a candidate model is a relevant match for the original."""
        original_lower = original.lower()
        candidate_lower = candidate.lower()

        # Direct substring match
        if original_lower in candidate_lower or candidate_lower in original_lower:
            return True

        # Fall back to word overlap, e.g. "llama-2-7b" vs "llama-2-7b-chat":
        # all three original words appear, so the ratio is 1.0 and it matches.
        original_words = set(re.findall(r'\w+', original_lower))
        candidate_words = set(re.findall(r'\w+', candidate_lower))
        if len(original_words) > 0:
            overlap_ratio = len(original_words.intersection(candidate_words)) / len(original_words)
            return overlap_ratio >= 0.6

        return False
    def get_model_details(self, model_id: str) -> Dict:
        """Get detailed information about a specific model."""
        try:
            url = f"{self.api_base}/models/{model_id}"
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            return {}
        except Exception as e:
            print(f"Error getting model details: {e}")
            return {}

    def format_model_info(self, model: Dict) -> str:
        """Format model information for display."""
        model_id = model.get('id', 'Unknown')
        downloads = model.get('downloads', 0)
        likes = model.get('likes', 0)
        updated = model.get('lastModified', 'Unknown')

        # Format the date
        if updated != 'Unknown':
            try:
                date_obj = datetime.fromisoformat(updated.replace('Z', '+00:00'))
                updated = date_obj.strftime('%Y-%m-%d')
            except ValueError:
                pass

        # Get model size info if available
        size_info = ""
        if 'siblings' in model:
            total_size = 0
            file_count = 0
            for sibling in model['siblings']:
                if sibling.get('rfilename', '').endswith('.gguf'):
                    file_count += 1
                    if 'size' in sibling:
                        total_size += sibling['size']
            if total_size > 0:
                size_gb = total_size / (1024**3)
                size_info = f" | Size: {size_gb:.1f}GB ({file_count} GGUF files)"

        model_url = f"https://huggingface.co/{model_id}"
        return f"""
**[{model_id}]({model_url})**
- Downloads: {downloads:,} | Likes: {likes} | Updated: {updated}{size_info}
"""


def find_gguf_models(model_name: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Main function to find GGUF models."""
    if not model_name.strip():
        return "Please enter a model name to search for.", ""

    progress(0.1, desc="Initializing search...")
    finder = GGUFModelFinder()

    progress(0.3, desc="Searching for GGUF variants...")
    results = finder.search_gguf_variants(model_name)

    progress(0.8, desc="Formatting results...")
    if not results:
        no_results = f"""
# No GGUF Models Found 😞

Could not find any GGUF variants for **{model_name}**.

## Suggestions:
1. **Check the spelling** of the model name
2. **Try a simpler name** (e.g., just "llama-2-7b" instead of "meta-llama/Llama-2-7b-chat-hf")
3. **Search manually** on [Hugging Face](https://huggingface.co/models?other=gguf) with the GGUF filter
4. **Check popular GGUF creators**:
   - [ReallyFloppyPenguin](https://huggingface.co/ReallyFloppyPenguin)
   - [TheBloke](https://huggingface.co/TheBloke)
   - [bartowski](https://huggingface.co/bartowski)
   - [mradermacher](https://huggingface.co/mradermacher)
   - [QuantFactory](https://huggingface.co/QuantFactory)

The model you're looking for might not have been converted to GGUF format yet,
or might be available under a different name.
"""
        return no_results, ""

    # Create the main results
    results_md = f"""
# GGUF Models Found for "{model_name}" 🎯

Found **{len(results)}** GGUF variant(s):

"""
    for i, model in enumerate(results, 1):
        results_md += f"{i}. {finder.format_model_info(model)}\n"

    # Create additional info
    additional_info = f"""
## 📋 What is GGUF?

GGUF (GPT-Generated Unified Format) is a file format for storing models for
inference with GGML and llama.cpp. It's designed to be fast to load and save,
and to be extensible.
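
Once you have picked a repository from the results, one way to fetch a single
GGUF file is with `huggingface_hub` (the repository and file names below are
examples; substitute the ones you want):

```python
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-Chat-GGUF",   # example repo
    filename="llama-2-7b-chat.Q4_K_M.gguf",    # example quant file
)
print(path)  # local path to the downloaded model
```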

## 🔧 How to Use These Models

### With llama.cpp:
```bash
./main -m model.gguf -p "Your prompt here"
```

### With Ollama:
```bash
ollama create mymodel -f Modelfile
ollama run mymodel
```

### With Python (llama-cpp-python):
```python
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")
output = llm("Your prompt here")
```

## 💡 Tips for Choosing a Model

- **Q4_K_M**: Good balance of quality and size
- **Q5_K_M**: Higher quality, larger size
- **Q6_K**: Even higher quality, larger size
- **Q8_0**: Highest quality, largest size

Lower numbers = smaller file size but lower quality.
Higher numbers = larger file size but higher quality.

## 🌟 Popular GGUF Model Creators

The results above are from trusted model quantizers who regularly convert
popular models to GGUF format.
"""

    progress(1.0, desc="Complete!")
    return results_md, additional_info
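

# A minimal sketch of using the finder without the Gradio UI (e.g. from a
# script or REPL). The helper name `print_top_matches` is illustrative and
# not part of the original app.
def print_top_matches(model_name: str, top_n: int = 5) -> None:
    """Print the most-downloaded GGUF matches for a model name."""
    finder = GGUFModelFinder()
    for model in finder.search_gguf_variants(model_name)[:top_n]:
        print(f"{model.get('id')} | {model.get('downloads', 0):,} downloads")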


# Create the Gradio interface
def create_interface():
    with gr.Blocks(
        title="GGUF Model Finder",
        theme=gr.themes.Soft(),
        css="""
        .container { max-width: 1200px; margin: auto; }
        .header { text-align: center; margin: 20px 0; }
        .search-box { margin: 20px 0; }
        """
    ) as iface:
        gr.HTML("""
        <div class="header">
            <h1>🔍 GGUF Model Finder</h1>
            <p>Find GGUF (quantized) versions of your favorite language models for local inference</p>
        </div>
""") with gr.Row(): with gr.Column(scale=3): model_input = gr.Textbox( label="Model Name", placeholder="e.g., llama-2-7b, mistral-7b, codellama-34b, deepseek-coder-6.7b", info="Enter the name of the model you want to find GGUF versions for", lines=1 ) with gr.Column(scale=1): search_btn = gr.Button("🔍 Search GGUF Models", variant="primary", size="lg") gr.HTML("""
        <div class="search-box">
            <b>💡 Quick Examples:</b>
            <ul>
                <li><code>llama-2-7b</code> - Meta's Llama 2 7B model</li>
                <li><code>mistral-7b</code> - Mistral AI's 7B model</li>
                <li><code>codellama-34b</code> - Code Llama 34B model</li>
                <li><code>neural-chat-7b</code> - Intel's Neural Chat model</li>
                <li><code>deepseek-coder</code> - DeepSeek Coder model</li>
            </ul>
        </div>
""") with gr.Row(): with gr.Column(scale=2): results_output = gr.Markdown( label="Search Results", value="Enter a model name above and click 'Search GGUF Models' to find quantized versions.", height=400 ) with gr.Column(scale=1): info_output = gr.Markdown( label="Additional Information", value="", height=400 ) # Event handlers search_btn.click( fn=find_gguf_models, inputs=[model_input], outputs=[results_output, info_output], show_progress=True ) model_input.submit( fn=find_gguf_models, inputs=[model_input], outputs=[results_output, info_output], show_progress=True ) gr.HTML("""

        gr.HTML("""
        <div style="text-align: center; margin-top: 20px;">
            <p>Made with ❤️ using Gradio | Data from Hugging Face</p>
            <p>GGUF format by the llama.cpp team</p>
        </div>
""") return iface if __name__ == "__main__": # Create and launch the interface demo = create_interface() demo.launch( server_name="0.0.0.0", server_port=7860, share=True, )