import gradio as gr
import numpy as np
import torch
from datasets import load_dataset
from scipy import signal
from scipy.io import wavfile
from transformers import WhisperProcessor, WhisperForConditionalGeneration
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | model_name = "openai/whisper-small" | 
					
						
						|  | processor = WhisperProcessor.from_pretrained(model_name) | 
					
						
						|  | model = WhisperForConditionalGeneration.from_pretrained(model_name) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def transcribe(audio): | 
					
						
						|  |  | 
					
						
						|  | audio_input = processor(audio, return_tensors="pt").input_values | 
					
						
						|  | with torch.no_grad(): | 
					
						
						|  | logits = model(audio_input).logits | 
					
						
						|  | predicted_ids = torch.argmax(logits, dim=-1) | 
					
						
						|  | transcription = processor.batch_decode(predicted_ids) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | return transcription[0] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
# Build the Gradio UI: a microphone recorder wired to ``transcribe``.
# ``type="filepath"`` means the callback receives a path to a temporary
# audio file rather than a numpy array.
# NOTE(review): ``source=`` was renamed to ``sources=[...]`` in Gradio 4.x —
# confirm the pinned Gradio version still accepts this keyword.
# NOTE(review): the description mentions the bigcode dataset, but the
# imported ``load_dataset`` is never used anywhere in this file — verify
# whether dataset-based prompting was intended but left unimplemented.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Transcription for Developers",
    description="使用 Whisper 和 bigcode 数据集转录开发者相关术语。"
)
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | iface.launch() | 
					
						
						|  |  |