xiddiqui committed on
Commit aa11130 · 1 Parent(s): 91200a4

updated the app.py file; it now uses the Unsloth FastLanguageModel

Files changed (2)
  1. app.py +58 -17
  2. requirements.txt +4 -0
app.py CHANGED
@@ -1,25 +1,66 @@
 import gradio as gr
+
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from unsloth import FastLanguageModel
 
-model_name = "xiddiqui/News_Summarizer" # The repo name of your model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
+dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
 
-# Summarize function
-def summarize_text(input_text):
-    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
-    outputs = model.generate(inputs.input_ids, max_length=150, num_beams=4, early_stopping=True)
-    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "xiddiqui/News_Summarizer",
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,
+    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
+)
+
+# Define the summarization function
+def generate_summary(input_text):
+    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+Summarize the following:
+
+### Input:
+{}
+
+### Response:
+"""
+
+    # Tokenize and prepare the input text
+    inputs = tokenizer(
+        [alpaca_prompt.format(input_text)],
+        return_tensors="pt",
+        truncation=True,
+        max_length=max_seq_length
+    ).to("cpu") # Ensure computations are done on CPU
+
+    # Set up TextStreamer for efficient text generation
+    from transformers import TextStreamer
+    text_streamer = TextStreamer(tokenizer)
+
+    # Generate summary
+    summary_ids = model.generate(
+        **inputs,
+        streamer=text_streamer,
+        max_new_tokens=64 # Limit the length of the output
+    )
+
+    # Decode the output summary
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary
 
-# Gradio Interface
-interface = gr.Interface(
-    fn=summarize_text,
-    inputs=gr.Textbox(lines=10, placeholder="Enter text to summarize..."),
-    outputs=gr.Textbox(lines=5, placeholder="Summary will appear here..."),
-    title="Text Summarizer",
-    description="Provide a lengthy text, and the model will summarize it for you.",
+# Set up Gradio interface
+iface = gr.Interface(
+    fn=generate_summary, # Function to be called on user input
+    inputs="text", # Single text box for user input
+    outputs="text", # Output as text
+    live=True, # Optional: updates summary as user types
+    title="News Summarizer", # Title of the app
+    description="Enter a news article, and get a concise summary of the content."
 )
 
-# Launch the app
-interface.launch()
+# Launch Gradio app
+iface.launch()
+
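Note on the new code: generate_summary pins the inputs to "cpu" even though the model is loaded with load_in_4bit=True; bitsandbytes 4-bit quantization (and Unsloth generally) expects a CUDA GPU, and Unsloth's fast generation path also wants FastLanguageModel.for_inference(model) called once before model.generate. In addition, decoding summary_ids[0] returns the whole sequence, so the Gradio output will echo the Alpaca prompt ahead of the summary. Below is a minimal sketch of a revised function, assuming the model, tokenizer, alpaca_prompt, and max_seq_length defined in app.py above are in scope and a CUDA device is available; it is an illustration, not part of the commit:

from transformers import TextStreamer
from unsloth import FastLanguageModel

# Enable Unsloth's fast inference mode once, after loading the model
FastLanguageModel.for_inference(model)

def generate_summary(input_text):
    # Build the Alpaca-style prompt and move the tokens to the model's own device
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(model.device)  # a 4-bit model lives on the GPU, so follow model.device

    summary_ids = model.generate(
        **inputs,
        streamer=TextStreamer(tokenizer),
        max_new_tokens=64,
    )

    # Decode only the newly generated tokens so the prompt is not echoed back
    new_tokens = summary_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)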
requirements.txt ADDED
@@ -0,0 +1,4 @@
+torch
+transformers
+unsloth
+gradio
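One caveat on these dependencies: unsloth pulls in CUDA-only packages such as bitsandbytes, so the 4-bit path in app.py will only load on GPU hardware. A small guard (an illustrative sketch, not part of the commit) could make the quantization flag degrade gracefully on CPU-only Spaces:

import torch

# Request 4-bit quantization only when a CUDA GPU is actually present
load_in_4bit = torch.cuda.is_available()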