SantiagoTesla committed on
Commit
f589681
·
1 Parent(s): 4e730f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -57
app.py CHANGED
@@ -1,58 +1,2 @@
1
- import transformers
2
  import gradio as gr
3
- from torch import cuda, bfloat16
4
- import torch
5
- from transformers import StoppingCriteria, StoppingCriteriaList
6
-
7
- device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
8
-
9
- model = transformers.AutoModelForCausalLM.from_pretrained(
10
- 'mosaicml/mpt-7b-instruct',
11
- trust_remote_code=True,
12
- torch_dtype=bfloat16,
13
- max_seq_len=2048
14
- )
15
- model.eval()
16
- model.to(device)
17
- print(f"Model loaded on {device}")
18
-
19
- def chatbot(input):
20
- tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
21
-
22
- # mpt-7b is trained to add "<|endoftext|>" at the end of generations
23
- stop_token_ids = tokenizer.convert_tokens_to_ids(["<|endoftext|>"])
24
-
25
- # define custom stopping criteria object
26
- class StopOnTokens(StoppingCriteria):
27
- def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
28
- for stop_id in stop_token_ids:
29
- if input_ids[0][-1] == stop_id:
30
- return True
31
- return False
32
-
33
- stopping_criteria = StoppingCriteriaList([StopOnTokens()])
34
- for i in range(50):
35
- generate_text = transformers.pipeline(
36
- model=model, tokenizer=tokenizer,
37
- return_full_text=True, # langchain expects the full text
38
- task='text-generation',
39
- device=device,
40
- # we pass model parameters here too
41
- stopping_criteria=stopping_criteria, # without this model will ramble
42
- temperature=0.1, # 'randomness' of outputs, 0.0 is the min and 1.0 the max
43
- top_p=0.15, # select from top tokens whose probability add up to 15%
44
- top_k=0, # select from top 0 tokens (because zero, relies on top_p)
45
- max_new_tokens=1000, # max number of tokens to generate in the output
46
- repetition_penalty=1.1 # without this output begins repeating
47
- )
48
-
49
- res = generate_text(input)
50
- output = res[0]["generated_text"]
51
- return output
52
-
53
- inputs = gr.inputs.Textbox(lines=7, label="Chat with AI")
54
- outputs = gr.outputs.Textbox(label="Reply")
55
-
56
- gr.Interface(fn=chatbot, inputs=inputs, outputs=outputs, title="Self_Trained_V2",
57
- description="Ask anything you want",
58
- ).launch(share=True)
 
 
1
  import gradio as gr
2
+ gr.Interface.load("models/anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g").launch()