edithram23 committed
Commit 52db929 · verified
Parent: 575b715

Update main.py

Files changed (1): main.py (+5 -12)
main.py CHANGED

@@ -4,20 +4,16 @@ os.environ["HF_HOME"] = "/.cache"
 import re
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
-model_dir_small = 'edithram23/Redaction'
-tokenizer_small = AutoTokenizer.from_pretrained(model_dir_small)
-model_small = AutoModelForSeq2SeqLM.from_pretrained(model_dir_small)
-
 
 model_dir_large = 'edithram23/Redaction_Personal_info_v1'
 tokenizer_large = AutoTokenizer.from_pretrained(model_dir_large)
 model_large = AutoModelForSeq2SeqLM.from_pretrained(model_dir_large)
 
-def mask_generation(text,model=model_small,tokenizer=tokenizer_small,max_len=64):
+def mask_generation(text,model=model_large,tokenizer=tokenizer_large):
     import re
-    inputs = ["Mask Generation: " + text]
-    inputs = tokenizer(inputs, max_length=max_len, truncation=True, return_tensors="pt")
-    output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=max_len2)
+    inputs = ["Mask Generation: " + text+'.']
+    inputs = tokenizer(inputs, max_length=512, truncation=True, return_tensors="pt")
+    output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=512)
     decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
     predicted_title = decoded_output.strip()
     pattern = r'\[.*?\]'
@@ -36,10 +32,7 @@ async def hello():
 
 @app.post("/mask")
 async def mask_input(query):
-    if(len(query)<90):
-        output = mask_generation(query)
-    else:
-        output = mask_generation(query,model_large,tokenizer_large,512,len(query))
+    output = mask_generation(query)
     return {"data" : output}
 
 if __name__ == '__main__':
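
The commit retires the small 'edithram23/Redaction' checkpoint and routes every request through 'edithram23/Redaction_Personal_info_v1' with a fixed 512-token budget. It also deletes two latent bugs in the long-input branch: the old function body referenced an undefined max_len2, and the else-branch passed five positional arguments to a four-parameter function, so any query of 90 or more characters would have crashed. Below is a minimal sketch of how the updated main.py plausibly fits together; the FastAPI app setup, the health-check route, the regex replacement after pattern = r'\[.*?\]', and the uvicorn runner all fall outside the shown hunks, so those parts are assumptions, not the author's code.

import os
os.environ["HF_HOME"] = "/.cache"

import re
import uvicorn
from fastapi import FastAPI
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_dir_large = 'edithram23/Redaction_Personal_info_v1'
tokenizer_large = AutoTokenizer.from_pretrained(model_dir_large)
model_large = AutoModelForSeq2SeqLM.from_pretrained(model_dir_large)

app = FastAPI()  # assumed: the app object is created outside the shown hunks


def mask_generation(text, model=model_large, tokenizer=tokenizer_large):
    """Generate a masked rewrite of `text` and redact any bracketed tags."""
    inputs = ["Mask Generation: " + text + '.']
    inputs = tokenizer(inputs, max_length=512, truncation=True, return_tensors="pt")
    output = model.generate(**inputs, num_beams=8, do_sample=True, max_length=512)
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    predicted_title = decoded_output.strip()
    # Assumed: the truncated tail of the function rewrites bracketed entity
    # tags such as [NAME] or [PHONE] into a uniform mask token.
    pattern = r'\[.*?\]'
    return re.sub(pattern, '[redacted]', predicted_title)


@app.get("/")
async def hello():
    # Assumed: a trivial liveness route; the diff only shows its signature
    # in the second hunk header.
    return {"message": "API is running"}


@app.post("/mask")
async def mask_input(query):
    output = mask_generation(query)
    return {"data": output}


if __name__ == '__main__':
    # Assumed: 7860 is the conventional port for a Hugging Face Space.
    uvicorn.run(app, host="0.0.0.0", port=7860)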
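
Once the server is up (python main.py, given the assumed runner above), the endpoint takes the text as a query parameter, since mask_input declares an untyped query argument. A hypothetical client call follows; the port and the sample output are illustrative only:

import requests

resp = requests.post(
    "http://localhost:7860/mask",
    params={"query": "Contact John Doe at 555-0199."},
)
print(resp.json())  # e.g. {"data": "Contact [redacted] at [redacted]."}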