Dongli He committed on
Commit
dd76d38
·
1 Parent(s): d04a81d

Add application file

Browse files
Files changed (25) hide show
  1. .gitignore +3 -0
  2. app.py +47 -0
  3. data/roberta-large-wmt/all_results.json +14 -0
  4. data/roberta-large-wmt/config.json +36 -0
  5. data/roberta-large-wmt/eval_results.json +8 -0
  6. data/roberta-large-wmt/merges.txt +0 -0
  7. data/roberta-large-wmt/pytorch_model.bin +3 -0
  8. data/roberta-large-wmt/runs/May20_22-09-26_m3h006/1684584588.9904363/events.out.tfevents.1684584588.m3h006.9656.1 +3 -0
  9. data/roberta-large-wmt/runs/May20_22-09-26_m3h006/events.out.tfevents.1684584588.m3h006.9656.0 +3 -0
  10. data/roberta-large-wmt/runs/May20_22-09-26_m3h006/events.out.tfevents.1684585101.m3h006.9656.2 +3 -0
  11. data/roberta-large-wmt/runs/May20_22-19-20_m3h006/events.out.tfevents.1684585194.m3h006.11791.0 +3 -0
  12. data/roberta-large-wmt/runs/May20_22-21-52_m3h006/events.out.tfevents.1684585347.m3h006.11945.0 +3 -0
  13. data/roberta-large-wmt/runs/May21_12-15-58_m3-login1/events.out.tfevents.1684637428.m3-login1.888308.0 +3 -0
  14. data/roberta-large-wmt/runs/May29_12-47-52_m3-login1/events.out.tfevents.1685332847.m3-login1.2429874.0 +3 -0
  15. data/roberta-large-wmt/runs/May29_14-17-10_m3-login1/events.out.tfevents.1685334472.m3-login1.3030729.0 +3 -0
  16. data/roberta-large-wmt/runs/May30_22-24-13_m3g021/events.out.tfevents.1685449490.m3g021.8428.0 +3 -0
  17. data/roberta-large-wmt/runs/May30_22-25-29_m3g021/events.out.tfevents.1685449589.m3g021.8584.0 +3 -0
  18. data/roberta-large-wmt/special_tokens_map.json +1 -0
  19. data/roberta-large-wmt/tokenizer.json +0 -0
  20. data/roberta-large-wmt/tokenizer_config.json +1 -0
  21. data/roberta-large-wmt/train_results.json +8 -0
  22. data/roberta-large-wmt/trainer_state.json +25 -0
  23. data/roberta-large-wmt/training_args.bin +3 -0
  24. data/roberta-large-wmt/vocab.json +0 -0
  25. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .idea
2
+
3
+ __pycache__
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pathlib
2
+
3
+ import gradio as gr
4
+ import torch
5
+ from scipy.special import softmax
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+
# --- Model setup ------------------------------------------------------
# The fine-tuned detector checkpoint is stored locally in the repo.
MODEL_PATH = pathlib.Path("data") / "roberta-large-wmt"

# Load the tokenizer and the sequence-classification head from the
# same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

# Run inference on the GPU when one is available; otherwise use the CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(_device)
16
+
def detect(text: str) -> str:
    """Classify *text* as machine- or human-generated.

    Args:
        text: The passage to classify.

    Returns:
        A sentence naming the predicted label and the model's
        confidence as a percentage, e.g.
        "The text is human-generated, with a certainty of 97.31%".
    """
    inputs = tokenizer(text, return_tensors="pt")
    # Move the encoded inputs onto the same device as the model; the
    # model may have been placed on CUDA at startup, and mismatched
    # devices raise a RuntimeError at inference time.
    inputs = inputs.to(model.device)
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax(dim=1).item()

    # id2label maps to the string labels "0" (machine) / "1" (human).
    label = model.config.id2label[predicted_class_id]
    result = "machine-generated" if label == "0" else "human-generated"

    # Softmax over the class logits gives the probability of the
    # predicted class; torch.softmax avoids the implicit tensor->numpy
    # round-trip that scipy's softmax would require.
    certainty = 100 * torch.softmax(logits, dim=1)[0][predicted_class_id].item()

    return ", ".join(
        [
            f"The text is {result}",
            f"with a certainty of {certainty:.2f}%",
        ]
    )
33
+
34
+
# --- UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Hello!
        This is the demo for <a href="https://arxiv.org/abs/2305.12680" target="_blank">G3Detector</a>.
        """
    )
    text_box = gr.Textbox(label="Text", placeholder="Paste text here...")
    result_box = gr.Textbox(label="Result")
    run_button = gr.Button("Detect")
    # Wire the button to the classifier; api_name exposes it on the API.
    run_button.click(fn=detect, inputs=text_box, outputs=result_box, api_name="G3Detector")

demo.launch()
data/roberta-large-wmt/all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.0,
4
+ "eval_loss": 9.865092277526855,
5
+ "eval_runtime": 24.5645,
6
+ "eval_samples": 3000,
7
+ "eval_samples_per_second": 122.127,
8
+ "eval_steps_per_second": 15.266,
9
+ "train_loss": 0.05986976114908854,
10
+ "train_runtime": 467.209,
11
+ "train_samples": 4000,
12
+ "train_samples_per_second": 25.684,
13
+ "train_steps_per_second": 0.803
14
+ }
data/roberta-large-wmt/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../pretrained_models/roberta-large",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "id2label": {
14
+ "0": "0",
15
+ "1": "1"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 4096,
19
+ "label2id": {
20
+ "0": 0,
21
+ "1": 1
22
+ },
23
+ "layer_norm_eps": 1e-05,
24
+ "max_position_embeddings": 514,
25
+ "model_type": "roberta",
26
+ "num_attention_heads": 16,
27
+ "num_hidden_layers": 24,
28
+ "pad_token_id": 1,
29
+ "position_embedding_type": "absolute",
30
+ "problem_type": "single_label_classification",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.14.0.dev0",
33
+ "type_vocab_size": 1,
34
+ "use_cache": true,
35
+ "vocab_size": 50265
36
+ }
data/roberta-large-wmt/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_accuracy": 0.0,
3
+ "eval_loss": 9.865092277526855,
4
+ "eval_runtime": 24.5645,
5
+ "eval_samples": 3000,
6
+ "eval_samples_per_second": 122.127,
7
+ "eval_steps_per_second": 15.266
8
+ }
data/roberta-large-wmt/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/roberta-large-wmt/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cd188fc5001427fe27ab72fce2ff7c7c3dc3fcc07e83d31d666c0ea9f31170
3
+ size 1421616717
data/roberta-large-wmt/runs/May20_22-09-26_m3h006/1684584588.9904363/events.out.tfevents.1684584588.m3h006.9656.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee38d96706910519229b8cc97ac25858f84e5f652ab0189d8b19d60cee502dbb
3
+ size 4729
data/roberta-large-wmt/runs/May20_22-09-26_m3h006/events.out.tfevents.1684584588.m3h006.9656.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b9865db1e84c28d5b9e7415514cd9a482c8d23332e663df26b31e29b079a9c6
3
+ size 3776
data/roberta-large-wmt/runs/May20_22-09-26_m3h006/events.out.tfevents.1684585101.m3h006.9656.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50312bc13839ce4f93644a25129405069667f1dc82fecb60e249e1df16989d8a
3
+ size 411
data/roberta-large-wmt/runs/May20_22-19-20_m3h006/events.out.tfevents.1684585194.m3h006.11791.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a9f24f63934c83f4163a9d22253781de8c55037e598ff8681ffcd60695840b
3
+ size 346
data/roberta-large-wmt/runs/May20_22-21-52_m3h006/events.out.tfevents.1684585347.m3h006.11945.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109c484dc0944c11ff5bb7cf64e43106f68e58377ce79e3963d4c632da02db7a
3
+ size 346
data/roberta-large-wmt/runs/May21_12-15-58_m3-login1/events.out.tfevents.1684637428.m3-login1.888308.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af362826610b64151fb885838595134f2eb817a0cd8d01dbbafa6169a013a41
3
+ size 346
data/roberta-large-wmt/runs/May29_12-47-52_m3-login1/events.out.tfevents.1685332847.m3-login1.2429874.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d359a0bce4306bbf1c3b965148f13ff650b371a7c668e503c8252d562470dc7
3
+ size 346
data/roberta-large-wmt/runs/May29_14-17-10_m3-login1/events.out.tfevents.1685334472.m3-login1.3030729.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6215f803425b5027debfb198fa6979aa7e0cbf6a6593c17dc1d7ab93cb59416
3
+ size 346
data/roberta-large-wmt/runs/May30_22-24-13_m3g021/events.out.tfevents.1685449490.m3g021.8428.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e16a988fe5380d0e5412ac8e6921c53a20f15b2e36e9303c8dea3ed175dafbac
3
+ size 346
data/roberta-large-wmt/runs/May30_22-25-29_m3g021/events.out.tfevents.1685449589.m3g021.8584.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9070109320cffa38989216524da0f5528c6934794e69a3b49f486be234fb286
3
+ size 346
data/roberta-large-wmt/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
data/roberta-large-wmt/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
data/roberta-large-wmt/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "../pretrained_models/roberta-large", "tokenizer_class": "RobertaTokenizer"}
data/roberta-large-wmt/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.05986976114908854,
4
+ "train_runtime": 467.209,
5
+ "train_samples": 4000,
6
+ "train_samples_per_second": 25.684,
7
+ "train_steps_per_second": 0.803
8
+ }
data/roberta-large-wmt/trainer_state.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 375,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 3.0,
12
+ "step": 375,
13
+ "total_flos": 2795794089984000.0,
14
+ "train_loss": 0.05986976114908854,
15
+ "train_runtime": 467.209,
16
+ "train_samples_per_second": 25.684,
17
+ "train_steps_per_second": 0.803
18
+ }
19
+ ],
20
+ "max_steps": 375,
21
+ "num_train_epochs": 3,
22
+ "total_flos": 2795794089984000.0,
23
+ "trial_name": null,
24
+ "trial_params": null
25
+ }
data/roberta-large-wmt/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ae53eff0ce2eacf03ac69d3806648b65f0af4937e5d4ddd3e98a56923bdf1c1
3
+ size 2927
data/roberta-large-wmt/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ scipy
3
+ torch
4
+ transformers