chore: update Marketing v2
Browse files
- app.py +48 -31
- files/anonymized_document.txt +7 -6
- files/encrypted_document.txt +6 -0
- files/mapping_clear_to_anonymized.pkl +2 -2
- files/original_document.txt +7 -6
- files/original_document_uuid_mapping.json +8 -17
- utils_demo.py +6 -5
app.py
CHANGED
|
@@ -32,8 +32,10 @@ time.sleep(3)
|
|
| 32 |
# Load data from files required for the application
|
| 33 |
UUID_MAP = read_json(MAPPING_UUID_PATH)
|
| 34 |
ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
|
| 35 |
-
|
|
|
|
| 36 |
ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
|
|
|
|
| 37 |
|
| 38 |
# 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
|
| 39 |
|
|
@@ -44,9 +46,9 @@ ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
|
|
| 44 |
USER_ID = numpy.random.randint(0, 2**32)
|
| 45 |
|
| 46 |
|
| 47 |
-
def
|
| 48 |
|
| 49 |
-
selected_sentences = [
|
| 50 |
|
| 51 |
anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
|
| 52 |
|
|
@@ -90,6 +92,16 @@ def key_gen_fn() -> Dict:
|
|
| 90 |
print("Keys have been generated ✅")
|
| 91 |
return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
def encrypt_query_fn(query):
|
| 95 |
|
|
@@ -141,10 +153,10 @@ def encrypt_query_fn(query):
|
|
| 141 |
KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
|
| 142 |
)
|
| 143 |
|
| 144 |
-
encrypted_quant_tokens_hex = [token.hex()[500:
|
| 145 |
|
| 146 |
return {
|
| 147 |
-
output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex)),
|
| 148 |
anonymized_text_output: gr.update(visible=True, value=None),
|
| 149 |
identified_words_output_df: gr.update(visible=False, value=None),
|
| 150 |
}
|
|
@@ -510,28 +522,26 @@ with demo:
|
|
| 510 |
)
|
| 511 |
|
| 512 |
with gr.Row():
|
| 513 |
-
with gr.Column():
|
| 514 |
-
gr.Markdown("**Original document:**")
|
| 515 |
-
|
| 516 |
-
with gr.Column():
|
| 517 |
-
gr.Markdown("**Encrypted document:**")
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
with gr.Row():
|
| 521 |
-
with gr.Column():
|
| 522 |
original_sentences_box = gr.CheckboxGroup(
|
| 523 |
ORIGINAL_DOCUMENT,
|
| 524 |
value=ORIGINAL_DOCUMENT,
|
| 525 |
-
|
|
|
|
| 526 |
)
|
| 527 |
|
| 528 |
-
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
anonymized_doc_box = gr.Textbox(
|
| 530 |
-
|
|
|
|
| 531 |
)
|
| 532 |
|
| 533 |
original_sentences_box.change(
|
| 534 |
-
fn=
|
| 535 |
inputs=[original_sentences_box],
|
| 536 |
outputs=[anonymized_doc_box],
|
| 537 |
)
|
|
@@ -541,12 +551,9 @@ with demo:
|
|
| 541 |
gr.Markdown("<hr />")
|
| 542 |
gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
|
| 543 |
"""Please choose from the predefined options in
|
| 544 |
-
<span style='color:grey'>“Prompt examples”</span>
|
| 545 |
-
the <span style='color:grey'>“Customized prompt”</span>
|
| 546 |
-
|
| 547 |
-
Remain concise and relevant to the context. Any off-topic query will not be processed.
|
| 548 |
-
"""
|
| 549 |
-
)
|
| 550 |
|
| 551 |
with gr.Row():
|
| 552 |
with gr.Column(scale=5):
|
|
@@ -559,7 +566,7 @@ with demo:
|
|
| 559 |
gr.Markdown("Or")
|
| 560 |
|
| 561 |
query_box = gr.Textbox(
|
| 562 |
-
value="What is
|
| 563 |
)
|
| 564 |
|
| 565 |
default_query_box.change(
|
|
@@ -592,9 +599,19 @@ with demo:
|
|
| 592 |
|
| 593 |
run_fhe_btn = gr.Button("Anonymize using FHE")
|
| 594 |
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
|
| 599 |
identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
|
| 600 |
|
|
@@ -618,7 +635,7 @@ with demo:
|
|
| 618 |
########################## ChatGpt Part ##########################
|
| 619 |
|
| 620 |
gr.Markdown("<hr />")
|
| 621 |
-
gr.Markdown("##
|
| 622 |
gr.Markdown(
|
| 623 |
"""After securely anonymizing the query with FHE,
|
| 624 |
you can forward it to ChatGPT without having any concern about information leakage."""
|
|
@@ -627,9 +644,9 @@ with demo:
|
|
| 627 |
chatgpt_button = gr.Button("Query ChatGPT")
|
| 628 |
|
| 629 |
with gr.Row():
|
| 630 |
-
chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=
|
| 631 |
chatgpt_response_deanonymized = gr.Textbox(
|
| 632 |
-
label="ChatGPT's non-anonymized response:", lines=
|
| 633 |
)
|
| 634 |
|
| 635 |
chatgpt_button.click(
|
|
|
|
| 32 |
# Load data from files required for the application
|
| 33 |
UUID_MAP = read_json(MAPPING_UUID_PATH)
|
| 34 |
ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
|
| 35 |
+
MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
|
| 36 |
+
MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
|
| 37 |
ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
|
| 38 |
+
print(ORIGINAL_DOCUMENT)
|
| 39 |
|
| 40 |
# 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
|
| 41 |
|
|
|
|
| 46 |
USER_ID = numpy.random.randint(0, 2**32)
|
| 47 |
|
| 48 |
|
| 49 |
+
def select_static_anonymized_sentences_fn(selected_sentences: List):
|
| 50 |
|
| 51 |
+
selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
|
| 52 |
|
| 53 |
anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
|
| 54 |
|
|
|
|
| 92 |
print("Keys have been generated ✅")
|
| 93 |
return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
|
| 94 |
|
| 95 |
+
def select_static_encrypted_sentences_fn(selected_sentences: List):
    """Build the text box update for the currently selected sentences.

    Every selected sentence is looked up in `MAPPING_ENCRYPTED_SENTENCES` (loaded from
    a pickle file at start-up), the resulting entries are ordered by their first
    element and their second elements are joined with blank lines.

    Args:
        selected_sentences (List): Sentences to look up; each one must be a key of
            `MAPPING_ENCRYPTED_SENTENCES`, otherwise a `KeyError` is raised.

    Returns:
        Dict: A single-entry dictionary mapping `encrypted_doc_box` to a `gr.update`
            carrying the joined text.
    """
    # NOTE(review): `encrypted_doc_box` is not created in the visible part of the UI
    # layout - confirm it exists before this callback is wired to an event.

    # Each mapping entry is unpacked below as a (sort key, text) pair; presumably the
    # sort key is the sentence position in the document - verify against the pickle.
    indexed_entries = [MAPPING_ENCRYPTED_SENTENCES[clear_text] for clear_text in selected_sentences]
    indexed_entries.sort(key=lambda entry: entry[0])

    joined_text = "\n\n".join(text for _, text in indexed_entries)
    return {encrypted_doc_box: gr.update(value=joined_text)}
|
| 104 |
+
|
| 105 |
|
| 106 |
def encrypt_query_fn(query):
|
| 107 |
|
|
|
|
| 153 |
KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
|
| 154 |
)
|
| 155 |
|
| 156 |
+
encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
|
| 157 |
|
| 158 |
return {
|
| 159 |
+
output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=4),
|
| 160 |
anonymized_text_output: gr.update(visible=True, value=None),
|
| 161 |
identified_words_output_df: gr.update(visible=False, value=None),
|
| 162 |
}
|
|
|
|
| 522 |
)
|
| 523 |
|
| 524 |
with gr.Row():
|
| 525 |
+
with gr.Column(scale=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
original_sentences_box = gr.CheckboxGroup(
|
| 527 |
ORIGINAL_DOCUMENT,
|
| 528 |
value=ORIGINAL_DOCUMENT,
|
| 529 |
+
label="Contract:",
|
| 530 |
+
show_label=True,
|
| 531 |
)
|
| 532 |
|
| 533 |
+
with gr.Column(scale=1, min_width=6):
|
| 534 |
+
gr.HTML("<div style='height: 77px;'></div>")
|
| 535 |
+
encrypt_doc_btn = gr.Button("Encrypt the document")
|
| 536 |
+
|
| 537 |
+
with gr.Column(scale=5):
|
| 538 |
anonymized_doc_box = gr.Textbox(
|
| 539 |
+
label="Encrypted document:",
|
| 540 |
+
show_label=True, value=ANONYMIZED_DOCUMENT, interactive=False, lines=11
|
| 541 |
)
|
| 542 |
|
| 543 |
original_sentences_box.change(
|
| 544 |
+
fn=select_static_anonymized_sentences_fn,
|
| 545 |
inputs=[original_sentences_box],
|
| 546 |
outputs=[anonymized_doc_box],
|
| 547 |
)
|
|
|
|
| 551 |
gr.Markdown("<hr />")
|
| 552 |
gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
|
| 553 |
"""Please choose from the predefined options in
|
| 554 |
+
<span style='color:grey'>“Prompt examples”</span> or craft a custom question in
|
| 555 |
+
the <span style='color:grey'>“Customized prompt”</span> text box.
|
| 556 |
+
Remain concise and relevant to the context. Any off-topic query will not be processed.""")
|
|
|
|
|
|
|
|
|
|
| 557 |
|
| 558 |
with gr.Row():
|
| 559 |
with gr.Column(scale=5):
|
|
|
|
| 566 |
gr.Markdown("Or")
|
| 567 |
|
| 568 |
query_box = gr.Textbox(
|
| 569 |
+
value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
|
| 570 |
)
|
| 571 |
|
| 572 |
default_query_box.change(
|
|
|
|
| 599 |
|
| 600 |
run_fhe_btn = gr.Button("Anonymize using FHE")
|
| 601 |
|
| 602 |
+
with gr.Row():
|
| 603 |
+
with gr.Column(scale=5):
|
| 604 |
+
|
| 605 |
+
anonymized_text_output = gr.Textbox(
|
| 606 |
+
label="Decrypted and anonymized document", lines=5, interactive=True
|
| 607 |
+
)
|
| 608 |
+
|
| 609 |
+
with gr.Column(scale=5):
|
| 610 |
+
|
| 611 |
+
anonymized_query_output = gr.Textbox(
|
| 612 |
+
label="Decrypted and anonymized prompt", lines=5, interactive=True
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
|
| 616 |
identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
|
| 617 |
|
|
|
|
| 635 |
########################## ChatGpt Part ##########################
|
| 636 |
|
| 637 |
gr.Markdown("<hr />")
|
| 638 |
+
gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
|
| 639 |
gr.Markdown(
|
| 640 |
"""After securely anonymizing the query with FHE,
|
| 641 |
you can forward it to ChatGPT without having any concern about information leakage."""
|
|
|
|
| 644 |
chatgpt_button = gr.Button("Query ChatGPT")
|
| 645 |
|
| 646 |
with gr.Row():
|
| 647 |
+
chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
|
| 648 |
chatgpt_response_deanonymized = gr.Textbox(
|
| 649 |
+
label="ChatGPT's non-anonymized response:", lines=5
|
| 650 |
)
|
| 651 |
|
| 652 |
chatgpt_button.click(
|
files/anonymized_document.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
My credit card number is e5b499b0 and my crypto wallet id is ac41d58b.
|
| 3 |
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
|
| 8 |
-
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
Members: e3383f5b 70fc6ec5 and 2708cb61 cda521d5
|
|
|
|
| 2 |
|
| 3 |
+
Date: e381418b 3534158a, 96c403e5
|
| 4 |
|
| 5 |
+
Scope: 2708cb61 agrees to provide graphic design services to e3383f5b for the creation of a company logo.
|
| 6 |
|
| 7 |
+
Amount: Bob agrees to pay 2708cb61 500 upon completion and delivery of the logo.
|
| 8 |
|
| 9 |
+
Deadline: The logo design must be completed and delivered to Bob within 14 days of the contract signing date.
|
| 10 |
+
|
| 11 |
+
Payment terms: 2708cb61s international bank account N: 61294a43
|
files/encrypted_document.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
827c6bc946254b0ffe4695b4f0fc001f2383a4cf066b44312f9fa5a5733e6b6e097e69d08441b32ad93a3e84f95932e3142ce91a7502f5594eafdd6ccc2ad779e2e166ff0ac66e5e015ac64e5aac0979
|
| 2 |
+
a2a7ebe458d32843738fe5e550642bfb2c7efeb0f3b65892c25c0ed4a329f90e62dd97a58f6c852d0a4f2e7bae863381a325b679761570df764a0f6aec3f01a23516f0cee88972e5ac324f6af45d8e7ee
|
| 3 |
+
2bb53d60fd24b7e24334179c30624dd49eea11c2210e2b63d2a5fc5ea20766be019c68e061cbbad75bdb3255b1428b1dd46d361ec0261dfcbc3081d1e43aaf1bfde51f81f9036e728c3931a9e3ff4b37
|
| 4 |
+
3b05e1c4a0adc1a82b3d471990b60990d7d4e66132ed15fc8a18a129432e4c73c2bb74ecc89be49571090b5de8934661928c1e153dd8746858aa8ece4d59452159147f8b54da923a9eb99d1c8006b389
|
| 5 |
+
788027d896e0977b3012f6d50718153aac3b6642ad7e72383ecd9def3b8f4cc6c7e6851f4491eed1c5693b56dd9ac79f03fde97bf9ad0d3c6bc1c8f94e95901ceeb4a38893f2a189e63562b43a453c3a
|
| 6 |
+
426b376b438d9755946251b5da13f7585ab9557bec48f1e300c43cfa9e6f3f2bc1eef1a13d801161c8a59384914ffbd4da96b25dad84c51b77df73060a1319d72a2e5d4d7eea734ba72a1a6657aa93ba
|
files/mapping_clear_to_anonymized.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce0f400a2f644ddf99bcbc76f856afc1ad79055b1f01133a69e7617d257de98c
|
| 3 |
+
size 943
|
files/original_document.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
My credit card number is 4095-2609-9393-4932 and my crypto wallet id is 16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ.
|
| 3 |
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
|
| 8 |
-
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
Members: David Johnson and Kate Hemingway
|
|
|
|
| 2 |
|
| 3 |
+
Date: February 06, 2000
|
| 4 |
|
| 5 |
+
Scope: Kate agrees to provide graphic design services to David for the creation of a company logo.
|
| 6 |
|
| 7 |
+
Amount: Bob agrees to pay Kate $500 upon completion and delivery of the logo.
|
| 8 |
|
| 9 |
+
Deadline: The logo design must be completed and delivered to Bob within 14 days of the contract signing date.
|
| 10 |
+
|
| 11 |
+
Payment terms: Kate’s international bank account N°: IL150120690000003111111
|
files/original_document_uuid_mapping.json
CHANGED
|
@@ -1,19 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"954567876544": "9eb07461",
|
| 11 |
-
"David": "ebe99761",
|
| 12 |
-
"IL150120690000003111111": "5ca977a4",
|
| 13 |
-
"International": "71d0f51c",
|
| 14 |
-
"Johnson": "53a9291d",
|
| 15 |
-
"Kate": "b474d794",
|
| 16 |
-
"Maine": "6337f12f",
|
| 17 |
-
"microsoft.com": "0d574451",
|
| 18 |
-
"[email protected]": "1f78e797"
|
| 19 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"06": "3534158a",
|
| 3 |
+
"2000": "96c403e5",
|
| 4 |
+
"David": "e3383f5b",
|
| 5 |
+
"February": "e381418b",
|
| 6 |
+
"Hemingway": "cda521d5",
|
| 7 |
+
"IL150120690000003111111": "61294a43",
|
| 8 |
+
"Johnson": "70fc6ec5",
|
| 9 |
+
"Kate": "2708cb61"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
}
|
utils_demo.py
CHANGED
|
@@ -18,7 +18,7 @@ from pathlib import Path
|
|
| 18 |
SERVER_URL = "http://localhost:8000/"
|
| 19 |
|
| 20 |
# Maximum length for user queries
|
| 21 |
-
MAX_USER_QUERY_LEN =
|
| 22 |
|
| 23 |
# Base Directories
|
| 24 |
CURRENT_DIR = Path(__file__).parent
|
|
@@ -38,15 +38,16 @@ LOGREG_MODEL_PATH = CURRENT_DIR / "models" / "cml_logreg.model"
|
|
| 38 |
ORIGINAL_FILE_PATH = DATA_PATH / "original_document.txt"
|
| 39 |
ANONYMIZED_FILE_PATH = DATA_PATH / "anonymized_document.txt"
|
| 40 |
MAPPING_UUID_PATH = DATA_PATH / "original_document_uuid_mapping.json"
|
| 41 |
-
|
|
|
|
| 42 |
PROMPT_PATH = DATA_PATH / "chatgpt_prompt.txt"
|
| 43 |
|
| 44 |
|
| 45 |
# List of example queries for easy access
|
| 46 |
DEFAULT_QUERIES = {
|
| 47 |
-
"Example Query 1": "
|
| 48 |
-
"Example Query 2": "
|
| 49 |
-
"Example Query 3": "
|
| 50 |
}
|
| 51 |
|
| 52 |
# Load tokenizer and model
|
|
|
|
| 18 |
SERVER_URL = "http://localhost:8000/"
|
| 19 |
|
| 20 |
# Maximum length for user queries
|
| 21 |
+
MAX_USER_QUERY_LEN = 128
|
| 22 |
|
| 23 |
# Base Directories
|
| 24 |
CURRENT_DIR = Path(__file__).parent
|
|
|
|
| 38 |
ORIGINAL_FILE_PATH = DATA_PATH / "original_document.txt"
|
| 39 |
ANONYMIZED_FILE_PATH = DATA_PATH / "anonymized_document.txt"
|
| 40 |
MAPPING_UUID_PATH = DATA_PATH / "original_document_uuid_mapping.json"
|
| 41 |
+
MAPPING_ANONYMIZED_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_anonymized.pkl"
|
| 42 |
+
MAPPING_ENCRYPTED_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_encrypted.pkl"
|
| 43 |
PROMPT_PATH = DATA_PATH / "chatgpt_prompt.txt"
|
| 44 |
|
| 45 |
|
| 46 |
# List of example queries for easy access
|
| 47 |
DEFAULT_QUERIES = {
|
| 48 |
+
"Example Query 1": "What is the amount of the contract between David and Kate?",
|
| 49 |
+
"Example Query 2": "What's the duration of the contract?",
|
| 50 |
+
"Example Query 3": "Does Kate have an international bank account?",
|
| 51 |
}
|
| 52 |
|
| 53 |
# Load tokenizer and model
|