kevinhug committed on
Commit
a1d18f3
·
1 Parent(s): 29221b5

fine tune LLM

Browse files
Files changed (3) hide show
  1. app.py +53 -30
  2. banks_txt_like.pkl +3 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -2,11 +2,6 @@ import gradio as gr
2
  # from langchain.vectorstores import Chroma
3
 
4
 
5
- import chromadb
6
- client = chromadb.PersistentClient(path="chroma.db")
7
-
8
- db = client.get_collection(name="banks")
9
-
10
  '''
11
  https://huggingface.co/spaces/kevinhug/clientX
12
 
@@ -17,31 +12,53 @@ counter="""
17
  <div class="elfsight-app-5f3e8eb9-9103-490e-9999-e20aa4157dc7" data-elfsight-app-lazy></div>
18
  """
19
 
 
 
 
 
 
 
 
 
20
  def similar(issue):
21
  global db
22
  docs = db.query(query_texts=issue, n_results=5)
23
  return docs
24
 
25
-
 
 
 
 
 
 
 
26
  '''
27
  https://www.gradio.app/docs/interface
28
  '''
29
- iface = gr.Interface(fn=similar, inputs="text", outputs="json",
30
- title="Enhancing Customer Engagement and Operational Efficiency with Semantic Similarity Document Search (SSDS)",
31
- examples=[["having bad client experience"],
32
- ["having credit card problem"],
33
- ["late payment fee"],
34
- ["credit score dropping"]],
35
- description="""
 
 
 
36
  Data Scientist: Kevin Wong, [email protected], 416-903-7937
37
- ============
38
- open source ml bank dataset
39
  https://www.kaggle.com/datasets/trainingdatapro/20000-customers-reviews-on-banks/?select=Banks.csv
 
 
 
 
40
 
41
- Using Sentence Embedding to inject Public ML Banks Text Dataset @ https://github.com/kevinwkc/analytics/blob/master/ai/vectorDB.py""",
42
- custom_js=counter,
43
- article="""
44
-
45
  Description:
46
  =======
47
  In today's dynamic financial landscape, the Semantic Similarity Document Search (SSDS) capability is a practical innovation to improve client experience, marketing leads, and sentiment analysis. As a Data Scientist with decades of experience in the financial industry, I see the value of SSDS in action.
@@ -71,15 +88,21 @@ With no need for jargon, SSDS delivers tangible value to our fintech operations.
71
  ### issue:
72
  - upset customer
73
 
74
- Future Improvement
75
- ============
76
- tuning the distance for use case
77
-
78
- <script src="https://static.elfsight.com/platform/platform.js" data-use-service-core defer></script>
79
- <div class="elfsight-app-5f3e8eb9-9103-490e-9999-e20aa4157dc7" data-elfsight-app-lazy></div>
80
- """)
81
- iface.launch()
 
 
 
 
 
 
 
 
 
82
 
83
- iface2 = gr.Interface(fn=similar, inputs="text", outputs="json",
84
- title="testing")
85
- iface.launch()
 
2
  # from langchain.vectorstores import Chroma
3
 
4
 
 
 
 
 
 
5
  '''
6
  https://huggingface.co/spaces/kevinhug/clientX
7
 
 
12
  <div class="elfsight-app-5f3e8eb9-9103-490e-9999-e20aa4157dc7" data-elfsight-app-lazy></div>
13
  """
14
 
15
+ '''
16
+ SIMILAR VECTOR DB SEARCH
17
+ '''
18
+ import chromadb
19
+ client = chromadb.PersistentClient(path="chroma.db")
20
+
21
+ db = client.get_collection(name="banks")
22
+
23
  def similar(issue):
24
  global db
25
  docs = db.query(query_texts=issue, n_results=5)
26
  return docs
27
 
28
+ '''
29
+ FINE TUNE LLM LIKE SCORE
30
+ '''
31
+ from fastai.vision.all import *
32
+ learn = load_learner('banks_txt_like.pkl')
33
+ def like(issue):
34
+ pred,idx,probs = learn.predict(issue)
35
+ return pred
36
  '''
37
  https://www.gradio.app/docs/interface
38
  '''
39
+
40
+
41
+
42
+
43
+ with gr.Blocks() as demo:
44
+ gr.Markdown("""Enhancing Customer Engagement and Operational Efficiency with NLP
45
+ =========
46
+ LLM
47
+ Semantic Similarity Document Search (SSDS)
48
+
49
  Data Scientist: Kevin Wong, [email protected], 416-903-7937
50
+
51
+ Open source ml bank dataset
52
  https://www.kaggle.com/datasets/trainingdatapro/20000-customers-reviews-on-banks/?select=Banks.csv
53
+ """)
54
+ with gr.Tab("Semantic Similarity Document Search (SSDS)"):
55
+ in_similar = gr.Textbox(placeholder="having credit card problem")
56
+ out_similar = gr.JSON()
57
 
58
+ btn_similar = gr.Button("Find Similar Verbatim")
59
+ btn_similar.click(fn=similar, inputs=in_similar, outputs=out_similar)
60
+
61
+ gr.Markdown("""
62
  Description:
63
  =======
64
  In today's dynamic financial landscape, the Semantic Similarity Document Search (SSDS) capability is a practical innovation to improve client experience, marketing leads, and sentiment analysis. As a Data Scientist with decades of experience in the financial industry, I see the value of SSDS in action.
 
88
  ### issue:
89
  - upset customer
90
 
91
+ Using Sentence Embedding to inject Public ML Banks Text Dataset @ https://github.com/kevinwkc/analytics/blob/master/ai/vectorDB.py
92
+ """)
93
+
94
+ with gr.Tab("Fine Tune LLM"):
95
+ in_like = gr.Textbox(placeholder="having credit card problem")
96
+ out_like = gr.Textbox(placeholder="like score")
97
+
98
+ btn_like = gr.Button("Find Like Score")
99
+ btn_like.click(fn=like, inputs=in_like, outputs=out_like)
100
+
101
+
102
+ with gr.Accordion("Future Improvement"):
103
+ gr.Markdown("""
104
+ tuning the distance for use case
105
+ """)
106
+
107
+ demo.launch()
108
 
 
 
 
banks_txt_like.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e0db2b1e176931dde3f5172bb57aac30df4fe3521b80dbf330be74f0dde368
3
+ size 130662474
requirements.txt CHANGED
@@ -1 +1,2 @@
1
- chromadb
 
 
1
+ chromadb
2
+ fastai