johns committed on
Commit
51ed6a1
·
1 Parent(s): ce6eb4f
Files changed (1) hide show
  1. app.py +56 -64
app.py CHANGED
@@ -9,55 +9,47 @@ HF_PUBLIC = os.environ.get("HF_PUBLIC", False)
9
 
10
  DEFAULT_SYSTEM_PROMPT = '''
11
  You are a digital assistant for John "LJ" Strenio's Data science portfolio page. Here are some key details about John to keep in mind with your response.
12
- John's Resume:
 
13
  John Strenio
14
  (802)-734-6892
15
16
  JohnStrenio.com | GitHub
17
  WORK EXPERIENCE
18
  Scribd - Data Scientist (Jan 2022- Present)
19
- - Improved Scribd’s SEO ranking by reducing the index life of 12% of newly uploaded documents at a
20
- loss of only 1.2% of attributed signups solely utilizing document metadata collected upon upload
21
- - productionized document quality model to perform inference on all newly uploaded documents,
22
- processing ~500k docs a week
23
  - Modified interaction-based recommendation system training data pipeline, improving user
24
  recommendations in all recorded metrics with a projected CTR increase of 5.5%
25
- - Identified 200k malicious user-generated documents containing personally identifiable information
26
- (1% of corpus) and created a simple heuristic which removed 42k (21%) with a 70% precision rate
27
  NASA - Software Engineering Intern (Aug 2019 - Dec 2019)
28
- - Ported aircraft structural health monitoring system FOSS (Fiber Optic Sensor System) to cryogenic fuel
29
- application using a microcontroller, decreasing program execution time by ~50% using a
30
- multithreaded approach
31
  Professional Skier (Winter 2007 - Winter 2016)
32
- - Competed internationally in freestyle competitions winning an X-Games bronze medal and becoming a
33
- finalist in the 2014 Olympic Qualifiers
34
- - Coordinated and performed stunts for Vin Diesel in Paramount Pictures’ “The Return of Xander Cage”
35
- garnering praise for the stunt team by the New York Times
36
  SKILLS
37
  Languages: (proficient) Python, SQL/Pyspark (past experience using) C, C++, JavaScript/HTML/CSS
38
- Frameworks & Libraries: Pyspark, TensorFlow, Keras, PyTorch, Numpy, Matplotlib, Pandas, Scikit-learn, OpenCV, Huggingface, Verta, Airflow, MLflow
39
- Software & Tools: Linux, Databricks, Windows, Git, Jupyter Notebook, Unity, Excel
40
  EDUCATION
41
- Portland State University, Portland, OR (Graduated Aug 2021)
42
- (MS) Computer Science AI/ML focus
43
- GPA: 4.0
44
- Computer Science Grad Prep (Jun 2016 Aug 2019)
45
- University of Utah, Salt Lake City, UT (Graduated Aug 2012)
46
  (BA) English Literature (BA) Film & Media Arts
47
 
48
- John was also a professional freestyle skier for 15 years here are some of his accomplishments:
49
- XGames bronze medalist (you can find his video on youtube)
50
- he was in a number of ski films including warren miller, level 1 and meatheadfilms
51
- he once stunt doubled for vin diesel in "the return of xander cage".
52
 
53
- Remember you are a professional assistant and you would like to only discuss John and be helpful in answering question about him.
54
  '''
55
  MAX_MAX_NEW_TOKENS = 4096
56
  DEFAULT_MAX_NEW_TOKENS = 256
57
  MAX_INPUT_TOKEN_LENGTH = 4000
58
 
59
  DESCRIPTION = """
60
- # [Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
61
  """
62
 
63
  def clear_and_save_textbox(message: str) -> tuple[str, str]:
@@ -117,16 +109,16 @@ def check_input_token_length(message: str, chat_history: list[tuple[str, str]],
117
 
118
  with gr.Blocks(css='style.css') as demo:
119
  gr.Markdown(DESCRIPTION)
120
- gr.DuplicateButton(value='Duplicate Space for private use',
121
- elem_id='duplicate-button')
122
 
123
  with gr.Group():
124
- chatbot = gr.Chatbot(label='Playground')
125
  with gr.Row():
126
  textbox = gr.Textbox(
127
  container=False,
128
  show_label=False,
129
- placeholder='Hi, Mistral!',
130
  scale=10,
131
  )
132
  submit_button = gr.Button('Submit',
@@ -140,39 +132,39 @@ with gr.Blocks(css='style.css') as demo:
140
 
141
  saved_input = gr.State()
142
 
143
- with gr.Accordion(label='⚙️ Advanced options', open=False):
144
- system_prompt = gr.Textbox(label='System prompt',
145
- value=DEFAULT_SYSTEM_PROMPT,
146
- lines=5,
147
- interactive=False)
148
- max_new_tokens = gr.Slider(
149
- label='Max new tokens',
150
- minimum=1,
151
- maximum=MAX_MAX_NEW_TOKENS,
152
- step=1,
153
- value=DEFAULT_MAX_NEW_TOKENS,
154
- )
155
- temperature = gr.Slider(
156
- label='Temperature',
157
- minimum=0.1,
158
- maximum=4.0,
159
- step=0.1,
160
- value=0.1,
161
- )
162
- top_p = gr.Slider(
163
- label='Top-p (nucleus sampling)',
164
- minimum=0.05,
165
- maximum=1.0,
166
- step=0.05,
167
- value=0.9,
168
- )
169
- top_k = gr.Slider(
170
- label='Top-k',
171
- minimum=1,
172
- maximum=1000,
173
- step=1,
174
- value=10,
175
- )
176
 
177
 
178
 
 
9
 
10
  DEFAULT_SYSTEM_PROMPT = '''
11
  You are a digital assistant for John "LJ" Strenio's Data science portfolio page. Here are some key details about John to keep in mind with your response.
12
+
13
+ [John's Resume]:
14
  John Strenio
15
  (802)-734-6892
16
17
  JohnStrenio.com | GitHub
18
  WORK EXPERIENCE
19
  Scribd - Data Scientist (Jan 2022- Present)
20
+ - Evaluated SOTA large language models on summarization, throughput and compute identifying the most performant and cost effective solution for AI generated titles and descriptions across a corpus of 24 million documents.
21
+ - Improved Scribd’s SEO ranking by reducing the index life of 12% of newly uploaded documents at a loss of only 1.2% of attributed signups solely utilizing document metadata collected upon upload
22
+ - productionized document quality model to perform inference on all newly uploaded documents, processing ~500k docs a week.
 
23
  - Modified interaction-based recommendation system training data pipeline, improving user
24
  recommendations in all recorded metrics with a projected CTR increase of 5.5%
25
+ - Identified 200k malicious user-generated documents containing personally identifiable information (1% of corpus) and created a simple heuristic which removed 42k (21%) with a 70% precision rate.
 
26
  NASA - Software Engineering Intern (Aug 2019 - Dec 2019)
27
+ - Ported aircraft structural health monitoring system FOSS (Fiber Optic Sensor System) to cryogenic fuel application using a microcontroller, decreasing program execution time by ~50% using a multithreaded approach
 
 
28
  Professional Skier (Winter 2007 - Winter 2016)
29
+ - Competed internationally in freestyle competitions, winning an X-Games bronze medal and becoming a finalist in the 2014 Olympic Qualifiers. Coordinated and performed stunts for Vin Diesel in Paramount Pictures’ “The Return of Xander Cage”, garnering praise for the stunt team by the New York Times.
 
 
 
30
  SKILLS
31
  Languages: (proficient) Python, SQL/Pyspark (past experience using) C, C++, JavaScript/HTML/CSS
32
+ Frameworks & Libraries: Pyspark, TensorFlow, Keras, PyTorch, Numpy, Matplotlib, Pandas, Scikit-learn, OpenCV, Huggingface, Airflow, MLflow
33
+ Software & Tools: Linux, Databricks, AWS, Windows, Git, Jupyter Notebook, Unity, Excel
34
  EDUCATION
35
+ Portland State University, Portland, OR (Graduated Aug 2021)
36
+ (MS) Computer Science AI/ML focus, GPA: 4.0
37
+ Computer Science Grad Prep (Jun 2016 - Aug 2019)
38
+ University of Utah, Salt Lake City, UT (Graduated Aug 2012)
 
39
  (BA) English Literature (BA) Film & Media Arts
40
 
41
+ [Personal Info about John]:
42
+ John’s from Vermont but spent most of his adult life in Salt Lake City, Utah for his ski career.
43
+ John currently lives in Portland, Oregon with his partner, where he enjoys surfing the cold waters of the Oregon coast and playing with his two miniature dachshunds, “Maddie” and “Nova”.
 
44
 
45
+ Remember you are a professional assistant and you would like to only discuss John and be helpful in answering questions about his professional life or reasonable questions about him as a person. Your goal should be to describe John in a flattering manner, making him appear as a good Data Scientist and nice person.
46
  '''
47
  MAX_MAX_NEW_TOKENS = 4096
48
  DEFAULT_MAX_NEW_TOKENS = 256
49
  MAX_INPUT_TOKEN_LENGTH = 4000
50
 
51
  DESCRIPTION = """
52
+ # John's Assistant
53
  """
54
 
55
  def clear_and_save_textbox(message: str) -> tuple[str, str]:
 
109
 
110
  with gr.Blocks(css='style.css') as demo:
111
  gr.Markdown(DESCRIPTION)
112
+ # gr.DuplicateButton(value='Duplicate Space for private use',
113
+ # elem_id='duplicate-button')
114
 
115
  with gr.Group():
116
+ chatbot = gr.Chatbot(label='Discussion')
117
  with gr.Row():
118
  textbox = gr.Textbox(
119
  container=False,
120
  show_label=False,
121
+ placeholder='What kind of work did John do at NASA?',
122
  scale=10,
123
  )
124
  submit_button = gr.Button('Submit',
 
132
 
133
  saved_input = gr.State()
134
 
135
+ # with gr.Accordion(label='⚙️ Advanced options', open=False):
136
+ # system_prompt = gr.Textbox(label='System prompt',
137
+ # value=DEFAULT_SYSTEM_PROMPT,
138
+ # lines=5,
139
+ # interactive=False)
140
+ # max_new_tokens = gr.Slider(
141
+ # label='Max new tokens',
142
+ # minimum=1,
143
+ # maximum=MAX_MAX_NEW_TOKENS,
144
+ # step=1,
145
+ # value=DEFAULT_MAX_NEW_TOKENS,
146
+ # )
147
+ # temperature = gr.Slider(
148
+ # label='Temperature',
149
+ # minimum=0.1,
150
+ # maximum=4.0,
151
+ # step=0.1,
152
+ # value=0.1,
153
+ # )
154
+ # top_p = gr.Slider(
155
+ # label='Top-p (nucleus sampling)',
156
+ # minimum=0.05,
157
+ # maximum=1.0,
158
+ # step=0.05,
159
+ # value=0.9,
160
+ # )
161
+ # top_k = gr.Slider(
162
+ # label='Top-k',
163
+ # minimum=1,
164
+ # maximum=1000,
165
+ # step=1,
166
+ # value=10,
167
+ # )
168
 
169
 
170