Spaces:
Sleeping
Sleeping
MandarBhalerao
committed on
Add files via upload
Browse files — removed all app-related files outside
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain_community.document_loaders import WebBaseLoader
|
3 |
+
|
4 |
+
from chains import Chain
|
5 |
+
# from portfolio import Portfolio
|
6 |
+
from utils import clean_text, extract_text_from_pdf
|
7 |
+
|
8 |
+
|
9 |
+
def create_streamlit_app(llm, clean_text):
    """Render the cold e-mail generator page and handle a submit click.

    The page shows a PDF uploader for the resume, a text input for the job
    posting URL and a button. When the button is pressed, the job page is
    loaded and cleaned, jobs are extracted, the resume is summarized once,
    and one e-mail per extracted job is displayed.

    Args:
        llm: Object providing ``extract_jobs(text)``, ``summarize_pdf(text)``
            and ``write_mail(job, summary)``.
        clean_text: Callable applied to the scraped page text before it is
            passed to ``llm.extract_jobs``.
    """
    st.title("📧 Welcome to Cold E-Mail Generator")

    # PDF upload section
    uploaded_file = st.file_uploader("Upload your resume as PDF", type=["pdf"])
    pdf_text = extract_text_from_pdf(uploaded_file)

    url_input = st.text_input(
        "Enter the URL of Job Posting:",
        value="https://careers.myntra.com/job-detail/?id=7431200002",
    )
    submit_button = st.button("Generate E-mail")

    if not submit_button:
        return

    # Without resume text the summary (and therefore the e-mail) would be
    # produced from an empty string, so stop early and tell the user.
    if not pdf_text or not pdf_text.strip():
        st.warning(
            "No text could be read from the resume. "
            "Please upload a text-based PDF before generating an e-mail."
        )
        return

    try:
        loader = WebBaseLoader([url_input])
        # Clean any unnecessary garbage text out of the scraped page.
        data = clean_text(loader.load().pop().page_content)
        # One web page may describe several jobs; each becomes one entry.
        jobs = llm.extract_jobs(data)
        if not jobs:
            st.warning("No job postings could be extracted from this URL.")
            return

        # The resume summary does not depend on the job, so compute it once
        # instead of issuing one LLM call per job.
        summarized_text = llm.summarize_pdf(pdf_text)

        for index, job in enumerate(jobs):
            email = llm.write_mail(job, summarized_text)
            # A distinct key per job keeps the widgets from colliding when
            # several jobs are rendered with the same label.
            st.text_area(
                "Email is as follows",
                value=email,
                height=500,
                key=f"generated_email_{index}",
            )
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user.
        st.error(f"An Error Occurred: {e}")
|
38 |
+
|
39 |
+
|
40 |
+
if __name__ == "__main__":
    # Build the LLM wrapper, configure the page, then render the UI.
    email_chain = Chain()
    st.set_page_config(
        page_title="Cold Email Generator",
        page_icon="📧",
        layout="wide",
    )
    create_streamlit_app(email_chain, clean_text)
|
45 |
+
|
46 |
+
|
chains.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain_groq import ChatGroq
|
3 |
+
from langchain_core.prompts import PromptTemplate
|
4 |
+
from langchain_core.output_parsers import JsonOutputParser
|
5 |
+
from langchain_core.exceptions import OutputParserException
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
import streamlit as st
|
9 |
+
|
10 |
+
# Groq API key, read at import time from the "default" section of Streamlit secrets.
GROQ_API_KEY = st.secrets["default"]["GROQ_API_KEY"]
|
11 |
+
|
12 |
+
# using this we can have a file called .env in your root folder where you can keep your API key.
|
13 |
+
# load_dotenv() # This will find the .env file and it will set the things in that file as your environment variable
|
14 |
+
|
15 |
+
# print(os.getenv("GROQ_API_KEY")) # just for testing
|
16 |
+
|
17 |
+
class Chain:
    """LLM prompt chains for job extraction, resume summary and e-mail writing."""

    def __init__(self, model_name="llama-3.1-70b-versatile", temperature=0):
        """Create the Groq chat model used by every chain.

        Args:
            model_name: Groq model identifier. The default is unchanged from
                the original hard-coded value.
                NOTE(review): confirm this model is still served by Groq.
            temperature: Sampling temperature passed to ``ChatGroq``.
        """
        self.llm = ChatGroq(
            temperature=temperature,
            groq_api_key=GROQ_API_KEY,
            model_name=model_name,
        )

    def extract_jobs(self, cleaned_text):
        """Extract job postings from cleaned career-page text.

        Args:
            cleaned_text: Text of the scraped page, substituted into the
                prompt as ``page_data``.

        Returns:
            A list of parsed JSON objects; a single parsed object is wrapped
            in a one-element list.

        Raises:
            OutputParserException: If the model output cannot be parsed as
                JSON. The original parser error is chained as the cause.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### SCRAPED TEXT FROM WEBSITE:
            {page_data}
            ### INSTRUCTION:
            The scraped text is from the career's page of a website.
            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
            Only return the valid JSON.
            ### VALID JSON (NO PREAMBLE):
            """
        )
        chain_extract = prompt_extract | self.llm
        res = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            json_parser = JsonOutputParser()
            res = json_parser.parse(res.content)
        except OutputParserException as err:
            # Keep the parser's own error as the cause for debugging.
            raise OutputParserException("Context too big. Unable to parse jobs.") from err
        return res if isinstance(res, list) else [res]

    def summarize_pdf(self, pdf_data):
        """Summarize resume text with the LLM.

        Args:
            pdf_data: Resume text, substituted into the prompt as ``pdf_data``.

        Returns:
            The ``content`` of the model response.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### PDF DATA OBTAINED FROM RESUME:
            {pdf_data}
            ### INSTRUCTION:
            The data is from the resume of a person.
            Your job is to extract all the details of this person and summarize it in 200 words, which includes name, education, experience, projects, skills.
            ### (NO PREAMBLE):
            """
        )
        # Piping the prompt into the LLM forms a LangChain chain.
        chain_extract = prompt_extract | self.llm
        res2 = chain_extract.invoke(input={'pdf_data': pdf_data})
        return res2.content

    def write_mail(self, job_description, summary):
        """Write a cold e-mail for one job using the resume summary.

        Args:
            job_description: Extracted job object; it is converted with
                ``str()`` before being substituted into the prompt.
            summary: Resume summary text.

        Returns:
            The ``content`` of the model response.
        """
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            This is a job description

            {job_description}

            ### INSTRUCTION:
            These are the person's details.
            {summary}
            Consider yourself as this person.

            Introduce yourself in an engaging way from above with your name from the above details and your current designation.

            Try to find some things in the job description which are similar with your details. Mention those things which are similar.
            Do not mention anything which is not present in the details.

            Your job is to write a cold email of about 250 words to the hiring manager regarding the job mentioned above describing the capability of you
            in fulfilling their needs. The cold email must be engaging to read.
            End the email with Name and Current place where your are working or studying.
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):

            """
        )
        chain_email = prompt_email | self.llm
        res = chain_email.invoke({"job_description": str(job_description), "summary": summary})
        return res.content
|
96 |
+
|
97 |
+
# if __name__ == "__main__":
|
98 |
+
# print(os.getenv("GROQ_API_KEY"))
|
utils.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import pdfplumber
|
3 |
+
|
4 |
+
|
5 |
+
# remove not required things and clean the text
|
6 |
+
# remove not required things and clean the text
def clean_text(text):
    """Strip markup, URLs and punctuation from scraped text.

    Steps, in order: remove HTML tags, remove http/https URLs, turn every
    run of whitespace into a single space, drop every character that is not
    an ASCII letter, digit or space, then collapse and trim spaces.

    Whitespace is normalized before special characters are removed so that
    a lone newline or tab still separates words instead of being deleted
    and gluing its neighbours together.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text, with words separated by single spaces.
    """
    # Remove HTML tags
    text = re.sub(r'<[^>]*?>', '', text)
    # Remove URLs
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
    # Newlines, tabs and other whitespace become plain spaces
    text = re.sub(r'\s+', ' ', text)
    # Remove special characters
    text = re.sub(r'[^a-zA-Z0-9 ]', '', text)
    # Removing characters can leave double spaces; collapse and trim them
    return ' '.join(text.split())
|
20 |
+
|
21 |
+
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        uploaded_file: Value passed to ``pdfplumber.open``, or ``None`` when
            nothing was uploaded.

    Returns:
        The page texts joined with ``"\\n"``; an empty string when
        ``uploaded_file`` is ``None`` or the PDF has no pages.
    """
    if uploaded_file is None:
        return ""
    with pdfplumber.open(uploaded_file) as pdf:
        # extract_text() may give None for a page without text (seen in some
        # pdfplumber versions); coerce to "" so str.join cannot raise.
        pages = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(pages)
|