Spaces:

spine-crook
/

cold-email-generator

Sleeping

App Files Community

MandarBhalerao committed on Sep 12, 2024

Commit

9630cd6

unverified ·

1 Parent(s): 17ae79e

Add files via upload

Browse files

removed all app related files outside

Files changed (3) hide show

app.py +46 -0
chains.py +98 -0
utils.py +26 -0

app.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import streamlit as st
+from langchain_community.document_loaders import WebBaseLoader
+from chains import Chain
+# from portfolio import Portfolio
+from utils import clean_text, extract_text_from_pdf
+def create_streamlit_app(llm, clean_text):
+    """Render the cold e-mail generator page and handle a "Generate E-mail" click.
+
+    Args:
+        llm: Object exposing ``extract_jobs(text)``, ``summarize_pdf(text)`` and
+            ``write_mail(job, summary)``.
+        clean_text: Callable applied to the scraped page text before jobs are
+            extracted from it.
+    """
+    st.title("📧 Welcome to Cold E-Mail Generator")
+    # PDF upload section
+    uploaded_file = st.file_uploader("Upload your resume as PDF", type=["pdf"])
+    pdf_text = extract_text_from_pdf(uploaded_file)
+    url_input = st.text_input("Enter the URL of Job Posting:", value="https://careers.myntra.com/job-detail/?id=7431200002")
+    submit_button = st.button("Generate E-mail")
+    if not submit_button:
+        return
+    # Without resume text the model has nothing to base the e-mail on.
+    if not pdf_text:
+        st.warning("Please upload a resume PDF that contains text before generating an e-mail.")
+        return
+    try:
+        loader = WebBaseLoader([url_input])
+        data = clean_text(loader.load().pop().page_content)   # cleans any unnecessary garbage text
+        jobs = llm.extract_jobs(data)                         # create json objects for the job
+        # The resume summary does not depend on the job, so request it once
+        # instead of paying for one LLM call per posting.
+        summarized_text = llm.summarize_pdf(pdf_text) if jobs else None
+        for index, job in enumerate(jobs):                    # one web page may list multiple jobs
+            email = llm.write_mail(job, summarized_text)      # write the email
+            # An explicit key keeps widgets created in the loop distinct even
+            # when two generated e-mails happen to be identical.
+            st.text_area("Email is as follows", value=email, height=500, key=f"email_{index}")
+    except Exception as err:
+        # Top-level UI boundary: surface any failure to the user instead of crashing the page.
+        st.error(f"An Error Occurred: {err}")
+if __name__ == "__main__":
+    # Build the LLM chain, configure the page, then hand both collaborators to the UI.
+    email_chain = Chain()
+    st.set_page_config(layout="wide", page_title="Cold Email Generator", page_icon="📧")
+    create_streamlit_app(llm=email_chain, clean_text=clean_text)

chains.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import os
+from langchain_groq import ChatGroq
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.exceptions import OutputParserException
+from dotenv import load_dotenv
+import streamlit as st
+# Groq API key, read at import time from the "default" section of Streamlit's secrets.
+GROQ_API_KEY = st.secrets["default"]["GROQ_API_KEY"]
+# Alternative (currently disabled): keep the API key in a file called .env in the root folder.
+# load_dotenv()   # This finds the .env file and sets its entries as environment variables
+# print(os.getenv("GROQ_API_KEY"))        # just for testing
+class Chain:
+    """LLM chains for extracting job postings, summarising a resume and drafting a cold e-mail."""
+
+    def __init__(self, model_name="llama-3.1-70b-versatile", temperature=0):
+        """Create the Groq chat model shared by all chains.
+
+        Args:
+            model_name: Groq model identifier passed to ``ChatGroq``.
+            temperature: Sampling temperature passed to ``ChatGroq``.
+        """
+        self.llm = ChatGroq(temperature=temperature, groq_api_key=GROQ_API_KEY, model_name=model_name)
+
+    def extract_jobs(self, cleaned_text):
+        """Ask the model for the job postings in ``cleaned_text`` and parse them as JSON.
+
+        Args:
+            cleaned_text: Cleaned text scraped from a careers page.
+
+        Returns:
+            A list of parsed postings; a single parsed object is wrapped in a list.
+
+        Raises:
+            OutputParserException: If the model response cannot be parsed as JSON.
+        """
+        prompt_extract = PromptTemplate.from_template(
+            """
+            ### SCRAPED TEXT FROM WEBSITE:
+            {page_data}
+            ### INSTRUCTION:
+            The scraped text is from the career's page of a website.
+            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
+            Only return the valid JSON.
+            ### VALID JSON (NO PREAMBLE):
+            """
+        )
+        chain_extract = prompt_extract | self.llm
+        response = chain_extract.invoke(input={"page_data": cleaned_text})
+        json_parser = JsonOutputParser()
+        try:
+            parsed = json_parser.parse(response.content)
+        except OutputParserException as err:
+            # Chain the original error so the real parse failure stays visible in tracebacks.
+            raise OutputParserException("Context too big. Unable to parse jobs.") from err
+        return parsed if isinstance(parsed, list) else [parsed]
+
+    def summarize_pdf(self, pdf_data):
+        """Ask the model for a roughly 200-word summary of the resume text.
+
+        Args:
+            pdf_data: Text extracted from the resume PDF.
+
+        Returns:
+            The content of the model response.
+        """
+        prompt_extract = PromptTemplate.from_template(
+        """
+        ### PDF DATA OBTAINED FROM RESUME:
+        {pdf_data}
+        ### INSTRUCTION:
+        The data is from the resume of a person.
+        Your job is to extract all the details of this person and summarize it in 200 words, which includes name, education, experience, projects, skills.
+        ### (NO PREAMBLE):
+        """
+        )
+        # Piping the prompt into the LLM forms a LangChain chain.
+        chain_extract = prompt_extract | self.llm
+        response = chain_extract.invoke(input={'pdf_data': pdf_data})
+        return response.content
+
+    def write_mail(self, job_description, summary):
+        """Ask the model to write a cold e-mail for one job from the candidate summary.
+
+        Args:
+            job_description: One extracted job posting; it is converted with ``str()``
+                before being placed in the prompt.
+            summary: Summary of the candidate's resume.
+
+        Returns:
+            The content of the model response.
+        """
+        prompt_email = PromptTemplate.from_template(
+        """
+        ### JOB DESCRIPTION:
+        This is a job description
+        {job_description}
+        ### INSTRUCTION:
+        These are the person's details.
+        {summary}
+        Consider yourself as this person.
+        Introduce yourself in an engaging way from above with your name from the above details and your current designation.
+        Try to find some things in the job description which are similar with your details. Mention those things which are similar.
+        Do not mention anything which is not present in the details.
+        Your job is to write a cold email of about 250 words to the hiring manager regarding the job mentioned above describing the capability of you
+        in fulfilling their needs. The cold email must be engaging to read.
+        End the email with Name and Current place where your are working or studying.
+        Do not provide a preamble.
+        ### EMAIL (NO PREAMBLE):
+        """
+        )
+        chain_email = prompt_email | self.llm
+        response = chain_email.invoke({"job_description": str(job_description), "summary": summary})
+        return response.content
+# if __name__ == "__main__":
+#     print(os.getenv("GROQ_API_KEY"))

utils.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import re
+import pdfplumber
+# remove not required things and clean the text
+def clean_text(text):
+    """Strip HTML tags, URLs and special characters from text and normalise whitespace.
+
+    Args:
+        text: Raw text, e.g. the page content scraped from a job posting.
+
+    Returns:
+        The text reduced to ASCII letters and digits, with words separated by
+        single spaces and no leading or trailing whitespace.
+    """
+    # Remove HTML tags
+    text = re.sub(r'<[^>]*?>', '', text)
+    # Remove URLs
+    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
+    # Remove special characters but keep every kind of whitespace, so words
+    # separated only by a newline or tab are not glued together
+    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
+    # Collapse each whitespace run to one space and trim both ends
+    return ' '.join(text.split())
+def extract_text_from_pdf(uploaded_file):
+    """Return the text of every page of an uploaded PDF, joined by newlines.
+
+    Args:
+        uploaded_file: A path or file-like object accepted by ``pdfplumber.open``,
+            or ``None`` when nothing has been uploaded.
+
+    Returns:
+        The concatenated page text, or an empty string when ``uploaded_file`` is
+        ``None`` or the PDF has no pages.
+    """
+    if uploaded_file is None:
+        return ""
+    with pdfplumber.open(uploaded_file) as pdf:
+        # Guard against a page yielding None instead of a string (e.g. a page
+        # with no text layer), which would make str.join raise TypeError.
+        pages = [page.extract_text() or "" for page in pdf.pages]
+    return "\n".join(pages)