Spaces:
Build error
Build error
Commit
·
174db76
1
Parent(s):
e89f604
Update app.py
Browse files
app.py
CHANGED
@@ -4,9 +4,180 @@ import math
|
|
4 |
# Text-classification pipeline loaded from the "AhmedTaha012/managersFeedback-V1.0.7" checkpoint.
# NOTE(review): `pipeline` is presumably transformers.pipeline; its import is not visible in this section.
sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
|
5 |
# Text-classification pipeline loaded from the "AhmedTaha012/nextQuarter-status-V1.1.9" checkpoint.
# Presumably predicts a next-quarter increase/decrease label (inferred from the names; confirm).
increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
|
6 |
# Token-classification pipeline loaded from the "AhmedTaha012/finance-ner-v0.0.9-finetuned-ner" checkpoint.
ner_model = pipeline("token-classification", model="AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# Page header and the free-text box the transcript is pasted into.
st.title("Transcript Analysis")
transcript = st.text_area("Enter the transcript:", height=200)

# Whitespace-tokenise the transcript and cut it into consecutive windows of
# at most `splitSize` tokens; the final window may be shorter.
tokens = transcript.split()
splitSize = 256
chunks = [tokens[start:start + splitSize] for start in range(0, len(tokens), splitSize)]
|
|
|
4 |
# Text-classification pipeline loaded from the "AhmedTaha012/managersFeedback-V1.0.7" checkpoint.
# NOTE(review): `pipeline` is presumably transformers.pipeline; its import is not visible in this section.
sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
|
5 |
# Text-classification pipeline loaded from the "AhmedTaha012/nextQuarter-status-V1.1.9" checkpoint.
# Presumably predicts a next-quarter increase/decrease label (inferred from the names; confirm).
increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
|
6 |
# Token-classification pipeline loaded from the "AhmedTaha012/finance-ner-v0.0.9-finetuned-ner" checkpoint.
ner_model = pipeline("token-classification", model="AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
|
7 |
+
def getSpeakers(data):
    """Return the participant lines that follow the speaker-list heading.

    The headings "Speakers", "Call participants" and "Call Participants" are
    tried in that order. For the first one found, the text after its last
    occurrence is scanned and every line containing "--" is kept.

    Args:
        data: Full transcript text.

    Returns:
        The matching lines joined with newlines, or "" when none of the
        headings occurs in ``data`` (previously the function fell through and
        returned None in that case).
    """
    for heading in ("Speakers", "Call participants", "Call Participants"):
        if heading in data:
            roster = data.split(heading)[-1]
            return "\n".join(line for line in roster.split("\n") if "--" in line)
    # No heading found: return an empty string, like getQA does, instead of None.
    return ""
|
14 |
+
def removeSpeakers(data):
    """Return the transcript text that precedes the speaker-list heading.

    The headings "Speakers", "Call participants" and "Call Participants" are
    tried in that order; everything from the first occurrence of the first
    heading found is dropped.

    Args:
        data: Full transcript text.

    Returns:
        The text before the heading, or ``data`` unchanged when no heading is
        present. Previously the function returned None in that case, which
        made the next pipeline step (a substring test on the result) raise
        TypeError.
    """
    for heading in ("Speakers", "Call participants", "Call Participants"):
        if heading in data:
            return data.partition(heading)[0]
    # Nothing to strip: hand the text back untouched rather than returning None.
    return data
|
21 |
+
def getQA(data):
    """Return the text that follows the question-and-answer heading.

    The headings "Questions and Answers", "Questions & Answers" and "Q&A" are
    tried in that order; for the first one present, the text after its last
    occurrence is returned. Returns "" when no heading is found.
    """
    qa_headings = ("Questions and Answers", "Questions & Answers", "Q&A")
    for heading in qa_headings:
        if heading not in data:
            continue
        pieces = data.split(heading)
        return pieces[-1]
    return ""
|
30 |
+
def removeQA(data):
    """Return the transcript text that precedes the question-and-answer section.

    The headings "Questions and Answers", "Questions & Answers" and "Q&A" are
    tried in that order; everything from the first occurrence of the first
    heading found is dropped.

    Args:
        data: Transcript text.

    Returns:
        The text before the heading, or ``data`` unchanged when no heading is
        present. Previously a transcript without a Q&A heading was reduced to
        "", so the rest of the pipeline received no text at all.
    """
    for heading in ("Questions and Answers", "Questions & Answers", "Q&A"):
        if heading in data:
            return data.partition(heading)[0]
    # No Q&A section to remove: keep the prepared remarks as they are.
    return data
|
39 |
+
def clean_and_preprocess(text, min_line_length=100):
    """Lower-case long lines of ``text`` and strip English stop words from them.

    Lines whose length does not exceed ``min_line_length`` characters are
    dropped. Each remaining line is lower-cased, tokenised with
    ``nltk.word_tokenize``, filtered against the English stop-word list and
    re-joined with single spaces.

    NOTE(review): an identical definition of this function appears again later
    in the file and overrides this one at import time.
    NOTE(review): relies on ``nltk`` and ``stopwords`` (presumably
    ``nltk.corpus.stopwords``) being imported at the top of the file; those
    imports are not visible in this section.

    Args:
        text: Raw text with newline-separated lines.
        min_line_length: A line is kept only if it is strictly longer than
            this many characters. Defaults to 100, the previously hard-coded
            threshold.

    Returns:
        The cleaned lines joined with newlines; "" when no line is long enough.
    """
    long_lines = [line for line in text.split("\n") if len(line) > min_line_length]
    if not long_lines:
        # Nothing to clean, so skip loading the stop-word corpus altogether.
        return ""

    # Build the stop-word set once instead of once per line.
    stop_words = set(stopwords.words('english'))

    cleaned_lines = []
    for line in long_lines:
        words = nltk.word_tokenize(line.lower())
        cleaned_lines.append(' '.join(word for word in words if word not in stop_words))
    return "\n".join(cleaned_lines)
|
55 |
+
def replace_abbreviations(text):
    """Expand common finance abbreviations in ``text`` to their full forms.

    All abbreviations are matched in a single pass. Longer abbreviations are
    tried first, and a match must not be directly preceded or followed by an
    ASCII letter. This fixes two problems of the earlier chain of
    ``str.replace`` calls: a shorter abbreviation no longer clobbers a longer
    one that starts with it ("EBITDAR", "ROIC", "EBITDAX-C", ...), and
    abbreviations are no longer rewritten inside ordinary words ("APRIL",
    "PATENT"). Digits next to a match are still allowed, so "FY2023" is still
    expanded.

    Args:
        text: Text to expand.

    Returns:
        ``text`` with every stand-alone abbreviation replaced by its long form.
    """
    import re  # local import: the file's import block is not visible from this section

    # Each abbreviation is listed once. Where the earlier literal repeated a
    # key with a different value ('GAAP', 'CAPEX'), the later value (the one
    # that was actually in effect) is kept.
    replacements = {
        'Q1': 'first quarter',
        'Q2': 'second quarter',
        'Q3': 'third quarter',
        'Q4': 'fourth quarter',
        'q1': 'first quarter',
        'q2': 'second quarter',
        'q3': 'third quarter',
        'q4': 'fourth quarter',
        'FY': 'fiscal year',
        'YoY': 'year over year',
        'MoM': 'month over month',
        'EBITDA': 'earnings before interest, taxes, depreciation, and amortization',
        'ROI': 'return on investment',
        'EPS': 'earnings per share',
        'P/E': 'price-to-earnings',
        'DCF': 'discounted cash flow',
        'CAGR': 'compound annual growth rate',
        'GDP': 'gross domestic product',
        'CFO': 'chief financial officer',
        'GAAP': 'Generally Accepted Accounting Principles',
        'SEC': 'U.S. Securities and Exchange Commission',
        'IPO': 'initial public offering',
        'M&A': 'mergers and acquisitions',
        'EBIT': 'earnings before interest and taxes',
        'IRR': 'internal rate of return',
        'ROA': 'return on assets',
        'ROE': 'return on equity',
        'NAV': 'net asset value',
        'PE ratio': 'price-to-earnings ratio',
        'EPS growth': 'earnings per share growth',
        'Fiscal Year': 'financial year',
        'CAPEX': 'capital expenditures',
        'APR': 'annual percentage rate',
        'P&L': 'profit and loss',
        'NPM': 'net profit margin',
        'EBT': 'earnings before taxes',
        'EBITDAR': 'earnings before interest, taxes, depreciation, amortization, and rent',
        'PAT': 'profit after tax',
        'COGS': 'cost of goods sold',
        'EBTIDA': 'earnings before taxes, interest, depreciation, and amortization',
        'E&Y': 'Ernst & Young',
        'B2B': 'business to business',
        'B2C': 'business to consumer',
        'LIFO': 'last in, first out',
        'FIFO': 'first in, first out',
        'FCF': 'free cash flow',
        'LTM': 'last twelve months',
        'OPEX': 'operating expenses',
        'TSR': 'total shareholder return',
        'PP&E': 'property, plant, and equipment',
        'PBT': 'profit before tax',
        'EBITDAR margin': 'earnings before interest, taxes, depreciation, amortization, and rent margin',
        'ROIC': 'return on invested capital',
        'YOY': 'year-over-year',
        'MOM': 'month-over-month',
        'EBIT margin': 'earnings before interest and taxes margin',
        'EBTA': 'earnings before taxes and amortization',
        'FTE': 'full-time equivalent',
        'EBIDTA': 'earnings before interest, depreciation, taxes, and amortization',
        'PESTEL': 'Political, Economic, Social, Technological, Environmental, and Legal',
        'KPI': 'key performance indicator',
        'SWOT': 'Strengths, Weaknesses, Opportunities, Threats',
        'EBITDARM': 'earnings before interest, taxes, depreciation, amortization, rent, and management fees',
        'EBITDAX': 'earnings before interest, taxes, depreciation, amortization, and exploration expenses',
        'EBITDAS': 'earnings before interest, taxes, depreciation, amortization, and restructuring costs',
        'EBITDAX-C': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and commodity derivatives',
        'EBITDAX-R': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and asset retirement obligations',
        'EBITDAX-E': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and environmental liabilities',
        # Add more abbreviations and replacements as needed
    }

    # Longest keys first: regex alternation takes the first alternative that
    # matches, so 'EBITDAR margin' must be listed before 'EBITDAR', and so on.
    ordered_keys = sorted(replacements, key=len, reverse=True)
    alternatives = "|".join(re.escape(key) for key in ordered_keys)
    # Letter-only lookarounds keep matches out of the middle of words while
    # still allowing adjacent digits or punctuation.
    pattern = r"(?<![A-Za-z])(?:" + alternatives + r")(?![A-Za-z])"

    return re.sub(pattern, lambda match: replacements[match.group(0)], text)
|
150 |
+
|
151 |
+
def clean_and_preprocess(text, min_line_length=100):
    """Lower-case long lines of ``text`` and strip English stop words from them.

    Lines whose length does not exceed ``min_line_length`` characters are
    dropped. Each remaining line is lower-cased, tokenised with
    ``nltk.word_tokenize``, filtered against the English stop-word list and
    re-joined with single spaces.

    NOTE(review): this function is also defined earlier in the file; this
    later definition is the one in effect after import.
    NOTE(review): relies on ``nltk`` and ``stopwords`` (presumably
    ``nltk.corpus.stopwords``) being imported at the top of the file; those
    imports are not visible in this section.

    Args:
        text: Raw text with newline-separated lines.
        min_line_length: A line is kept only if it is strictly longer than
            this many characters. Defaults to 100, the previously hard-coded
            threshold.

    Returns:
        The cleaned lines joined with newlines; "" when no line is long enough.
    """
    long_lines = [line for line in text.split("\n") if len(line) > min_line_length]
    if not long_lines:
        # Nothing to clean, so skip loading the stop-word corpus altogether.
        return ""

    # Build the stop-word set once instead of once per line.
    stop_words = set(stopwords.words('english'))

    cleaned_lines = []
    for line in long_lines:
        words = nltk.word_tokenize(line.lower())
        cleaned_lines.append(' '.join(word for word in words if word not in stop_words))
    return "\n".join(cleaned_lines)
|
167 |
+
|
168 |
+
|
169 |
+
|
170 |
+
|
171 |
+
|
172 |
+
|
173 |
|
174 |
# Page header and the free-text box the transcript is pasted into.
st.title("Transcript Analysis")
transcript = st.text_area("Enter the transcript:", height=200)

# Expand finance abbreviations. This used to be called twice back to back;
# the second call had nothing left to expand, so one call is enough.
transcript = replace_abbreviations(transcript)

# Drop the participant roster. removeSpeakers may yield None when the text has
# no roster heading; keep the transcript as-is in that case so the following
# steps always receive a string.
without_speakers = removeSpeakers(transcript)
if without_speakers is not None:
    transcript = without_speakers

# Keep only the prepared remarks, then normalise them.
transcript = removeQA(transcript)
transcript = clean_and_preprocess(transcript)

# Whitespace-tokenise and cut into consecutive windows of at most `splitSize`
# tokens; the final window may be shorter.
tokens = transcript.split()
splitSize = 256
chunks = [tokens[start:start + splitSize] for start in range(0, len(tokens), splitSize)]
|