Spaces:
Running
Running
added question answering functionality
Browse files- TranscriptApi/__pycache__/models.cpython-310.pyc +0 -0
- TranscriptApi/common/__pycache__/utils.cpython-310.pyc +0 -0
- TranscriptApi/common/utils.py +12 -5
- TranscriptApi/models.py +4 -2
- TranscriptApi/resources/__pycache__/routes.cpython-310.pyc +0 -0
- TranscriptApi/resources/routes.py +47 -19
- __pycache__/app.cpython-310.pyc +0 -0
- instance/site.db +0 -0
TranscriptApi/__pycache__/models.cpython-310.pyc
CHANGED
Binary files a/TranscriptApi/__pycache__/models.cpython-310.pyc and b/TranscriptApi/__pycache__/models.cpython-310.pyc differ
|
|
TranscriptApi/common/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/TranscriptApi/common/__pycache__/utils.cpython-310.pyc and b/TranscriptApi/common/__pycache__/utils.cpython-310.pyc differ
|
|
TranscriptApi/common/utils.py
CHANGED
@@ -40,7 +40,6 @@ def get_video(video_url, location, filename = 'audio'):
|
|
40 |
audio_filename = location + filename + '.mp3'
|
41 |
print('[INFO] downloading video...')
|
42 |
video = YouTube(video_url).streams.filter(file_extension = 'mp4').first().download(filename = video_filename)
|
43 |
-
print('something')
|
44 |
video = VideoFileClip(video_filename)
|
45 |
print('[INFO] extracting audio from video...')
|
46 |
video.audio.write_audiofile(audio_filename)
|
@@ -141,7 +140,10 @@ def summarize_youtube_video(video_url, outputs_dir):
|
|
141 |
complete_summary = ' '.join(summaries)
|
142 |
with open(summary_file, 'w') as f:
|
143 |
f.write(complete_summary)
|
144 |
-
|
|
|
|
|
|
|
145 |
############################################################
|
146 |
|
147 |
|
@@ -198,9 +200,9 @@ def summarize_string(text : str):
|
|
198 |
def summarize_file(file_location, file_extension, working_dir = "TranscriptApi/static/files"):
|
199 |
# _, file_extension = os.path.splitext(file_location)
|
200 |
text = ""
|
201 |
-
if file_extension == '
|
202 |
text = extract_text_pdf(file_location)
|
203 |
-
elif file_extension == '
|
204 |
text = extract_text_txt(file_location)
|
205 |
else:
|
206 |
return "[ERROR]"
|
@@ -208,4 +210,9 @@ def summarize_file(file_location, file_extension, working_dir = "TranscriptApi/s
|
|
208 |
if os.path.exists(working_dir):
|
209 |
shutil.rmtree(working_dir)
|
210 |
os.mkdir(working_dir)
|
211 |
-
return summarize_string(text)
|
|
|
|
|
|
|
|
|
|
|
|
40 |
audio_filename = location + filename + '.mp3'
|
41 |
print('[INFO] downloading video...')
|
42 |
video = YouTube(video_url).streams.filter(file_extension = 'mp4').first().download(filename = video_filename)
|
|
|
43 |
video = VideoFileClip(video_filename)
|
44 |
print('[INFO] extracting audio from video...')
|
45 |
video.audio.write_audiofile(audio_filename)
|
|
|
140 |
complete_summary = ' '.join(summaries)
|
141 |
with open(summary_file, 'w') as f:
|
142 |
f.write(complete_summary)
|
143 |
+
|
144 |
+
with open(transcripts_file, 'r') as f:
|
145 |
+
complete_transcript = f.read()
|
146 |
+
return {'transcript': complete_transcript, 'summary' : complete_summary}
|
147 |
############################################################
|
148 |
|
149 |
|
|
|
200 |
def summarize_file(file_location, file_extension, working_dir = "TranscriptApi/static/files"):
|
201 |
# _, file_extension = os.path.splitext(file_location)
|
202 |
text = ""
|
203 |
+
if file_extension == 'pdf':
|
204 |
text = extract_text_pdf(file_location)
|
205 |
+
elif file_extension == 'txt':
|
206 |
text = extract_text_txt(file_location)
|
207 |
else:
|
208 |
return "[ERROR]"
|
|
|
210 |
if os.path.exists(working_dir):
|
211 |
shutil.rmtree(working_dir)
|
212 |
os.mkdir(working_dir)
|
213 |
+
return [text, summarize_string(text)]
|
214 |
+
|
215 |
+
def answer(question: str, context : str):
|
216 |
+
# qa = pipeline(task = "question-answering", model = "Th3BossC/QuestionAnsweringModel", tokenizer = "Th3BossC/QuestionAnsweringModel")
|
217 |
+
qa = pipeline(task = "question-answering", model = "deepset/roberta-base-squad2")
|
218 |
+
return qa(question = question, context = context)['answer']
|
TranscriptApi/models.py
CHANGED
@@ -6,17 +6,19 @@ class VideoSummary(db.Model):
|
|
6 |
date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
|
7 |
video_id = db.Column(db.String(10), unique = True, nullable = False)
|
8 |
title = db.Column(db.String(100), nullable = False)
|
|
|
9 |
summary = db.Column(db.Text(), nullable = False)
|
10 |
|
11 |
def __repr__(self):
|
12 |
-
|
13 |
|
14 |
|
15 |
class FileSummary(db.Model):
|
16 |
id = db.Column(db.Integer, primary_key = True)
|
17 |
date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
|
18 |
title = db.Column(db.String(100), nullable = False)
|
|
|
19 |
summary = db.Column(db.Text(), nullable = False)
|
20 |
|
21 |
def __repr__(self):
|
22 |
-
|
|
|
6 |
date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
|
7 |
video_id = db.Column(db.String(10), unique = True, nullable = False)
|
8 |
title = db.Column(db.String(100), nullable = False)
|
9 |
+
transcript = db.Column(db.Text(), nullable = False)
|
10 |
summary = db.Column(db.Text(), nullable = False)
|
11 |
|
12 |
def __repr__(self):
|
13 |
+
f'VideoSummary({self.id}, {self.video_id}, {self.title})'
|
14 |
|
15 |
|
16 |
class FileSummary(db.Model):
|
17 |
id = db.Column(db.Integer, primary_key = True)
|
18 |
date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
|
19 |
title = db.Column(db.String(100), nullable = False)
|
20 |
+
transcript = db.Column(db.Text(), nullable = False)
|
21 |
summary = db.Column(db.Text(), nullable = False)
|
22 |
|
23 |
def __repr__(self):
|
24 |
+
f"FileSummary({self.id}, {self.title})"
|
TranscriptApi/resources/__pycache__/routes.cpython-310.pyc
CHANGED
Binary files a/TranscriptApi/resources/__pycache__/routes.cpython-310.pyc and b/TranscriptApi/resources/__pycache__/routes.cpython-310.pyc differ
|
|
TranscriptApi/resources/routes.py
CHANGED
@@ -1,62 +1,90 @@
|
|
1 |
from flask import Blueprint, request, current_app
|
2 |
from flask_restful import Api, Resource
|
3 |
-
from TranscriptApi.common.utils import title, summarize_youtube_video, summarize_file, summarize_string
|
4 |
from TranscriptApi.models import VideoSummary, FileSummary
|
5 |
from TranscriptApi import db
|
6 |
import os
|
|
|
7 |
|
8 |
resources = Blueprint('resources', __name__)
|
9 |
api = Api(resources)
|
10 |
|
|
|
11 |
class VideoTranscript(Resource):
|
12 |
def get(self, video_id):
|
13 |
print(request)
|
14 |
summaryExist = VideoSummary.query.filter_by(video_id = video_id).first()
|
15 |
if summaryExist is not None:
|
16 |
-
return {'title' : summaryExist.title, 'summary' : summaryExist.summary}, 200
|
17 |
-
|
18 |
-
|
19 |
try:
|
20 |
video_title = title(video_id)
|
21 |
except:
|
22 |
return {'error' : 'Video ID not valid'}, 400
|
23 |
try:
|
24 |
-
|
25 |
-
newVideo = VideoSummary(title = video_title, video_id = video_id, summary = summary)
|
26 |
db.session.add(newVideo)
|
27 |
db.session.commit()
|
28 |
-
return {'title' : video_title, 'summary' : summary}, 200
|
29 |
except Exception as e:
|
30 |
return 500
|
31 |
-
|
32 |
-
|
33 |
api.add_resource(VideoTranscript, '/video_api/<string:video_id>')
|
34 |
|
35 |
|
36 |
class FileTranscript(Resource):
|
37 |
-
|
38 |
def post(self, type):
|
39 |
-
|
40 |
-
|
41 |
if type == 'pdf' or type == 'txt':
|
42 |
print(request.files)
|
43 |
file = request.files['file']
|
44 |
file_location = os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename)
|
45 |
file.save(os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename))
|
46 |
-
summary = summarize_file(file_location = file_location, file_extension = type)
|
47 |
file_name = file.filename
|
48 |
elif type == 'direct_text':
|
49 |
-
summary = summarize_string(request.json['text'])
|
50 |
file_name = "Entered Text"
|
51 |
if summary == "[ERROR]":
|
|
|
|
|
|
|
52 |
return {'error' : 'We are expreriencing some issues...'}, 500
|
53 |
else:
|
54 |
-
newSummary = FileSummary(title = file_name, summary = summary)
|
55 |
db.session.add(newSummary)
|
56 |
db.session.commit()
|
|
|
|
|
|
|
57 |
return {'title' : file_name, 'summary' : summary}, 200
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from flask import Blueprint, request, current_app
|
2 |
from flask_restful import Api, Resource
|
3 |
+
from TranscriptApi.common.utils import title, summarize_youtube_video, summarize_file, summarize_string, answer
|
4 |
from TranscriptApi.models import VideoSummary, FileSummary
|
5 |
from TranscriptApi import db
|
6 |
import os
|
7 |
+
import shutil
|
8 |
|
9 |
resources = Blueprint('resources', __name__)
|
10 |
api = Api(resources)
|
11 |
|
12 |
+
|
13 |
class VideoTranscript(Resource):
|
14 |
def get(self, video_id):
|
15 |
print(request)
|
16 |
summaryExist = VideoSummary.query.filter_by(video_id = video_id).first()
|
17 |
if summaryExist is not None:
|
18 |
+
return {'video_id' : video_id, 'title' : summaryExist.title, 'summary' : summaryExist.summary}, 200
|
|
|
|
|
19 |
try:
|
20 |
video_title = title(video_id)
|
21 |
except:
|
22 |
return {'error' : 'Video ID not valid'}, 400
|
23 |
try:
|
24 |
+
output = summarize_youtube_video('https://www.youtube.com/watch?v=' + video_id, 'TranscriptApi/common/audio')
|
25 |
+
newVideo = VideoSummary(title = video_title, video_id = video_id, transcript = f"The title of the video is {video_title}. {output['transcript']}", summary = output['summary'])
|
26 |
db.session.add(newVideo)
|
27 |
db.session.commit()
|
28 |
+
return {'video_id' : video_id, 'title' : video_title, 'summary' : output['summary']}, 200
|
29 |
except Exception as e:
|
30 |
return 500
|
|
|
|
|
31 |
api.add_resource(VideoTranscript, '/video_api/<string:video_id>')
|
32 |
|
33 |
|
34 |
class FileTranscript(Resource):
|
|
|
35 |
def post(self, type):
|
|
|
|
|
36 |
if type == 'pdf' or type == 'txt':
|
37 |
print(request.files)
|
38 |
file = request.files['file']
|
39 |
file_location = os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename)
|
40 |
file.save(os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename))
|
41 |
+
transcript, summary = summarize_file(file_location = file_location, file_extension = type)
|
42 |
file_name = file.filename
|
43 |
elif type == 'direct_text':
|
44 |
+
transcript, summary = summarize_string(request.json['text'])
|
45 |
file_name = "Entered Text"
|
46 |
if summary == "[ERROR]":
|
47 |
+
if os.path.exists(current_app.config.get('UPLOAD_FOLDER')):
|
48 |
+
shutil.rmtree(current_app.config.get('UPLOAD_FOLDER'))
|
49 |
+
os.mkdir(current_app.config.get('UPLOAD_FOLDER'))
|
50 |
return {'error' : 'We are expreriencing some issues...'}, 500
|
51 |
else:
|
52 |
+
newSummary = FileSummary(title = file_name, transcript = transcript, summary = summary)
|
53 |
db.session.add(newSummary)
|
54 |
db.session.commit()
|
55 |
+
if os.path.exists(current_app.config.get('UPLOAD_FOLDER')):
|
56 |
+
shutil.rmtree(current_app.config.get('UPLOAD_FOLDER'))
|
57 |
+
os.mkdir(current_app.config.get('UPLOAD_FOLDER'))
|
58 |
return {'title' : file_name, 'summary' : summary}, 200
|
59 |
+
api.add_resource(FileTranscript, '/file_api/<string:type>')
|
60 |
+
|
61 |
+
|
62 |
+
class VideoQuestions(Resource):
|
63 |
+
def post(self, video_id):
|
64 |
+
print(request.json)
|
65 |
+
videoExists = VideoSummary.query.filter_by(video_id = video_id).first()
|
66 |
+
if videoExists is None:
|
67 |
+
transcript, summary = summarize_youtube_video('https://www.youtube.com/watch?v=' + video_id, 'TranscriptApi/common/audio')
|
68 |
+
video_title = title(video_id)
|
69 |
+
newVideo = VideoSummary(title = video_title, video_id = video_id, transcript = f"The title of the video is {video_title}. {transcript}", summary = summary)
|
70 |
+
|
71 |
+
VideoExists = VideoSummary.query.filter_by(video_id = video_id).first()
|
72 |
+
data = request.json # {question : "blabla"}
|
73 |
+
try:
|
74 |
+
ans = answer(question = data["question"], context = VideoExists.transcript)
|
75 |
+
return {'question' : data['question'], 'answer' : ans}, 200
|
76 |
+
except:
|
77 |
+
return {'error' : 'something went wrong'}, 500
|
78 |
+
api.add_resource(VideoQuestions, '/video_question_api/<string:video_id>')
|
79 |
+
|
80 |
|
81 |
+
class FileQuestions(Resource):
|
82 |
+
def post(self, id):
|
83 |
+
transcriptData = FileSummary.query.filter_by(id = id).first()
|
84 |
+
print(transcriptData)
|
85 |
+
if transcriptData is not None:
|
86 |
+
ans = answer(question = request.json['question'], context = transcriptData.transcript)
|
87 |
+
return {'question' : request.json['question'], 'answer' : ans}, 200
|
88 |
+
else:
|
89 |
+
return {'error' : 'file not found'}, 400
|
90 |
+
api.add_resource(FileQuestions, '/file_question_api/<int:id>')
|
__pycache__/app.cpython-310.pyc
CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
|
|
instance/site.db
CHANGED
Binary files a/instance/site.db and b/instance/site.db differ
|
|