Th3BossC committed on
Commit
15ec869
·
1 Parent(s): 0d6526c

added question answering functionality

Browse files
TranscriptApi/__pycache__/models.cpython-310.pyc CHANGED
Binary files a/TranscriptApi/__pycache__/models.cpython-310.pyc and b/TranscriptApi/__pycache__/models.cpython-310.pyc differ
 
TranscriptApi/common/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/TranscriptApi/common/__pycache__/utils.cpython-310.pyc and b/TranscriptApi/common/__pycache__/utils.cpython-310.pyc differ
 
TranscriptApi/common/utils.py CHANGED
@@ -40,7 +40,6 @@ def get_video(video_url, location, filename = 'audio'):
40
  audio_filename = location + filename + '.mp3'
41
  print('[INFO] downloading video...')
42
  video = YouTube(video_url).streams.filter(file_extension = 'mp4').first().download(filename = video_filename)
43
- print('something')
44
  video = VideoFileClip(video_filename)
45
  print('[INFO] extracting audio from video...')
46
  video.audio.write_audiofile(audio_filename)
@@ -141,7 +140,10 @@ def summarize_youtube_video(video_url, outputs_dir):
141
  complete_summary = ' '.join(summaries)
142
  with open(summary_file, 'w') as f:
143
  f.write(complete_summary)
144
- return complete_summary
 
 
 
145
  ############################################################
146
 
147
 
@@ -198,9 +200,9 @@ def summarize_string(text : str):
198
  def summarize_file(file_location, file_extension, working_dir = "TranscriptApi/static/files"):
199
  # _, file_extension = os.path.splitext(file_location)
200
  text = ""
201
- if file_extension == '.pdf':
202
  text = extract_text_pdf(file_location)
203
- elif file_extension == '.txt':
204
  text = extract_text_txt(file_location)
205
  else:
206
  return "[ERROR]"
@@ -208,4 +210,9 @@ def summarize_file(file_location, file_extension, working_dir = "TranscriptApi/s
208
  if os.path.exists(working_dir):
209
  shutil.rmtree(working_dir)
210
  os.mkdir(working_dir)
211
- return summarize_string(text)
 
 
 
 
 
 
40
  audio_filename = location + filename + '.mp3'
41
  print('[INFO] downloading video...')
42
  video = YouTube(video_url).streams.filter(file_extension = 'mp4').first().download(filename = video_filename)
 
43
  video = VideoFileClip(video_filename)
44
  print('[INFO] extracting audio from video...')
45
  video.audio.write_audiofile(audio_filename)
 
140
  complete_summary = ' '.join(summaries)
141
  with open(summary_file, 'w') as f:
142
  f.write(complete_summary)
143
+
144
+ with open(transcripts_file, 'r') as f:
145
+ complete_transcript = f.read()
146
+ return {'transcript': complete_transcript, 'summary' : complete_summary}
147
  ############################################################
148
 
149
 
 
200
  def summarize_file(file_location, file_extension, working_dir = "TranscriptApi/static/files"):
201
  # _, file_extension = os.path.splitext(file_location)
202
  text = ""
203
+ if file_extension == 'pdf':
204
  text = extract_text_pdf(file_location)
205
+ elif file_extension == 'txt':
206
  text = extract_text_txt(file_location)
207
  else:
208
  return "[ERROR]"
 
210
  if os.path.exists(working_dir):
211
  shutil.rmtree(working_dir)
212
  os.mkdir(working_dir)
213
+ return [text, summarize_string(text)]
214
+
215
+ def answer(question: str, context : str):
216
+ # qa = pipeline(task = "question-answering", model = "Th3BossC/QuestionAnsweringModel", tokenizer = "Th3BossC/QuestionAnsweringModel")
217
+ qa = pipeline(task = "question-answering", model = "deepset/roberta-base-squad2")
218
+ return qa(question = question, context = context)['answer']
TranscriptApi/models.py CHANGED
@@ -6,17 +6,19 @@ class VideoSummary(db.Model):
6
  date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
7
  video_id = db.Column(db.String(10), unique = True, nullable = False)
8
  title = db.Column(db.String(100), nullable = False)
 
9
  summary = db.Column(db.Text(), nullable = False)
10
 
11
  def __repr__(self):
12
- print(f'VideoSummary({self.id}, {self.video_id}, {self.title})')
13
 
14
 
15
  class FileSummary(db.Model):
16
  id = db.Column(db.Integer, primary_key = True)
17
  date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
18
  title = db.Column(db.String(100), nullable = False)
 
19
  summary = db.Column(db.Text(), nullable = False)
20
 
21
  def __repr__(self):
22
- print(f"FileSummary({self.id}, {self.title})")
 
6
  date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
7
  video_id = db.Column(db.String(10), unique = True, nullable = False)
8
  title = db.Column(db.String(100), nullable = False)
9
+ transcript = db.Column(db.Text(), nullable = False)
10
  summary = db.Column(db.Text(), nullable = False)
11
 
12
  def __repr__(self):
13
+ f'VideoSummary({self.id}, {self.video_id}, {self.title})'
14
 
15
 
16
  class FileSummary(db.Model):
17
  id = db.Column(db.Integer, primary_key = True)
18
  date = db.Column(db.DateTime(), nullable = False, default = datetime.utcnow)
19
  title = db.Column(db.String(100), nullable = False)
20
+ transcript = db.Column(db.Text(), nullable = False)
21
  summary = db.Column(db.Text(), nullable = False)
22
 
23
  def __repr__(self):
24
+ f"FileSummary({self.id}, {self.title})"
TranscriptApi/resources/__pycache__/routes.cpython-310.pyc CHANGED
Binary files a/TranscriptApi/resources/__pycache__/routes.cpython-310.pyc and b/TranscriptApi/resources/__pycache__/routes.cpython-310.pyc differ
 
TranscriptApi/resources/routes.py CHANGED
@@ -1,62 +1,90 @@
1
  from flask import Blueprint, request, current_app
2
  from flask_restful import Api, Resource
3
- from TranscriptApi.common.utils import title, summarize_youtube_video, summarize_file, summarize_string
4
  from TranscriptApi.models import VideoSummary, FileSummary
5
  from TranscriptApi import db
6
  import os
 
7
 
8
  resources = Blueprint('resources', __name__)
9
  api = Api(resources)
10
 
 
11
  class VideoTranscript(Resource):
12
  def get(self, video_id):
13
  print(request)
14
  summaryExist = VideoSummary.query.filter_by(video_id = video_id).first()
15
  if summaryExist is not None:
16
- return {'title' : summaryExist.title, 'summary' : summaryExist.summary}, 200
17
-
18
-
19
  try:
20
  video_title = title(video_id)
21
  except:
22
  return {'error' : 'Video ID not valid'}, 400
23
  try:
24
- summary = summarize_youtube_video('https://www.youtube.com/watch?v=' + video_id, 'TranscriptApi/common/audio')
25
- newVideo = VideoSummary(title = video_title, video_id = video_id, summary = summary)
26
  db.session.add(newVideo)
27
  db.session.commit()
28
- return {'title' : video_title, 'summary' : summary}, 200
29
  except Exception as e:
30
  return 500
31
-
32
-
33
  api.add_resource(VideoTranscript, '/video_api/<string:video_id>')
34
 
35
 
36
  class FileTranscript(Resource):
37
-
38
  def post(self, type):
39
-
40
-
41
  if type == 'pdf' or type == 'txt':
42
  print(request.files)
43
  file = request.files['file']
44
  file_location = os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename)
45
  file.save(os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename))
46
- summary = summarize_file(file_location = file_location, file_extension = type)
47
  file_name = file.filename
48
  elif type == 'direct_text':
49
- summary = summarize_string(request.json['text'])
50
  file_name = "Entered Text"
51
  if summary == "[ERROR]":
 
 
 
52
  return {'error' : 'We are expreriencing some issues...'}, 500
53
  else:
54
- newSummary = FileSummary(title = file_name, summary = summary)
55
  db.session.add(newSummary)
56
  db.session.commit()
 
 
 
57
  return {'title' : file_name, 'summary' : summary}, 200
58
- print(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
-
61
-
62
- api.add_resource(FileTranscript, '/file_api/<string:type>')
 
 
 
 
 
 
 
 
1
  from flask import Blueprint, request, current_app
2
  from flask_restful import Api, Resource
3
+ from TranscriptApi.common.utils import title, summarize_youtube_video, summarize_file, summarize_string, answer
4
  from TranscriptApi.models import VideoSummary, FileSummary
5
  from TranscriptApi import db
6
  import os
7
+ import shutil
8
 
9
  resources = Blueprint('resources', __name__)
10
  api = Api(resources)
11
 
12
+
13
  class VideoTranscript(Resource):
14
  def get(self, video_id):
15
  print(request)
16
  summaryExist = VideoSummary.query.filter_by(video_id = video_id).first()
17
  if summaryExist is not None:
18
+ return {'video_id' : video_id, 'title' : summaryExist.title, 'summary' : summaryExist.summary}, 200
 
 
19
  try:
20
  video_title = title(video_id)
21
  except:
22
  return {'error' : 'Video ID not valid'}, 400
23
  try:
24
+ output = summarize_youtube_video('https://www.youtube.com/watch?v=' + video_id, 'TranscriptApi/common/audio')
25
+ newVideo = VideoSummary(title = video_title, video_id = video_id, transcript = f"The title of the video is {video_title}. {output['transcript']}", summary = output['summary'])
26
  db.session.add(newVideo)
27
  db.session.commit()
28
+ return {'video_id' : video_id, 'title' : video_title, 'summary' : output['summary']}, 200
29
  except Exception as e:
30
  return 500
 
 
31
  api.add_resource(VideoTranscript, '/video_api/<string:video_id>')
32
 
33
 
34
  class FileTranscript(Resource):
 
35
  def post(self, type):
 
 
36
  if type == 'pdf' or type == 'txt':
37
  print(request.files)
38
  file = request.files['file']
39
  file_location = os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename)
40
  file.save(os.path.join(current_app.config.get('UPLOAD_FOLDER'), file.filename))
41
+ transcript, summary = summarize_file(file_location = file_location, file_extension = type)
42
  file_name = file.filename
43
  elif type == 'direct_text':
44
+ transcript, summary = summarize_string(request.json['text'])
45
  file_name = "Entered Text"
46
  if summary == "[ERROR]":
47
+ if os.path.exists(current_app.config.get('UPLOAD_FOLDER')):
48
+ shutil.rmtree(current_app.config.get('UPLOAD_FOLDER'))
49
+ os.mkdir(current_app.config.get('UPLOAD_FOLDER'))
50
  return {'error' : 'We are expreriencing some issues...'}, 500
51
  else:
52
+ newSummary = FileSummary(title = file_name, transcript = transcript, summary = summary)
53
  db.session.add(newSummary)
54
  db.session.commit()
55
+ if os.path.exists(current_app.config.get('UPLOAD_FOLDER')):
56
+ shutil.rmtree(current_app.config.get('UPLOAD_FOLDER'))
57
+ os.mkdir(current_app.config.get('UPLOAD_FOLDER'))
58
  return {'title' : file_name, 'summary' : summary}, 200
59
+ api.add_resource(FileTranscript, '/file_api/<string:type>')
60
+
61
+
62
+ class VideoQuestions(Resource):
63
+ def post(self, video_id):
64
+ print(request.json)
65
+ videoExists = VideoSummary.query.filter_by(video_id = video_id).first()
66
+ if videoExists is None:
67
+ transcript, summary = summarize_youtube_video('https://www.youtube.com/watch?v=' + video_id, 'TranscriptApi/common/audio')
68
+ video_title = title(video_id)
69
+ newVideo = VideoSummary(title = video_title, video_id = video_id, transcript = f"The title of the video is {video_title}. {transcript}", summary = summary)
70
+
71
+ VideoExists = VideoSummary.query.filter_by(video_id = video_id).first()
72
+ data = request.json # {question : "blabla"}
73
+ try:
74
+ ans = answer(question = data["question"], context = VideoExists.transcript)
75
+ return {'question' : data['question'], 'answer' : ans}, 200
76
+ except:
77
+ return {'error' : 'something went wrong'}, 500
78
+ api.add_resource(VideoQuestions, '/video_question_api/<string:video_id>')
79
+
80
 
81
+ class FileQuestions(Resource):
82
+ def post(self, id):
83
+ transcriptData = FileSummary.query.filter_by(id = id).first()
84
+ print(transcriptData)
85
+ if transcriptData is not None:
86
+ ans = answer(question = request.json['question'], context = transcriptData.transcript)
87
+ return {'question' : request.json['question'], 'answer' : ans}, 200
88
+ else:
89
+ return {'error' : 'file not found'}, 400
90
+ api.add_resource(FileQuestions, '/file_question_api/<int:id>')
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
instance/site.db CHANGED
Binary files a/instance/site.db and b/instance/site.db differ