JUNGU committed on
Commit
9ad2423
·
verified ·
1 Parent(s): 24238f5

Create youtube_worksheet.py

Browse files
Files changed (1) hide show
  1. youtube_worksheet.py +64 -0
youtube_worksheet.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ import google.generativeai as genai
3
+ from docx import Document
4
+ import re
5
+
6
class YouTubeWorksheet:
    """Turn a YouTube video's transcript into a fill-in-the-blank worksheet.

    Typical flow: ``get_transcript(url)`` -> ``create_worksheet(text)`` ->
    ``save_to_docx(table_text)``. The worksheet table has three columns:
    original sentence, blank-filling exercise, and Korean translation.
    """

    def __init__(self, api_key):
        """Configure the Gemini client and select the generation model.

        Args:
            api_key: API key handed to ``genai.configure``.
        """
        # Initialise the Gemini API.
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-1.5-pro')

    def get_video_id(self, url):
        """Extract the 11-character video ID from a YouTube URL.

        Args:
            url: URL text such as ``https://www.youtube.com/watch?v=<id>`` or
                ``https://youtu.be/<id>``.

        Returns:
            The first 11-character ID-like token that follows ``v=`` or ``/``,
            or ``None`` when there is none or ``url`` is not a string.
        """
        # Guard against None / non-string input so callers get the same
        # "no ID" result (None) instead of a TypeError from re.search.
        if not isinstance(url, str):
            return None
        match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', url)
        return match.group(1) if match else None

    def get_transcript(self, url):
        """Fetch the transcript of the video at ``url`` as one string.

        Args:
            url: YouTube video URL.

        Returns:
            The transcript entries' ``text`` fields joined with single
            spaces, or ``None`` when no video ID can be extracted or the
            transcript lookup fails (the error is printed, not raised).
        """
        video_id = self.get_video_id(url)
        if not video_id:
            return None
        try:
            # NOTE(review): newer youtube-transcript-api releases may not
            # expose this static method -- confirm against the installed
            # version.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            return ' '.join(entry['text'] for entry in transcript)
        except Exception as e:
            # Deliberate best-effort: report the failure and signal it to the
            # caller with None.
            print(f"자막 추출 오류: {e}")
            return None

    def create_worksheet(self, transcript):
        """Ask the Gemini model to convert a transcript into worksheet rows.

        The prompt requests, per sentence, a blank-filling exercise and a
        Korean translation, formatted as a pipe-delimited table.

        Args:
            transcript: Transcript text interpolated into the prompt.

        Returns:
            The ``text`` attribute of the model response.
        """
        prompt = f"""
        다음 텍스트를 문장별로 나누고, 각 문장에 대해:
        1. 빈칸 문제 만들기 (중요 단어를 ___로 대체)
        2. 한국어로 번역하기

        텍스트: {transcript}

        표 형식으로 출력:
        원문장|빈칸 문제|한국어 번역
        """

        response = self.model.generate_content(prompt)
        return response.text

    @staticmethod
    def _parse_table_rows(content, n_cols=3):
        """Parse pipe-delimited table text into data rows, header excluded.

        Lines without a ``|`` (blank lines, surrounding prose) and Markdown
        separator rows such as ``|---|---|`` are ignored. One leading and one
        trailing ``|`` are removed from each row, and a row is split into at
        most ``n_cols`` cells so surplus pipes stay inside the last cell
        instead of overflowing the table. The first remaining row is treated
        as the header and dropped.

        Args:
            content: Table text; ``None`` or an empty string yields ``[]``.
            n_cols: Maximum number of cells per row.

        Returns:
            A list of rows, each a list of stripped cell strings.
        """
        rows = []
        for raw_line in (content or '').splitlines():
            line = raw_line.strip()
            if '|' not in line:
                continue
            # Separator / empty rows consist solely of pipes, dashes, colons
            # and whitespace.
            if re.fullmatch(r'[\s|:-]+', line):
                continue
            if line.startswith('|'):
                line = line[1:]
            if line.endswith('|'):
                line = line[:-1]
            rows.append([cell.strip() for cell in line.split('|', n_cols - 1)])
        return rows[1:]

    def save_to_docx(self, content, output_file="worksheet.docx"):
        """Write the worksheet table to a Word document.

        Args:
            content: Pipe-delimited table text, e.g. the value returned by
                ``create_worksheet``.
            output_file: Path the document is saved to.

        Returns:
            ``output_file``.
        """
        doc = Document()
        doc.add_heading('YouTube 학습 활동지', 0)

        headers = ['원문장', '빈칸 문제', '한국어 번역']
        # Parse first: each row is capped at len(headers) cells, so the cell
        # writes below can never index past the table's columns.
        rows = self._parse_table_rows(content, len(headers))

        # One extra row on top for the header line.
        table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
        table.style = 'Table Grid'

        for col, header in enumerate(headers):
            table.cell(0, col).text = header

        for row_idx, cells in enumerate(rows, start=1):
            for col, text in enumerate(cells):
                table.cell(row_idx, col).text = text

        doc.save(output_file)
        return output_file