merligus committed on
Commit
2566a62
·
1 Parent(s): 4ff8f6d

query context, answer, and sources update

Browse files
Files changed (2) hide show
  1. data/books/segment.py +70 -65
  2. query.py +10 -11
data/books/segment.py CHANGED
@@ -1,80 +1,85 @@
1
  import os
2
 
3
- # path to the directory with the files
4
- path = "./alice_segmented"
 
 
 
5
 
6
  # create the folder; do nothing if it already exists
7
  os.makedirs(path, exist_ok=True)
8
 
9
  # word to segment the file
10
- # words_list = [
11
- # "SUMÁRIO",
12
- # "INTRODUÇÃO",
13
- # "CA­PÍ­TU­LO I",
14
- # "CA­PÍ­TU­LO II",
15
- # "CA­PÍ­TU­LO III",
16
- # "CA­PÍ­TU­LO IV",
17
- # "CA­PÍ­TU­LO V",
18
- # "CA­PÍ­TU­LO VI",
19
- # "CA­PÍ­TU­LO VII",
20
- # "CA­PÍ­TU­LO VI­II",
21
- # "CA­PÍ­TU­LO IX",
22
- # "CA­PÍ­TU­LO X",
23
- # "CA­PÍ­TU­LO XI",
24
- # "CA­PÍ­TU­LO XII",
25
- # "CA­PÍ­TU­LO XI­II",
26
- # "CA­PÍ­TU­LO XIV",
27
- # "CA­PÍ­TU­LO XV",
28
- # "CA­PÍ­TU­LO XVI",
29
- # "CA­PÍ­TU­LO XVII",
30
- # "CA­PÍ­TU­LO XVI­II",
31
- # "CA­PÍ­TU­LO XIX",
32
- # "CA­PÍ­TU­LO XX",
33
- # "CA­PÍ­TU­LO XXI",
34
- # "CA­PÍ­TU­LO XXII",
35
- # "CA­PÍ­TU­LO XXI­II",
36
- # "CA­PÍ­TU­LO XXIV",
37
- # "CA­PÍ­TU­LO XXV",
38
- # "CA­PÍ­TU­LO XX­VI",
39
- # "CA­PÍ­TU­LO XX­VII",
40
- # "CON­TO: O HÓS­PE­DE DE DRÁ­CU­LA",
41
- # "A NO­VA HIS­TÓ­RIA DO",
42
- # "RE­SE­NHA",
43
- # "LEI­TU­RA",
44
- # "HAMPSHI­RE AD­VER­TI­SER",
45
- # "Ro­man­ces Re­cen­tes",
46
- # "EN­TRE­VIS­TA DA",
47
- # "Wil­li­am Glads­to­ne",
48
- # "MARY ELI­ZA­BE­TH BRAD­DON",
49
- # "SIR AR­THUR CO­NAN DOY­LE",
50
- # "OS­CAR WIL­DE",
51
- # "POS­FÁ­CIO",
52
- # "A SOM­BRA DO VAM­PI­RO",
53
- # "VAMPIRO En­ci­clo­pé­dia Bri­tan­ni­ca",
54
- # "O Vam­pi­ro",
55
- # "Frag­men­to*",
56
- # "Car­mil­la",
57
- # "Char­les Bau­de­lai­re: O Vam­pi­ro",
58
- # ]
59
  words_list = [
60
- "PRE",
61
- "CHAPTER I",
62
- "CHAPTER II",
63
- "CHAPTER III",
64
- "CHAPTER IV",
65
- "CHAPTER V",
66
- "CHAPTER VI",
67
- "CHAPTER VII",
68
- "CHAPTER VIII",
69
- "CHAPTER IX",
70
- "CHAPTER X",
71
- "CHAPTER XI",
72
- "CHAPTER XII",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # open the file
76
  with open(
77
- "alice_in_wonderland.md",
78
  "r",
79
  ) as file:
80
  # current word to look for
 
1
  import os
2
 
3
+ # path to the input file
4
+ input_file = "dracula.txt"
5
+
6
+ # path to the out directory
7
+ path = "./dracula_segmented"
8
 
9
  # create the folder; do nothing if it already exists
10
  os.makedirs(path, exist_ok=True)
11
 
12
  # word to segment the file
13
+ # dracula
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  words_list = [
15
+ "SUMÁRIO",
16
+ "INTRODUÇÃO",
17
+ "CAPÍTULO I",
18
+ "CAPÍTULO II",
19
+ "CAPÍTULO III",
20
+ "CAPÍTULO IV",
21
+ "CAPÍTULO V",
22
+ "CAPÍTULO VI",
23
+ "CAPÍTULO VII",
24
+ "CAPÍTULO VIII",
25
+ "CAPÍTULO IX",
26
+ "CAPÍTULO X",
27
+ "CAPÍTULO XI",
28
+ "CAPÍTULO XII",
29
+ "CAPÍTULO XIII",
30
+ "CAPÍTULO XIV",
31
+ "CAPÍTULO XV",
32
+ "CAPÍTULO XVI",
33
+ "CAPÍTULO XVII",
34
+ "CAPÍTULO XVIII",
35
+ "CAPÍTULO XIX",
36
+ "CAPÍTULO XX",
37
+ "CAPÍTULO XXI",
38
+ "CAPÍTULO XXII",
39
+ "CAPÍTULO XXIII",
40
+ "CAPÍTULO XXIV",
41
+ "CAPÍTULO XXV",
42
+ "CAPÍTULO XXVI",
43
+ "CAPÍTULO XXVII",
44
+ "CONTO: O HÓSPEDE DE DRÁCULA",
45
+ "A NOVA HISTÓRIA DO",
46
+ "RESENHA",
47
+ "LEITURA",
48
+ "HAMPSHIRE ADVERTISER",
49
+ "Romances Recentes",
50
+ "ENTREVISTA DA",
51
+ "William Gladstone",
52
+ "MARY ELIZABETH BRADDON",
53
+ "SIR ARTHUR CONAN DOYLE",
54
+ "OSCAR WILDE",
55
+ "POSFÁCIO",
56
+ "A SOMBRA DO VAMPIRO",
57
+ "VAMPIRO Enciclopédia Britannica",
58
+ "O Vampiro",
59
+ "Fragmento*",
60
+ "Carmilla",
61
+ "Charles Baudelaire: O Vampiro",
62
  ]
63
+ # alice
64
+ # words_list = [
65
+ # "PRE",
66
+ # "CHAPTER I",
67
+ # "CHAPTER II",
68
+ # "CHAPTER III",
69
+ # "CHAPTER IV",
70
+ # "CHAPTER V",
71
+ # "CHAPTER VI",
72
+ # "CHAPTER VII",
73
+ # "CHAPTER VIII",
74
+ # "CHAPTER IX",
75
+ # "CHAPTER X",
76
+ # "CHAPTER XI",
77
+ # "CHAPTER XII",
78
+ # ]
79
 
80
  # open the file
81
  with open(
82
+ input_file,
83
  "r",
84
  ) as file:
85
  # current word to look for
query.py CHANGED
@@ -74,16 +74,15 @@ Answer the question based on the above context in question's original language:
74
  # pipeline
75
  chain = prompt | llm
76
 
77
- print(f"Context:\n{context}\n")
78
 
79
  # ask
80
- print(
81
- chain.invoke(
82
- {
83
- "context": context,
84
- "question": question,
85
- }
86
- ).content
87
- )
88
-
89
- print(f"\nSources:\n{sources}")
 
74
  # pipeline
75
  chain = prompt | llm
76
 
77
+ print(f"Context:\n{context}\n*************************")
78
 
79
  # ask
80
+ answer = chain.invoke(
81
+ {
82
+ "context": context,
83
+ "question": question,
84
+ }
85
+ ).content
86
+ print(f"Answer:\n{answer}\n*************************")
87
+
88
+ print(f"Sources:\n{sources}")