crystal-technologies committed
Commit 714d948 · 1 Parent(s): 4a51238

Upload 1653 files
.gitattributes CHANGED
@@ -4,3 +4,4 @@ Perceptrix/finetune/scripts/eval/local_data/reading_comprehension/narrative_qa.j
 Perceptrix/finetune/scripts/eval/local_data/symbolic_problem_solving/bigbench_elementary_math_qa.jsonl filter=lfs diff=lfs merge=lfs -text
 SoundScribe/SpeakerID/tools/speech_data_explorer/screenshot.png filter=lfs diff=lfs merge=lfs -text
 SoundScribe/voices/Vatsal.wav filter=lfs diff=lfs merge=lfs -text
+database/audio.wav filter=lfs diff=lfs merge=lfs -text
CircumSpect/vqa/vqa.py CHANGED
@@ -4,7 +4,7 @@ from CircumSpect.vqa.conversation_obj import conv_templates_obj, SeparatorStyle_
 from CircumSpect.vqa.conversation_vqa import conv_templates, SeparatorStyle
 from transformers import AutoTokenizer, BitsAndBytesConfig
 from CircumSpect.vqa.utils import disable_torch_init
-from CircumSpect.vqa.streamer import TextStreamer
+from Perceptrix.streamer import TextStreamer
 from CircumSpect.vqa.model import *
 from utils import setup_device
 from io import BytesIO
@@ -95,6 +95,9 @@ if "mpt" in model_name.lower():
 else:
     roles = conv.roles
 
+streamer = TextStreamer(tokenizer, skip_prompt=True,
+                        skip_special_tokens=True, save_file="vlm-reply.txt")
+
 
 def answer_question(question, image_file):
     conv = conv_templates[conv_mode].copy()
@@ -130,8 +133,6 @@ def answer_question(question, image_file):
     keywords = [stop_str]
     stopping_criteria = KeywordsStoppingCriteria(
         keywords, tokenizer, input_ids)
-    streamer = TextStreamer(tokenizer, skip_prompt=True,
-                            skip_special_tokens=True)
 
     with open("./database/vlm-reply.txt", 'w') as clear_file:
         clear_file.write("")
@@ -193,8 +194,6 @@ def find_object_description(question, image_file):
     keywords = [stop_str]
     stopping_criteria = KeywordsStoppingCriteria(
         keywords, tokenizer, input_ids)
-    streamer = TextStreamer(tokenizer, skip_prompt=True,
-                            skip_special_tokens=True)
 
     with torch.inference_mode():
         output_ids = model.generate(
Perceptrix/__init__.py ADDED
@@ -0,0 +1,2 @@
+from Perceptrix.engine import robotix, identify_objects_from_text, search_keyword
+from chat import perceptrix
Perceptrix/chat.py ADDED
@@ -0,0 +1,123 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig, GenerationConfig
+from Perceptrix.streamer import TextStreamer
+from utils import setup_device
+import torch
+import os
+
+model_name = os.environ.get('CHAT_MODEL')
+
+model_path = "models/CRYSTAL-chat" if model_name == None else model_name
+config = AutoConfig.from_pretrained(
+    model_path, trust_remote_code=True)
+
+device = setup_device()
+
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float32 if device == "cpu" else torch.bfloat16
+)
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype=torch.float32 if device == "cpu" else torch.bfloat16,
+    config=config,
+    device_map="auto",
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    offload_folder="offloads",
+    quantization_config=bnb_config if str(device) != "cpu" else None,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(
+    model_path,
+    trust_remote_code=True,
+)
+
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
+tokenizer.padding_side = "left"
+tokenizer = tokenizer
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.eval()
+
+streamer = TextStreamer(tokenizer, skip_prompt=True,
+                        skip_special_tokens=True, save_file="reply.txt")
+
+def evaluate(
+    prompt='',
+    temperature=0.4,
+    top_p=0.65,
+    top_k=35,
+    repetition_penalty=1.1,
+    max_new_tokens=512,
+    **kwargs,
+):
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to(device)
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        **kwargs,
+    )
+
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=max_new_tokens,
+            eos_token_id=tokenizer.eos_token_id,
+            pad_token_id=tokenizer.pad_token_id,
+            streamer=streamer,
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s, skip_special_tokens=True)
+    yield output.split("### Response:")[-1].strip()
+
+
+def predict(
+    inputs,
+    temperature=0.4,
+    top_p=0.65,
+    top_k=35,
+    repetition_penalty=1.1,
+    max_new_tokens=512,
+):
+    now_prompt = inputs
+
+    response = evaluate(
+        now_prompt, temperature, top_p, top_k, repetition_penalty, max_new_tokens, do_sample=True
+    )
+
+    for i in response:
+        print(i)
+        response = i
+
+    return response
+
+
+instructions = "You are Comprehensive Robotics Yielding Sophisticated Technology And Logistics (CRYSTAL), an AI robot developed by Vatsal Dutt to be the most advanced robot in the world. You will be provided with prompts and other information to help the user."
+
+def perceptrix(prompt):
+    prompt = instructions+"\n"+prompt
+    response = predict(
+        inputs=prompt, temperature=0.2, top_p=0.9, max_new_tokens=512
+    )
+    spl_tokens = ["<|im_start|>", "<|im_end|>"]
+    clean_prompt = prompt.replace(spl_tokens[0], "").replace(spl_tokens[1], "")
+    return response[len(clean_prompt):]
+
+
+if __name__ == "__main__":
+    history = ""
+    while True:
+        user_input = input("User: ")
+        user_input = "<|im_start|>User\n"+user_input+"<|im_end|>\n<|im_start|>CRYSTAL\n"
+        result = perceptrix(history+user_input)
+        history += user_input + result + "<|im_end|>\n"
Perceptrix/engine.py CHANGED
@@ -1,7 +1,5 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig
-from Perceptrix.callbacks import Iteratorize, Stream
 from utils import setup_device
-import transformers
 import torch
 import tqdm
 import os
@@ -10,9 +8,6 @@ model_name = os.environ.get('LLM_MODEL')
 
 model_path = "models/CRYSTAL-instruct" if model_name == None else model_name
 
-config = transformers.AutoConfig.from_pretrained(
-    model_name, trust_remote_code=True)
-
 device = setup_device()
 
 bnb_config = BitsAndBytesConfig(
@@ -25,7 +20,6 @@ bnb_config = BitsAndBytesConfig(
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float32 if device == "cpu" else torch.bfloat16,
-    config=config,
     device_map="auto",
     trust_remote_code=True,
     low_cpu_mem_usage=True,
@@ -36,6 +30,7 @@ model = AutoModelForCausalLM.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     trust_remote_code=True,
+    use_fast=True,
 )
 
 PROMPT = '''### Instruction:
@@ -52,10 +47,6 @@ tokenizer = tokenizer
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.eval()
 
-INSTRUCTION_KEY = "### Instruction:"
-RESPONSE_KEY = "### Response:"
-END_KEY = "### End"
-
 
 def evaluate(
     prompt='',
@@ -76,39 +67,6 @@ def evaluate(
         repetition_penalty=repetition_penalty,
         **kwargs,
     )
-    generate_params = {
-        "input_ids": input_ids,
-        "generation_config": generation_config,
-        "return_dict_in_generate": True,
-        "output_scores": True,
-        "max_new_tokens": max_new_tokens,
-    }
-
-    if stream_output:
-        def generate_with_callback(callback=None, **kwargs):
-            kwargs.setdefault(
-                "stopping_criteria", transformers.StoppingCriteriaList()
-            )
-            kwargs["stopping_criteria"].append(
-                Stream(callback_func=callback)
-            )
-            with torch.no_grad():
-                model.generate(**kwargs)
-
-        def generate_with_streaming(**kwargs):
-            return Iteratorize(
-                generate_with_callback, kwargs, callback=None
-            )
-
-        with generate_with_streaming(**generate_params) as generator:
-            for output in generator:
-                decoded_output = tokenizer.decode(output)
-
-                if output[-1] in [tokenizer.eos_token_id]:
-                    break
-
-                yield decoded_output.split("### Response:")[-1].strip()
-        return
 
     with torch.no_grad():
         generation_output = model.generate(
@@ -151,21 +109,6 @@ def run_instruction(
     return response
 
 
-def perceptrix(prompt, stop=None):
-    instructions = """You are Comprehensive Robotics Yielding Sophisticated Technology And Logistics (CRYSTAL), an AI robot developed by Vatsal Dutt to be the most advanced robot in the world. You will be provided with prompts and other information to help the user."""
-    answer = ''.join(run_instruction(
-        instructions,
-        "User: "+prompt+"\nCRYSTAL:",
-        temperature=0.6,
-        top_p=0.6,
-        top_k=200,
-        repetition_penalty=1.1,
-        max_new_tokens=256,
-        stream_output=False,
-    ))
-    return answer
-
-
 def search_keyword(prompt):
     instructions = """Prompt:Time: Fri, 23 August 2023 2:30PM\nWeather: 73F\nHow many friends have I told you about?
 Search Keyword:Friends
@@ -277,8 +220,4 @@ if object_distance > 10:
         max_new_tokens=256,
         stream_output=False,
     ))
-    return answer
-
-
-if __name__ == "__main__":
-    perceptrix("Hello! How are you?")
+    return answer
Perceptrix/streamer.py ADDED
@@ -0,0 +1,216 @@
+from queue import Queue
+from typing import TYPE_CHECKING, Optional
+
+
+if TYPE_CHECKING:
+    from transformers.models.auto import AutoTokenizer
+
+
+class BaseStreamer:
+    """
+    Base class from which `.generate()` streamers should inherit.
+    """
+
+    def put(self, value):
+        """Function that is called by `.generate()` to push new tokens"""
+        raise NotImplementedError()
+
+    def end(self):
+        """Function that is called by `.generate()` to signal the end of generation"""
+        raise NotImplementedError()
+
+
+class TextStreamer(BaseStreamer):
+    """
+    Simple text streamer that prints the token(s) to stdout as soon as entire words are formed.
+
+    <Tip warning={true}>
+
+    The API for the streamer classes is still under development and may change in the future.
+
+    </Tip>
+
+    Parameters:
+        tokenizer (`AutoTokenizer`):
+            The tokenized used to decode the tokens.
+        skip_prompt (`bool`, *optional*, defaults to `False`):
+            Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
+        decode_kwargs (`dict`, *optional*):
+            Additional keyword arguments to pass to the tokenizer's `decode` method.
+
+    Examples:
+
+        ```python
+        >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+
+        >>> tok = AutoTokenizer.from_pretrained("gpt2")
+        >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
+        >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
+        >>> streamer = TextStreamer(tok)
+
+        >>> # Despite returning the usual output, the streamer will also print the generated text to stdout.
+        >>> _ = model.generate(**inputs, streamer=streamer, max_new_tokens=20)
+        An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,
+        ```
+    """
+
+    def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, save_file="reply.txt", **decode_kwargs):
+        self.tokenizer = tokenizer
+        self.skip_prompt = skip_prompt
+        self.decode_kwargs = decode_kwargs
+
+        # variables used in the streaming process
+        self.token_cache = []
+        self.print_len = 0
+        self.next_tokens_are_prompt = True
+        self.save_file = save_file
+
+    def put(self, value):
+        """
+        Receives tokens, decodes them, and prints them to stdout as soon as they form entire words.
+        """
+        if len(value.shape) > 1 and value.shape[0] > 1:
+            raise ValueError("TextStreamer only supports batch size 1")
+        elif len(value.shape) > 1:
+            value = value[0]
+
+        if self.skip_prompt and self.next_tokens_are_prompt:
+            self.next_tokens_are_prompt = False
+            return
+
+        # Add the new token to the cache and decodes the entire thing.
+        self.token_cache.extend(value.tolist())
+        text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
+
+        # After the symbol for a new line, we flush the cache.
+        if text.endswith("\n"):
+            printable_text = text[self.print_len :]
+            self.token_cache = []
+            self.print_len = 0
+        # If the last token is a CJK character, we print the characters.
+        elif len(text) > 0 and self._is_chinese_char(ord(text[-1])):
+            printable_text = text[self.print_len :]
+            self.print_len += len(printable_text)
+        # Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
+        # which may change with the subsequent token -- there are probably smarter ways to do this!)
+        else:
+            printable_text = text[self.print_len : text.rfind(" ") + 1]
+            self.print_len += len(printable_text)
+
+        self.on_finalized_text(printable_text)
+
+    def end(self):
+        """Flushes any remaining cache and prints a newline to stdout."""
+        # Flush the cache, if it exists
+        if len(self.token_cache) > 0:
+            text = self.tokenizer.decode(self.token_cache, **self.decode_kwargs)
+            printable_text = text[self.print_len :]
+            self.token_cache = []
+            self.print_len = 0
+        else:
+            printable_text = ""
+
+        self.next_tokens_are_prompt = True
+        self.on_finalized_text(printable_text, stream_end=True)
+
+    def on_finalized_text(self, text: str, stream_end: bool = False):
+        """Prints the new text to stdout. If the stream is ending, also prints a newline."""
+        print(text, flush=True, end="" if not stream_end else None)
+        with open(f"./database/{self.save_file}", 'a') as reply:
+            reply.write(text)
+
+
+    def _is_chinese_char(self, cp):
+        """Checks whether CP is the codepoint of a CJK character."""
+        # This defines a "chinese character" as anything in the CJK Unicode block:
+        #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+        #
+        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+        # despite its name. The modern Korean Hangul alphabet is a different block,
+        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
+        # space-separated words, so they are not treated specially and handled
+        # like the all of the other languages.
+        if (
+            (cp >= 0x4E00 and cp <= 0x9FFF)
+            or (cp >= 0x3400 and cp <= 0x4DBF)  #
+            or (cp >= 0x20000 and cp <= 0x2A6DF)  #
+            or (cp >= 0x2A700 and cp <= 0x2B73F)  #
+            or (cp >= 0x2B740 and cp <= 0x2B81F)  #
+            or (cp >= 0x2B820 and cp <= 0x2CEAF)  #
+            or (cp >= 0xF900 and cp <= 0xFAFF)
+            or (cp >= 0x2F800 and cp <= 0x2FA1F)  #
+        ):  #
+            return True
+
+        return False
+
+
+class TextIteratorStreamer(TextStreamer):
+    """
+    Streamer that stores print-ready text in a queue, to be used by a downstream application as an iterator. This is
+    useful for applications that benefit from acessing the generated text in a non-blocking way (e.g. in an interactive
+    Gradio demo).
+
+    <Tip warning={true}>
+
+    The API for the streamer classes is still under development and may change in the future.
+
+    </Tip>
+
+    Parameters:
+        tokenizer (`AutoTokenizer`):
+            The tokenized used to decode the tokens.
+        skip_prompt (`bool`, *optional*, defaults to `False`):
+            Whether to skip the prompt to `.generate()` or not. Useful e.g. for chatbots.
+        timeout (`float`, *optional*):
+            The timeout for the text queue. If `None`, the queue will block indefinitely. Useful to handle exceptions
+            in `.generate()`, when it is called in a separate thread.
+        decode_kwargs (`dict`, *optional*):
+            Additional keyword arguments to pass to the tokenizer's `decode` method.
+
+    Examples:
+
+        ```python
+        >>> from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+        >>> from threading import Thread
+
+        >>> tok = AutoTokenizer.from_pretrained("gpt2")
+        >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
+        >>> inputs = tok(["An increasing sequence: one,"], return_tensors="pt")
+        >>> streamer = TextIteratorStreamer(tok)
+
+        >>> # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
+        >>> generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
+        >>> thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        >>> thread.start()
+        >>> generated_text = ""
+        >>> for new_text in streamer:
+        ...     generated_text += new_text
+        >>> generated_text
+        'An increasing sequence: one, two, three, four, five, six, seven, eight, nine, ten, eleven,'
+        ```
+    """
+
+    def __init__(
+        self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: Optional[float] = None, **decode_kwargs
+    ):
+        super().__init__(tokenizer, skip_prompt, **decode_kwargs)
+        self.text_queue = Queue()
+        self.stop_signal = None
+        self.timeout = timeout
+
+    def on_finalized_text(self, text: str, stream_end: bool = False):
+        """Put the new text in the queue. If the stream is ending, also put a stop signal in the queue."""
+        self.text_queue.put(text, timeout=self.timeout)
+        if stream_end:
+            self.text_queue.put(self.stop_signal, timeout=self.timeout)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        value = self.text_queue.get(timeout=self.timeout)
+        if value == self.stop_signal:
+            raise StopIteration()
+        else:
+            return value
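
The `save_file` argument is the main difference from the upstream `transformers` streamer: every finalized chunk is also appended to `./database/<save_file>`. A minimal usage sketch, mirroring how chat.py and vqa.py wire it up (the "gpt2" checkpoint is a stand-in for illustration, not part of this commit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from Perceptrix.streamer import TextStreamer

# Stand-in model; the repo loads models/CRYSTAL-chat instead.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Tokens stream to stdout and are appended to ./database/reply.txt
# (the database/ directory must already exist, as it does in this repo).
streamer = TextStreamer(tok, skip_prompt=True,
                        skip_special_tokens=True, save_file="reply.txt")

inputs = tok("An increasing sequence: one,", return_tensors="pt")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=20)
```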
README.md ADDED
@@ -0,0 +1,3 @@
+Run crystal.py
+Train LLM `pip install -e .` inside finetuning folder
+Install Speaker Identification `pip install .` and `pip install -r requirements/requirements_lightning.txt requirements/requirements_asr.txt`
SoundScribe/speak.py CHANGED
@@ -9,8 +9,8 @@ import wave
 
 FRAMES_PER_BUFFER = 1000
 FORMAT = pyaudio.paInt16
-CHANNELS = 1
-RATE = 23500
+CHANNELS = 2
+RATE = 26000
 
 
 device = setup_device()
SoundScribe/transcribe.py CHANGED
@@ -17,32 +17,33 @@ silence_duration = 0
 output_file = sf.SoundFile(
     'database/recording.wav', mode='w', samplerate=SAMPLE_RATE, channels=CHANNELS)
 
-
-model = whisper.load_model("base")
 transcription_in_progress = False
 queued = False
+first_run = True
+transcription_text = ""
 
 
 def transcribe(audio):
+    if first_run:
+        model = whisper.load_model("base")
+        first_run = False
     result = model.transcribe(audio)
     transcription = result['text']
     # user = find_user("database/recording.wav")
     user = "Vatsal"
-    if user != "Crystal":
-        with open('./database/input.txt', 'w', encoding="utf-8") as write_to:
-            write_to.write(transcription[1:])
     return transcription, user
 
 
 def transcription():
     global transcription_in_progress
-    transcription, user = transcribe_api('database/recording.wav')
+    global transcription_text
+    transcription_text, user = transcribe_api('database/recording.wav')
     print("-"*100)
-    print(f'Transcription: {transcription} from user {user}')
+    print(f'Transcription: {transcription_text} from user {user}')
     print("-"*100)
     transcription_in_progress = False
 
-def listen(model, stream):
+def listen(stream):
     global transcription_in_progress
     global queued
     global silence_duration
@@ -73,6 +74,8 @@ def listen(model, stream):
         transcription()
         queued = False
 
+        with open('./database/input.txt', 'w', encoding="utf-8") as write_to:
+            write_to.write(transcription_text[1:])
         silence_duration = 0
         output_file.close()
         audio_data = None
@@ -84,7 +87,7 @@ def live_listen():
     with sd.InputStream(channels=CHANNELS, blocksize=BLOCKSIZE, samplerate=SAMPLE_RATE) as stream:
         print("STARTING LIVE TRANSCRIPTION")
         while True:
-            listen(model, stream)
+            listen(stream)
 
 
 if __name__ == "__main__":
api_host.py CHANGED
@@ -21,7 +21,8 @@ app = Flask(__name__)
 def home():
     return jsonify({'message': 'WORKING'})
 
-def handle_request(func, *args):
+
+def thread_task(func, *args):
     try:
         result = func(*args)
         return jsonify({'message': result})
@@ -36,7 +37,7 @@ def _locate_object():
     image_data = np.array(image_data, dtype=np.uint8)
     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
     cv2.imwrite('API.jpg', image)
-    return handle_request(locate_object, prompt, "API.jpg")
+    return thread_task(locate_object, prompt, "API.jpg")
 
 @app.route('/vqa', methods=['POST', 'GET'])
 def _vqa():
@@ -45,7 +46,7 @@ def _vqa():
     image_data = np.array(image_data, dtype=np.uint8)
     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
     cv2.imwrite('API.jpg', image)
-    return handle_request(answer_question, prompt, "API.jpg")
+    return thread_task(answer_question, prompt, "API.jpg")
 
 @app.route('/object_description', methods=['POST', 'GET'])
 def _object_description():
@@ -53,27 +54,27 @@ def _object_description():
     image_data = np.array(image_data, dtype=np.uint8)
     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
     cv2.imwrite('API.jpg', image)
-    return handle_request(find_object_description, "API.jpg")
+    return thread_task(find_object_description, "API.jpg")
 
 @app.route('/perceptrix', methods=['POST', 'GET'])
 def _perceptrix():
     prompt = request.json['prompt']
-    return handle_request(perceptrix, prompt)
+    return thread_task(perceptrix, prompt)
 
 @app.route('/robotix', methods=['POST', 'GET'])
 def _robotix():
     prompt = request.json['prompt']
-    return handle_request(robotix, prompt)
+    return thread_task(robotix, prompt)
 
 @app.route('/search_keyword', methods=['POST', 'GET'])
 def _search_keyword():
     prompt = request.json['prompt']
-    return handle_request(search_keyword, prompt)
+    return thread_task(search_keyword, prompt)
 
 @app.route('/identify_objects_from_text', methods=['POST', 'GET'])
 def _identify_objects_from_text():
     prompt = request.json['prompt']
-    return handle_request(identify_objects_from_text, prompt)
+    return thread_task(identify_objects_from_text, prompt)
 
 @app.route('/transcribe', methods=['POST', 'GET'])
 def _upload_audio():
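
All routes still call the renamed `thread_task` helper synchronously, so clients interact with them as plain JSON endpoints. A minimal client sketch against the `/perceptrix` and `/vqa` routes (the localhost URL and frame.jpg path are placeholders; the deployed ngrok URL lives in utils.py):

```python
import cv2
import requests

API_URL = "http://localhost:5000/"  # placeholder; utils.py points at the ngrok tunnel

# Text route: POST a prompt, read back {'message': ...}.
reply = requests.post(API_URL + "perceptrix",
                      json={"prompt": "Hello CRYSTAL"}).json()
print(reply["message"])

# Image route: the image is sent as a plain list of encoded bytes,
# matching how utils.answer_question builds its payload.
_, image_data = cv2.imencode(".jpg", cv2.imread("frame.jpg"))
reply = requests.post(API_URL + "vqa",
                      json={"image": image_data.tolist(),
                            "prompt": "What is in front of me?"}).json()
print(reply["message"])
```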
crystal.py CHANGED
@@ -16,7 +16,7 @@ if USE_CLOUD_API:
     from utils import answer_question, find_object_description, locate_object, perceptrix, robotix, identify_objects_from_text, search_keyword
 else:
     from CircumSpect import answer_question, find_object_description, locate_object
-    from Perceptrix.engine import perceptrix, robotix, identify_objects_from_text, search_keyword
+    from Perceptrix import perceptrix, robotix, identify_objects_from_text, search_keyword
 
 
 SPEECH_COMMAND = "Speak"
@@ -30,7 +30,7 @@ AUTOMATION_COMAND = "Home Automation"
 weather = None
 
 device = setup_device()
-print("INITIALIZING CRYSTAL - DETECTED DEVICE:", str(device).upper(),
+print("INITIALIZING CRYSTAL - DEVICE:", str(device).upper(),
       "Acceleration" if str(device) != "cpu" else "")
 
 
@@ -103,7 +103,7 @@ def output(response, input_text):
 
     if helper:
         record_chat("Helper", helper)
-        response = perceptrix(response+"\nHelper: "+helper)
+        response = perceptrix("<|im_start|>CRYSTAL\n"+response+"<|im_end|>\n<|im_start|>Helper\n"+helper+"\n<|im_start|>CRYSTAL\n")
         output(response)
 
 
@@ -168,16 +168,15 @@ while True:
 
     relevant_history += "\n"+"\n".join(full_history[-3:])
 
-    relevant_history = f"{relevant_history}\n{username}: " + \
-        "\n" + input_text + "\nCRYSTAL: "
-    response = str(perceptrix(relevant_history))
+    prompt = f"{relevant_history}\n<|im_start|>{username}" + input_text + "<|im_end|>\n<|im_start|>CRYSTAL\n"
+    response = perceptrix(prompt)
     response = "<###CRYSTAL-INTERNAL###> Speech\n"+response
     with open("./database/input.txt", 'w') as clearfile:
         clearfile.write("")
 
     if current_events != "":
         print("CRYSTAL sees:", current_events)
-        response = str(perceptrix(input_text))
+        response = perceptrix(input_text)
         current_events = ""
 
-    output(response, input_text)
+    output(response, input_text)
database/audio.wav CHANGED
Binary files a/database/audio.wav and b/database/audio.wav differ
 
database/chat_history.jsonl CHANGED
@@ -0,0 +1,2 @@
+{"ID": "Vatsal", "message": "What is your name"}
+{"ID": "CRYSTAL", "message": "<###CRYSTAL-INTERNAL###> Speech\nMy name is CRYSTAL - Comprehensive Robotics Yielding Sophisticated Technologies and Logistics"}
database/current_frame.jpg CHANGED
database/current_frame_vqa.jpg ADDED
database/input.txt CHANGED
@@ -1 +1 @@
-Hello. Can you hear me?
+Tell me what can you do, Crystal.
database/recording.wav CHANGED
Binary files a/database/recording.wav and b/database/recording.wav differ
 
internet.py CHANGED
@@ -156,4 +156,4 @@ if __name__ == "__main__":
 
     weather = f"{location} is {name} with {temperature} and {details}"
     print(weather)
-    print(web_scraper("top news", True))
+    print(web_scraper("top news"))
robot.py CHANGED
@@ -53,6 +53,50 @@ def find(object):
     pass
 
 def grab():
+    """To find all components of quadrilateral ABCD, given sides AB, BC, CD, and angles A, B, and C, we can use the following steps:
+
+    Find the length of side AD.
+    We can use the law of cosines to find the length of AD:
+
+    AD^2 = AB^2 + BC^2 - 2 * AB * BC * cos(C)
+    AD = sqrt(AB^2 + BC^2 - 2 * AB * BC * cos(C))
+    AD = sqrt(192^2 + 116^2 - 2 * 192 * 116 * cos(118))
+    AD = 154.7
+    Find the angles of triangle ABC.
+    We can use the law of sines to find the angles of triangle ABC:
+
+    sin(A) / BC = sin(B) / AC
+    sin(C) / AB = sin(A) / AC
+    AC = sin(A) * BC / sin(B)
+    AC = sin(100) * 116 / sin(95)
+    AC = 120.7
+    sin(C) / AC = sin(B) / AB
+    BC = sin(C) * AC / sin(B)
+    BC = sin(118) * 120.7 / sin(95)
+    BC = 126.8
+    Now that we know the lengths of all sides of triangle ABC, we can use the law of cosines to find the angles B and C:
+
+    cos(B) = (AC^2 + AB^2 - BC^2) / (2 * AC * AB)
+    B = acos((AC^2 + AB^2 - BC^2) / (2 * AC * AB))
+    B = 92.9°
+    cos(C) = (BC^2 + AB^2 - AC^2) / (2 * BC * AB)
+    C = acos((BC^2 + AB^2 - AC^2) / (2 * BC * AB))
+    C = 119°
+    Find the angle of triangle ADC.
+    The angle of triangle ADC is the sum of angles A and B, minus 180 degrees:
+
+    ADC = A + B - 180°
+    ADC = 100 + 92.9 - 180°
+    ADC = -11.1°
+    Now that we know all of the components of quadrilateral ABCD, we can keep decreasing side CD by 1 unit until it is left with 70, and find the angles A, B, and C every time we decrease 1 unit.
+
+    To do this, we can use the following steps:
+
+    Update the length of side CD.
+    Find the new length of side AD using the law of cosines.
+    Find the new angles of triangle ABC using the law of sines and law of cosines.
+    Find the new angle of triangle ADC by subtracting 180 degrees from the sum of angles A and B.
+    We can repeat these steps until CD is equal to 70."""
     pass
 
 
utils.py CHANGED
@@ -9,9 +9,11 @@ import os
 
 API_URL = 'https://bceb7f41087d-7754001953109090881.ngrok-free.app/'
 
+
 def get_time():
     return datetime.datetime.now().strftime('%a %d %b %Y %I:%M %p')
 
+
 def load_chat():
     full_history = []
     sorted_list = []
@@ -22,14 +24,14 @@ def load_chat():
         id = chat_message['ID']
         message = chat_message['message']
         if id != prev_id:
-            full_history.append(f"{id}: {message}\n")
+            full_history.append(f"<|im_start|>{id}\n{message}<|im_end|>\n")
         else:
            full_history[-1] += message+"\n"
 
        prev_id = id
 
    for chat in full_history:
-        if chat.startswith("CRYSTAL: ") or chat.startswith("Helper: "):
+        if chat.startswith("<|im_start|>CRYSTAL") or chat.startswith("<|im_start|>Helper"):
            sorted_list[-1] += "\n"+chat
        else:
            sorted_list.append(chat)
@@ -40,7 +42,7 @@ def load_chat():
 def record_chat(role, message):
     new_message = {
         "ID": role,
-        "message": message[0]
+        "message": message
     }
 
     with open('./database/chat_history.jsonl', 'a') as history:
@@ -68,7 +70,7 @@ def check_api_usage():
     else:
         raise RuntimeError(
             "Unauthorized access! This action will be reported immediately!")
-
+
     return USE_CLOUD_API
 
 
@@ -114,10 +116,10 @@ def search_keyword(prompt):
 
 def answer_question(prompt, frame):
     url = API_URL+"vqa"
-    if type(frame) == str:
-        frame = cv2.imread(frame)
-
+    frame = cv2.imread(frame)
+
     _, image_data = cv2.imencode('.jpg', frame)
+
     image = image_data.tolist()
 
     payload = {'image': image,
@@ -167,6 +169,7 @@ def setup_device():
         device = torch.device("cpu")
     return device
 
+
 def transcribe(audio):
     url = API_URL + "transcribe"
     with open(audio, 'rb') as audio_file:
@@ -177,7 +180,4 @@ def transcribe(audio):
     print(transcription)
     # user = find_user("database/recording.wav")
     user = "Vatsal"
-    if user != "Crystal":
-        with open('./database/input.txt', 'w', encoding="utf-8") as write_to:
-            write_to.write(transcription[1:])
     return transcription, user
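
The load_chat and record_chat changes move the stored history onto the same ChatML-style markers that chat.py and crystal.py now build prompts with. An illustrative round-trip of one record from database/chat_history.jsonl:

```python
# Illustrative only: one record as stored in database/chat_history.jsonl
# and the turn string load_chat now builds from it.
record = {"ID": "Vatsal", "message": "What is your name"}
turn = f"<|im_start|>{record['ID']}\n{record['message']}<|im_end|>\n"
print(turn)
# <|im_start|>Vatsal
# What is your name<|im_end|>
```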