Spaces:
Sleeping
Sleeping
Stefan
committed on
Commit
·
5b6e243
1
Parent(s):
e250f84
fix(spaces): remove types
Browse files
- embedding.py +1 -1
- processing.py +2 -2
embedding.py
CHANGED
@@ -8,7 +8,7 @@ model = AutoModel.from_pretrained("intfloat/e5-large-v2")
|
|
8 |
EMBEDDING_CHAR_LIMIT = 512
|
9 |
|
10 |
|
11 |
-
def average_pool(last_hidden_states: Tensor, attention_mask: Tensor)
|
12 |
last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
|
13 |
return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
14 |
|
|
|
8 |
EMBEDDING_CHAR_LIMIT = 512
|
9 |
|
10 |
|
11 |
+
def average_pool(last_hidden_states: Tensor, attention_mask: Tensor):
|
12 |
last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
|
13 |
return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
14 |
|
processing.py
CHANGED
@@ -30,7 +30,7 @@ def unmark(text):
|
|
30 |
return __md.convert(text)
|
31 |
|
32 |
|
33 |
-
def clean_md(text: str) -> list[str]:
|
34 |
cleantext = re.sub(HTMLR, "", text)
|
35 |
cleantext = re.sub(LIGHTGALLERY, "", cleantext)
|
36 |
para = cleantext.split("\n#")
|
@@ -42,7 +42,7 @@ def clean_md(text: str) -> list[str]:
|
|
42 |
start_seq_length = num_tokens_from_str("passage: ")
|
43 |
|
44 |
|
45 |
-
def truncate_to_sequences(text: str, max_char=EMBEDDING_CHAR_LIMIT)
|
46 |
sequence_length = num_tokens_from_str(text) // (max_char - start_seq_length) + 1
|
47 |
length = len(text)
|
48 |
separator = length // sequence_length
|
|
|
30 |
return __md.convert(text)
|
31 |
|
32 |
|
33 |
+
def clean_md(text: str):
|
34 |
cleantext = re.sub(HTMLR, "", text)
|
35 |
cleantext = re.sub(LIGHTGALLERY, "", cleantext)
|
36 |
para = cleantext.split("\n#")
|
|
|
42 |
start_seq_length = num_tokens_from_str("passage: ")
|
43 |
|
44 |
|
45 |
+
def truncate_to_sequences(text: str, max_char=EMBEDDING_CHAR_LIMIT):
|
46 |
sequence_length = num_tokens_from_str(text) // (max_char - start_seq_length) + 1
|
47 |
length = len(text)
|
48 |
separator = length // sequence_length
|