Spaces:
Paused
Paused
:boom: [Fix] slow tokenizer issue and finishing by stop_sequences
Browse files
messagers/message_composer.py
CHANGED
|
@@ -152,7 +152,10 @@ class MessageComposer:
|
|
| 152 |
# https://huggingface.co/openchat/openchat-3.5-0106
|
| 153 |
# https://huggingface.co/01-ai/Yi-1.5-34B-Chat
|
| 154 |
elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
| 156 |
self.merged_str = tokenizer.apply_chat_template(
|
| 157 |
messages, tokenize=False, add_generation_prompt=True
|
| 158 |
)
|
|
|
|
| 152 |
# https://huggingface.co/openchat/openchat-3.5-0106
|
| 153 |
# https://huggingface.co/01-ai/Yi-1.5-34B-Chat
|
| 154 |
elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
|
| 155 |
+
# https://discuss.huggingface.co/t/error-with-new-tokenizers-urgent/2847/5
|
| 156 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
| 157 |
+
self.model_fullname, use_fast=False
|
| 158 |
+
)
|
| 159 |
self.merged_str = tokenizer.apply_chat_template(
|
| 160 |
messages, tokenize=False, add_generation_prompt=True
|
| 161 |
)
|
networks/huggingface_streamer.py
CHANGED
|
@@ -157,15 +157,15 @@ class HuggingfaceStreamer:
|
|
| 157 |
|
| 158 |
content = self.parse_line(line)
|
| 159 |
|
| 160 |
-
if content.strip() == self.stop_sequences:
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
else:
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
|
| 170 |
output = self.message_outputer.output(
|
| 171 |
content=content, content_type=content_type
|
|
|
|
| 157 |
|
| 158 |
content = self.parse_line(line)
|
| 159 |
|
| 160 |
+
# if content.strip() == self.stop_sequences:
|
| 161 |
+
# content_type = "Finished"
|
| 162 |
+
# logger.success("\n[Finished]")
|
| 163 |
+
# is_finished = True
|
| 164 |
+
# else:
|
| 165 |
+
content_type = "Completions"
|
| 166 |
+
if line_count == 1:
|
| 167 |
+
content = content.lstrip()
|
| 168 |
+
logger.back(content, end="")
|
| 169 |
|
| 170 |
output = self.message_outputer.output(
|
| 171 |
content=content, content_type=content_type
|