fix: support sglang>=0.5.4
Browse files
- draft/qwen2.py +13 -2
draft/qwen2.py
CHANGED
|
@@ -43,7 +43,14 @@ from sglang.srt.layers.vocab_parallel_embedding import (
|
|
| 43 |
ParallelLMHead,
|
| 44 |
VocabParallelEmbedding,
|
| 45 |
)
|
| 46 |
-
from sglang.srt.managers.schedule_batch import global_server_args_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
|
| 48 |
from sglang.srt.model_loader.weight_utils import (
|
| 49 |
default_weight_loader,
|
|
@@ -273,7 +280,11 @@ class Qwen2Model(nn.Module):
|
|
| 273 |
config.vocab_size,
|
| 274 |
config.hidden_size,
|
| 275 |
quant_config=quant_config,
|
| 276 |
-
enable_tp=not global_server_args_dict["enable_dp_attention"],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
prefix=add_prefix("embed_tokens", prefix),
|
| 278 |
)
|
| 279 |
else:
|
|
|
|
| 43 |
ParallelLMHead,
|
| 44 |
VocabParallelEmbedding,
|
| 45 |
)
|
| 46 |
+
try:
|
| 47 |
+
from sglang.srt.managers.schedule_batch import global_server_args_dict
|
| 48 |
+
except ImportError:
|
| 49 |
+
global_server_args_dict = None
|
| 50 |
+
try:
|
| 51 |
+
from sglang.srt.server_args import get_global_server_args
|
| 52 |
+
except ImportError:
|
| 53 |
+
get_global_server_args = None
|
| 54 |
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
|
| 55 |
from sglang.srt.model_loader.weight_utils import (
|
| 56 |
default_weight_loader,
|
|
|
|
| 280 |
config.vocab_size,
|
| 281 |
config.hidden_size,
|
| 282 |
quant_config=quant_config,
|
| 283 |
+
enable_tp=not (
|
| 284 |
+
global_server_args_dict["enable_dp_attention"]
|
| 285 |
+
if global_server_args_dict is not None
|
| 286 |
+
else get_global_server_args().enable_dp_attention
|
| 287 |
+
),
|
| 288 |
prefix=add_prefix("embed_tokens", prefix),
|
| 289 |
)
|
| 290 |
else:
|