12 lines
576 B
Markdown
12 lines
576 B
Markdown
|
|
|
||
|
|
virtualenv vllm-env
|
||
|
|
|
||
|
|
source vllm-env/bin/activate  # run `deactivate` later to leave the virtual environment
|
||
|
|
|
||
|
|
export http_proxy=http://172.28.21.225:7890
|
||
|
|
export https_proxy=http://172.28.21.225:7890
|
||
|
|
|
||
|
|
pip install -U vllm --pre --index-url https://pypi.org/simple --extra-index-url https://wheels.vllm.ai/nightly
|
||
|
|
pip install git+https://github.com/huggingface/transformers.git
|
||
|
|
|
||
|
|
vllm serve zai-org/GLM-4.7-Flash --tensor-parallel-size 1 --speculative-config.method mtp --speculative-config.num_speculative_tokens 1 --tool-call-parser glm47 --reasoning-parser glm45 --enable-auto-tool-choice --served-model-name glm-4.7-flash
|