查看状态
nvidia-smi
启动
启动 9B
nohup bash /home/szs/LLM-Deploy/scripts/start_qwen35_9b_sglang.sh >> /home/szs/LLM-Deploy/logs/sglang_9b.log 2>&1 & echo $! > /home/szs/LLM-Deploy/sglang_9b.pid
查日志 tail -f /home/szs/LLM-Deploy/logs/sglang_9b.log
停止
pkill -f ‘/home/szs/LLM-Deploy/scripts/start_qwen35_9b_sglang.sh’ # 发送 SIGTERM
pkill -9 -f ‘/home/szs/LLM-Deploy/scripts/start_qwen35_9b_sglang.sh’ # 必要时强制
启动 27B
nohup bash /home/szs/LLM-Deploy/scripts/start_qwen35_sglang.sh >> /home/szs/LLM-Deploy/logs/sglang.log 2>&1 & echo $! > /home/szs/LLM-Deploy/sglang.pid
查日志 tail -f /home/szs/LLM-Deploy/logs/sglang.log
停止
pkill -f ‘/home/szs/LLM-Deploy/scripts/start_qwen35_sglang.sh’ pkill -9 -f ‘/home/szs/LLM-Deploy/scripts/start_qwen35_sglang.sh’
请求
7B请求
win
curl.exe -s -X POST “http://127.0.0.1:30001/v1/chat/completions” -H “Content-Type: application/json” -H “Authorization: Bearer sk-7895” -d ”{“model”:“Qwen3.5-9B”,“messages”:[{“role”:“system”,“content”:“请说中文”},{“role”:“user”,“content”:“你是谁”}],“max_tokens”:964}“
Linux
curl -s -X POST “http://127.0.0.1:30001/v1/chat/completions” -H “Content-Type: application/json” -H “Authorization: Bearer sk-7895” -d ’{“model”:“Qwen3.5-9B”,“messages”:[{“role”:“system”,“content”:“请说中文”},{“role”:“user”,“content”:“你是谁”}],“max_tokens”:964}’ | jq .
27B请求
curl -s -X POST ‘http://127.0.0.1:30000/v1/chat/completions’
-H ‘Content-Type: application/json’
-H ‘Authorization: Bearer sk-7895’
-d ’{“model”:“Qwen3.5-27B”,“messages”:[{“role”:“user”,“content”:“你好”}],“max_tokens”:64}’ | jq .
curl -s -X POST “http://127.0.0.1:30000/v1/chat/completions” -H “Content-Type: application/json” -H “Authorization: Bearer sk-7895” -d ”{“model”:“Qwen3.5-27B”,“messages”:[{“role”:“user”,“content”:“你好”}],“max_tokens”:64}”