We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 922778e commit 09366e6 Copy full SHA for 09366e6
skypilot-tmp.yaml
@@ -12,6 +12,7 @@ envs:
12
MODEL_NAME: Qwen/Qwen2.5-VL-32B-Instruct-AWQ
13
HF_TOKEN: TOKEN_HERE
14
VLLM_USE_V1: 1
15
+ PYTHONUTF8: 1
16
17
resources:
18
cloud: gcp
@@ -49,4 +50,14 @@ run: |
49
50
--model $MODEL_NAME \
51
--host 0.0.0.0 \
52
--max-model-len 64000 \
- --limit-mm-per-prompt "image=15"
53
+ --limit-mm-per-prompt '{"image":15}'
54
+
55
+# curl http://SERVER_IP:8000/v1/chat/completions \
56
+# -H "Content-Type: application/json" \
57
+# -d '{
58
+# "model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
59
+# "messages": [
60
+# {"role": "system", "content": "You are a helpful assistant."},
61
+# {"role": "user", "content": "What is 2 + 2?"}
62
+# ]
63
+# }'
0 commit comments