# docker-compose.yml
# Not using networks as they seem to break on WSLv2
services:
  # Main OramaCore service, built from the local Dockerfile.
  # Exposes its HTTP API on 8080 and reserves one NVIDIA GPU.
  oramacore:
    build:
      context: .
    environment:
      # trace-level logs for the oramacore crate, warn for everything else
      - RUST_LOG=oramacore=trace,warn
    volumes:
      # docker-specific config mounted over the in-container config path
      - ./config-docker.yaml:/app/config.yaml
    ports:
      - "8080:8080"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    depends_on:
      # NOTE(review): python-ai-server is referenced here (and in envoy)
      # but is not defined in this file — `docker compose up` will fail on
      # an undefined dependency. Confirm whether the service definition was
      # lost or lives in an override file.
      - python-ai-server
      - vllm
    restart: unless-stopped

  # vLLM OpenAI-compatible inference server serving Qwen2.5-3B-Instruct,
  # with tool-calling enabled (hermes parser). Reserves one NVIDIA GPU.
  vllm:
    image: vllm/vllm-openai:v0.8.3
    command: --model Qwen/Qwen2.5-3B-Instruct --host 0.0.0.0 --port 8000 --enable-auto-tool-choice --tool-call-parser hermes
    ports:
      - "8000:8000"
    environment:
      # Hugging Face token passed through from the host environment,
      # required to pull gated model weights
      - HF_TOKEN=${HF_TOKEN}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

  # Envoy edge proxy fronting the stack on port 80; 9901 is the Envoy
  # admin interface. Configuration is mounted from ./envoy/envoy.yaml.
  envoy:
    image: envoyproxy/envoy:v1.26-latest
    ports:
      - "80:80"
      - "9901:9901"
    volumes:
      - ./envoy/envoy.yaml:/etc/envoy/envoy.yaml
    depends_on:
      - oramacore
      # NOTE(review): see the undefined-service note on oramacore.depends_on
      - python-ai-server
      - vllm
    restart: unless-stopped