Skip to content

Commit 0d54217

Browse files
committed
feat: add vLLM setup script for modular installation
Add scripts/setup_vllm.sh to support vLLM installation and configuration within containers. This script:
- Downloads vLLM source from GitHub when not mounted as a volume
- Installs build dependencies for vLLM compilation
- Supports installing vLLM wheels from PyPI (release, nightly)
- Provides flexible configuration via the INSTALL_VLLM environment variable

The script supports multiple installation modes:
- source: Build from source (with auto-download if not mounted)
- release/nightly: Install wheels from PyPI
- skip: Skip vLLM installation

This is part of the modular script architecture introduced in PR #115.

Signed-off-by: Craig Magina <[email protected]>
1 parent ab0ce06 commit 0d54217

9 files changed

Lines changed: 227 additions & 1 deletion

File tree

.github/workflows/amd-image.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ on: # yamllint disable-line rule:truthy
1515
- scripts/setup_torch.sh
1616
- scripts/setup_triton.sh
1717
- scripts/setup_user.sh
18+
- scripts/setup_vllm.sh
1819
- scripts/setup.sh
1920
pull_request:
2021
paths:
@@ -27,6 +28,7 @@ on: # yamllint disable-line rule:truthy
2728
- scripts/setup_helion.sh
2829
- scripts/setup_torch.sh
2930
- scripts/setup_triton.sh
31+
- scripts/setup_vllm.sh
3032
- scripts/setup_user.sh
3133
- scripts/setup.sh
3234
schedule:

.github/workflows/cpu-image.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ on: # yamllint disable-line rule:truthy
1414
- scripts/setup_helion.sh
1515
- scripts/setup_torch.sh
1616
- scripts/setup_triton.sh
17+
- scripts/setup_vllm.sh
1718
- scripts/setup_user.sh
1819
- scripts/setup.sh
1920
pull_request:
@@ -27,6 +28,7 @@ on: # yamllint disable-line rule:truthy
2728
- scripts/setup_helion.sh
2829
- scripts/setup_torch.sh
2930
- scripts/setup_triton.sh
31+
- scripts/setup_vllm.sh
3032
- scripts/setup_user.sh
3133
- scripts/setup.sh
3234
schedule:

.github/workflows/nvidia-image.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ on: # yamllint disable-line rule:truthy
1414
- scripts/setup_helion.sh
1515
- scripts/setup_torch.sh
1616
- scripts/setup_triton.sh
17+
- scripts/setup_vllm.sh
1718
- scripts/setup_user.sh
1819
- scripts/setup.sh
1920
pull_request:
@@ -27,6 +28,7 @@ on: # yamllint disable-line rule:truthy
2728
- scripts/setup_helion.sh
2829
- scripts/setup_torch.sh
2930
- scripts/setup_triton.sh
31+
- scripts/setup_vllm.sh
3032
- scripts/setup_user.sh
3133
- scripts/setup.sh
3234
schedule:

Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,13 @@ INSTALL_NSIGHT ?=false
4646
llvm_path ?=
4747
helion_path ?=
4848
torch_path ?=
49+
vllm_path ?=
4950
user_path ?=
5051
INSTALL_HELION ?= skip # Options: release, source, skip
5152
INSTALL_LLVM ?= skip # Options: source, skip
5253
INSTALL_TORCH ?= skip # Options: nightly, release, source, skip, test
5354
INSTALL_TRITON ?= source # Options: release, source, skip
55+
INSTALL_VLLM ?= skip # Options: nightly, release, source, skip
5456
INSTALL_JUPYTER ?= true
5557
USE_CCACHE ?= 0
5658
CUDA_VERSION ?= 12-8
@@ -129,6 +131,9 @@ define run_container
129131
if [ -n "$(torch_path)" ]; then \
130132
volume_arg+=" -v $(torch_path):/workspace/torch$(SELINUXFLAG)"; \
131133
fi; \
134+
if [ -n "$(vllm_path)" ]; then \
135+
volume_arg+=" -v $(vllm_path):/workspace/vllm$(SELINUXFLAG)"; \
136+
fi; \
132137
if [ -n "$(user_path)" ]; then \
133138
volume_arg+=" -v $(user_path):/workspace/user$(SELINUXFLAG)"; \
134139
fi; \
@@ -170,7 +175,7 @@ define run_container
170175
if [ "$(CUSTOM_LLVM)" = "false" ]; then \
171176
install_llvm="-e INSTALL_LLVM=$(INSTALL_LLVM)"; \
172177
fi; \
173-
env_vars="-e USERNAME=$(USER) -e TORCH_VERSION=$(torch_version) -e CUSTOM_LLVM=$(CUSTOM_LLVM) -e INSTALL_TOOLS=$(DEMO_TOOLS) -e INSTALL_JUPYTER=$(INSTALL_JUPYTER) -e NOTEBOOK_PORT=$(NOTEBOOK_PORT) -e INSTALL_HELION=$(INSTALL_HELION) -e INSTALL_TORCH=$(INSTALL_TORCH) -e INSTALL_TRITON=$(INSTALL_TRITON) -e USE_CCACHE=$(USE_CCACHE) -e MAX_JOBS=$(MAX_JOBS)"; \
178+
env_vars="-e USERNAME=$(USER) -e TORCH_VERSION=$(torch_version) -e CUSTOM_LLVM=$(CUSTOM_LLVM) -e INSTALL_TOOLS=$(DEMO_TOOLS) -e INSTALL_JUPYTER=$(INSTALL_JUPYTER) -e NOTEBOOK_PORT=$(NOTEBOOK_PORT) -e INSTALL_HELION=$(INSTALL_HELION) -e INSTALL_TORCH=$(INSTALL_TORCH) -e INSTALL_TRITON=$(INSTALL_TRITON) -e INSTALL_VLLM=$(INSTALL_VLLM) -e USE_CCACHE=$(USE_CCACHE) -e MAX_JOBS=$(MAX_JOBS)"; \
174179
if [ "$(create_user)" = "true" ]; then \
175180
$(CTR_CMD) run -e CREATE_USER=$(create_user) $$env_vars $$install_llvm $$port_arg \
176181
-e USER_UID=`id -u $(USER)` -e USER_GID=`id -g $(USER)` $$gpu_args $$profiling_args $$keep_ns_arg \

dockerfiles/Dockerfile.triton

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ COPY scripts/setup_helion.sh setup_helion.sh
6969
COPY scripts/setup_llvm.sh setup_llvm.sh
7070
COPY scripts/setup_torch.sh setup_torch.sh
7171
COPY scripts/setup_triton.sh setup_triton.sh
72+
COPY scripts/setup_vllm.sh setup_vllm.sh
7273
COPY scripts/setup_user.sh setup_user.sh
7374
COPY scripts/setup.sh setup.sh
7475
COPY scripts/install_software.sh install_software.sh

dockerfiles/Dockerfile.triton-amd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ COPY scripts/setup_helion.sh setup_helion.sh
8787
COPY scripts/setup_llvm.sh setup_llvm.sh
8888
COPY scripts/setup_torch.sh setup_torch.sh
8989
COPY scripts/setup_triton.sh setup_triton.sh
90+
COPY scripts/setup_vllm.sh setup_vllm.sh
9091
COPY scripts/setup_user.sh setup_user.sh
9192
COPY scripts/setup.sh setup.sh
9293
COPY scripts/install_software.sh install_software.sh

dockerfiles/Dockerfile.triton-cpu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ COPY scripts/setup_helion.sh setup_helion.sh
6666
COPY scripts/setup_llvm.sh setup_llvm.sh
6767
COPY scripts/setup_torch.sh setup_torch.sh
6868
COPY scripts/setup_triton.sh setup_triton.sh
69+
COPY scripts/setup_vllm.sh setup_vllm.sh
6970
COPY scripts/setup_user.sh setup_user.sh
7071
COPY scripts/setup.sh setup.sh
7172
COPY scripts/install_software.sh install_software.sh

scripts/setup.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ declare -a SAVE_VARS=(
3030
"INSTALL_TOOLS"
3131
"INSTALL_TORCH"
3232
"INSTALL_TRITON"
33+
"INSTALL_VLLM"
3334
"MAX_JOBS"
3435
"PIP_TRITON_VERSION"
3536
"ROCM_VERSION"
@@ -77,6 +78,10 @@ if [ "${INSTALL_TRITON:-skip}" != "skip" ]; then
7778
run_as_user ./setup_triton.sh "$INSTALL_TRITON"
7879
fi
7980

81+
if [ "${INSTALL_VLLM:-skip}" != "skip" ]; then
82+
run_as_user ./setup_vllm.sh "$INSTALL_VLLM"
83+
fi
84+
8085
if [ "${INSTALL_TORCH:-skip}" != "skip" ]; then
8186
run_as_user ./setup_torch.sh "$INSTALL_TORCH"
8287
fi

scripts/setup_vllm.sh

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
#! /bin/bash -e
2+
3+
trap "echo -e '\nScript interrupted. Exiting gracefully.'; exit 1" SIGINT
4+
5+
# Copyright (C) 2024-2025 Red Hat, Inc.
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
# SPDX-License-Identifier: Apache-2.0
20+
set -euo pipefail
21+
22+
WORKSPACE=${WORKSPACE:-${HOME}}
23+
24+
VLLM_REPO=https://github.com/vllm-project/vllm.git
25+
VLLM_DIR="${WORKSPACE}/vllm"
26+
27+
declare -a PIP_INSTALL_ARGS
28+
PIP_VLLM_INDEX_URL_BASE=https://wheels.vllm.ai
29+
30+
# Install Python packages, preferring uv when it is on PATH (much faster)
# and falling back to plain pip otherwise.
# Arguments: forwarded verbatim to `uv pip install` / `pip install`.
pip_install() {
    if command -v uv &>/dev/null; then
        uv pip install "$@"
    else
        pip install "$@"
    fi
}
# Print the major.minor portion of ROCM_VERSION (e.g. 6.4 from 6.4.4).
# Falls back to echoing the full value when it does not start with a
# dotted numeric version.
get_rocm_version() {
    if [[ "$ROCM_VERSION" =~ ^([0-9]+\.[0-9]+) ]]; then
        echo "${BASH_REMATCH[1]}"
    else
        echo "$ROCM_VERSION"
    fi
}
# Clone the vLLM repository into $VLLM_DIR unless it is already present
# (e.g. mounted as a volume), initialize submodules, optionally check out
# $VLLM_GITREF, and install the repo's pre-commit hooks (one-time).
setup_src() {
    if [ -d "${VLLM_DIR}" ]; then
        echo "vLLM repo already present, not cloning ..."
        return 0
    fi

    echo "Cloning the vLLM repo $VLLM_REPO to $VLLM_DIR ..."
    git clone "$VLLM_REPO" "$VLLM_DIR"

    # Belt-and-braces: git clone failing already aborts under `set -e`,
    # but verify the checkout actually landed where we expect.
    if [ ! -d "$VLLM_DIR" ]; then
        echo "$VLLM_DIR not found. ERROR Cloning repository..." >&2
        exit 1
    fi

    pushd "$VLLM_DIR" 1>/dev/null || exit 1
    git submodule sync
    git submodule update --init --recursive

    # Pin to a specific branch/tag/commit when the caller requested one.
    if [ -n "${VLLM_GITREF:-}" ]; then
        git checkout "$VLLM_GITREF"
    fi

    echo "Install pre-commit hooks into your local vLLM git repo (one-time)"
    pip_install pre-commit
    pre-commit install
    popd 1>/dev/null || exit 1
}
# Install everything needed to build vLLM from source:
#   - torch (reused from a source build when INSTALL_TORCH=source,
#     otherwise installed as a release wheel via setup_torch.sh)
#   - the per-target requirement file (cuda/rocm/cpu)
#   - the generic build requirements
# Persists VLLM_TARGET_DEVICE in ~/.bashrc for the later build step.
install_build_deps() {
    # Target device detected below; empty means no accelerator branch matched.
    local target_device=""

    pushd "$VLLM_DIR" 1>/dev/null || exit 1

    if [ "${INSTALL_TORCH:-}" = "source" ]; then
        echo "Using existing torch source build ..."
        python use_existing_torch.py
    else
        echo "Installing Torch as a vLLM dependency ..."
        "${WORKSPACE}"/setup_torch.sh release
    fi

    # NOTE(review): --prerelease=allow is a uv option; plain pip spells it
    # --pre. pip_install may fall back to pip — confirm uv is guaranteed in
    # images that build vLLM from source.
    if [ -n "${CUDA_VERSION:-}" ]; then
        target_device=cuda
        if [ -e requirements/cuda.txt ]; then
            echo "Installing vLLM CUDA build dependencies ..."
            pip_install --prerelease=allow -r requirements/cuda.txt
        fi
    elif [ -n "${ROCM_VERSION:-}" ]; then
        target_device=rocm
        pip_install --upgrade numba \
            scipy \
            "huggingface-hub[cli,hf_transfer]" \
            setuptools_scm
        pip_install "numpy<2"
        if [ -e requirements/rocm.txt ]; then
            echo "Installing vLLM ROCm build dependencies ..."
            pip_install --prerelease=allow -r requirements/rocm.txt
        fi
    elif [ "${TRITON_CPU_BACKEND:-0}" -eq 1 ]; then
        target_device=cpu
        if [ -e requirements/cpu.txt ]; then
            echo "Installing vLLM CPU build dependencies ..."
            pip_install --prerelease=allow -r requirements/cpu.txt
        fi
    fi

    if [ -f requirements/build.txt ]; then
        echo "Installing vLLM build dependencies ..."
        pip_install --prerelease=allow -r requirements/build.txt
    fi

    popd 1>/dev/null || exit 1

    # Fix: the original expanded $VLLM_TARGET_DEVICE unconditionally in the
    # heredoc, which aborts with "unbound variable" under `set -u` whenever
    # none of the accelerator branches above matched. Only persist the
    # variable when a target was actually detected.
    if [ -n "$target_device" ]; then
        echo "Set the target device for vLLM build ..."
        tee -a "${HOME}/.bashrc" <<EOF

# Target device for vLLM build
export VLLM_TARGET_DEVICE=$target_device
EOF
        echo "Run 'source ${HOME}/.bashrc' before building vLLM"
    fi
}
# Install a vLLM wheel. Index selection precedence:
#   1. PIP_VLLM_EXTRA_INDEX_URL - explicit index URL supplied by the caller
#   2. VLLM_COMMIT              - per-commit wheel index on wheels.vllm.ai
#   3. uv torch-backend autodetection (rocm / cpu / cuda / auto)
# PIP_VLLM_VERSION optionally pins the installed vllm version.
install_whl() {
    echo "Installing vLLM from PyPI ..."

    if [ -n "${PIP_VLLM_EXTRA_INDEX_URL:-}" ]; then
        echo "Using the specified index, $PIP_VLLM_EXTRA_INDEX_URL"
        PIP_INSTALL_ARGS+=("--index-url" "$PIP_VLLM_EXTRA_INDEX_URL")
    elif [ -n "${VLLM_COMMIT:-}" ]; then
        echo "Using the build from commit $VLLM_COMMIT ..."
        # Fix: the original assigned the flag text to PIP_VLLM_EXTRA_INDEX_URL
        # and never used it, so the commit index was silently ignored.
        PIP_INSTALL_ARGS+=("--extra-index-url" "${PIP_VLLM_INDEX_URL_BASE}/${VLLM_COMMIT}")
    elif command -v uv &>/dev/null; then
        if [ -n "${UV_TORCH_BACKEND:-}" ]; then
            echo "Using the specified uv backend, $UV_TORCH_BACKEND"
        elif [ -n "${ROCM_VERSION:-}" ]; then
            echo "Using the torch ROCm version $ROCM_VERSION backend"
            UV_TORCH_BACKEND="rocm$(get_rocm_version)"
        elif ((${TRITON_CPU_BACKEND:-0} == 1)); then
            echo "Using the torch CPU backend"
            UV_TORCH_BACKEND=cpu
        elif [ -n "${CUDA_VERSION:-}" ]; then
            echo "Using the torch CUDA version $CUDA_VERSION backend"
            # Fix: strip EVERY '.' or '-' — the original ${VAR/[.-]/} removed
            # only the first separator, so "12.8.1" became "cu128.1".
            UV_TORCH_BACKEND="cu${CUDA_VERSION//[.-]/}"
        else
            echo "Using the torch auto backend"
            UV_TORCH_BACKEND=auto
        fi

        PIP_INSTALL_ARGS+=("--torch-backend" "$UV_TORCH_BACKEND")
    elif [ -n "${UV_TORCH_BACKEND:-}" ]; then
        # We only reach this branch when uv is absent, so the original's
        # extra `! command -v uv` re-check was redundant.
        echo "Error: UV_TORCH_BACKEND is set to $UV_TORCH_BACKEND but uv is not available." >&2
        exit 1
    fi

    if [ -n "${PIP_VLLM_VERSION:-}" ]; then
        echo "Installing specified version $PIP_VLLM_VERSION"
        PIP_VLLM_VERSION="==$PIP_VLLM_VERSION"
    fi

    # Fix: dropped the stray trailing backslash that glued this command onto
    # the blank line below it in the original.
    pip_install -U --force-reinstall "${PIP_INSTALL_ARGS[@]}" "vllm${PIP_VLLM_VERSION:-}"

    # Fix up LD_LIBRARY_PATH for CUDA
    "${WORKSPACE}"/ldpretend.sh
}
# Print command usage to stderr (stdout stays clean for callers).
usage() {
    cat >&2 <<EOF
Usage: $(basename "$0") [COMMAND]
  source     Download vLLM's source (if needed) and install the build deps
  release    Install vLLM
  nightly    Install the vLLM nightly wheel
EOF
}
##
## Main
##

# Exactly one command is required.
if [ $# -ne 1 ]; then
    usage
    exit 1
fi

# Commands are matched case-insensitively.
COMMAND=${1,,}

case $COMMAND in
    source)
        setup_src
        install_build_deps
        ;;
    release)
        install_whl
        ;;
    nightly)
        # Fix: pass the bare index URL. The original embedded the literal
        # "--extra-index-url" flag inside the value, which install_whl then
        # fed to pip as the argument of --index-url — an invalid invocation.
        PIP_VLLM_EXTRA_INDEX_URL="${PIP_VLLM_INDEX_URL_BASE}/nightly"
        install_whl
        ;;
    *)
        usage
        exit 1
        ;;
esac

0 commit comments

Comments
 (0)