Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ jobs:
if: success()
run: |
echo "🔐 Running admin tests..."
uv run pytest -n auto -m "need_admin" --reruns 1
uv run pytest -n auto -m "need_admin" --reruns 1
6 changes: 4 additions & 2 deletions .github/workflows/python-pubulish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
uses: astral-sh/setup-uv@v2
with:
version: "latest"
enable-cache: true
enable-cache: true

- name: Set up Python 3.11
run: uv python install 3.11
Expand All @@ -24,6 +24,8 @@ jobs:
uv sync --all-extras --all-groups

- name: Run tests
env:
ROCK_SERVICE_STATUS_DIR: /tmp/service_status
run: |
uv run pytest -m "not need_ray and not need_admin" --reruns 1 -n auto

Expand All @@ -33,7 +35,7 @@ jobs:
if: github.event_name == 'release' && github.event.action == 'published'
permissions:
contents: read
id-token: write
id-token: write
steps:
- uses: actions/checkout@v4

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
authors = [{ name = "chatos@alibaba" }]
requires-python = "<4.0,>=3.10"
name = "rl-rock"
version = "1.0.1"
version = "1.0.2"
description = "ROCK-Reinforcement Open Construction Kit"
readme = "README.md"
dependencies = [
Expand All @@ -26,6 +26,7 @@ dependencies = [
"rich",
"oss2",
"pyyaml",
"tzdata",
]

[project.optional-dependencies]
Expand Down
28 changes: 15 additions & 13 deletions requirements_admin.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ aiosqlite==0.21.0
# via rl-rock
alibabacloud-cr20181201==2.0.5
# via rl-rock
alibabacloud-credentials==1.0.2
alibabacloud-credentials==1.0.3
# via
# alibabacloud-gateway-pop
# alibabacloud-gateway-spi
Expand Down Expand Up @@ -97,7 +97,7 @@ anyio==4.11.0
# httpx
# rl-rock
# starlette
apscheduler==3.11.0
apscheduler==3.11.1
# via
# alibabacloud-credentials
# rl-rock
Expand All @@ -107,7 +107,7 @@ async-timeout==5.0.1 ; python_full_version < '3.11.3'
# via
# aiohttp
# redis
attrs==25.3.0
attrs==25.4.0
# via
# aiohttp
# jsonschema
Expand All @@ -119,26 +119,26 @@ bashlex==0.18
# via rl-rock
bfi==1.0.4
# via reasoning-gym
boto3==1.40.61
boto3==1.40.66
# via rl-rock
botocore==1.40.62
botocore==1.40.66
# via
# boto3
# s3transfer
build==1.2.2.post1
build==1.3.0
# via rl-rock
cachetools==5.5.2
cachetools==6.2.1
# via google-auth
cellpylib==2.4.0
# via reasoning-gym
certifi==2025.7.14
certifi==2025.10.5
# via
# httpcore
# httpx
# requests
cffi==2.0.0
# via cryptography
charset-normalizer==3.4.2
charset-normalizer==3.4.4
# via requests
click==8.3.0
# via
Expand All @@ -151,7 +151,7 @@ colorama==0.4.6 ; os_name == 'nt' or sys_platform == 'win32'
# click
# colorful
# tqdm
colorful==0.5.7
colorful==0.5.8
# via ray
constantly==23.10.4
# via twisted
Expand All @@ -170,7 +170,7 @@ cryptography==39.0.1
# rl-rock
cycler==0.12.1
# via matplotlib
darabonba-core==1.0.3
darabonba-core==1.0.4
# via alibabacloud-tea-openapi
distlib==0.4.0
# via virtualenv
Expand Down Expand Up @@ -497,8 +497,10 @@ typing-extensions==4.15.0
# virtualenv
typing-inspection==0.4.2
# via pydantic
tzdata==2025.2 ; sys_platform == 'win32'
# via tzlocal
tzdata==2025.2
# via
# rl-rock
# tzlocal
tzlocal==5.3.1
# via apscheduler
urllib3==2.5.0
Expand Down
12 changes: 7 additions & 5 deletions requirements_sandbox_actor.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ certifi==2025.10.5
# httpcore
# httpx
# requests
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
cffi==2.0.0
# via cryptography
charset-normalizer==3.4.4
# via requests
Expand All @@ -118,7 +118,7 @@ contourpy==1.3.3 ; python_full_version >= '3.11'
# via matplotlib
crcmod==1.7
# via oss2
cryptography==44.0.3
cryptography==39.0.1
# via
# alibabacloud-darabonba-signature-util
# alibabacloud-openapi-util
Expand Down Expand Up @@ -266,7 +266,7 @@ psutil==7.1.3
# via nacos-sdk-python
pycosat==0.6.6
# via reasoning-gym
pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
pycparser==2.23 ; implementation_name != 'PyPy'
# via cffi
pycryptodome==3.23.0
# via
Expand Down Expand Up @@ -353,8 +353,10 @@ typing-extensions==4.15.0
# typing-inspection
typing-inspection==0.4.2
# via pydantic
tzdata==2025.2 ; sys_platform == 'win32'
# via tzlocal
tzdata==2025.2
# via
# rl-rock
# tzlocal
tzlocal==5.3.1
# via apscheduler
urllib3==2.5.0
Expand Down
5 changes: 5 additions & 0 deletions rock/admin/entrypoints/sandbox_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
SandboxWriteFileRequest,
)
from rock.admin.proto.response import SandboxStartResponse
from rock.common.constants import GET_STATUS_SWITCH
from rock.deployments.config import DockerDeploymentConfig
from rock.sandbox.sandbox_manager import SandboxManager
from rock.utils import handle_exceptions
Expand Down Expand Up @@ -83,6 +84,10 @@ async def get_sandbox_statistics(sandbox_id: str):
@sandbox_router.get("/get_status")
@handle_exceptions(error_message="get sandbox status failed")
async def get_status(sandbox_id: str):
    """Return the status of ``sandbox_id`` wrapped in a ``RockResponse``.

    ``sandbox_manager.get_status_v2`` is used when no Nacos provider is
    configured, or when the provider reports the ``GET_STATUS_SWITCH`` switch
    as on; otherwise ``sandbox_manager.get_status`` is used.
    """
    nacos = sandbox_manager.rock_config.nacos_provider
    use_v2 = nacos is None or nacos.get_switch_status(GET_STATUS_SWITCH)
    fetch_status = sandbox_manager.get_status_v2 if use_v2 else sandbox_manager.get_status
    return RockResponse(result=await fetch_status(sandbox_id))


Expand Down
1 change: 0 additions & 1 deletion rock/admin/entrypoints/sandbox_proxy_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def set_sandbox_proxy_service(service: SandboxProxyService):
global sandbox_proxy_service
sandbox_proxy_service = service


@sandbox_proxy_router.post("/execute")
@handle_exceptions(error_message="execute command failed")
async def execute(command: SandboxCommand) -> RockResponse[CommandResponse]:
Expand Down
2 changes: 1 addition & 1 deletion rock/admin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import uvicorn
from fastapi import FastAPI, Request
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse, Response, StreamingResponse
from starlette.responses import JSONResponse

from rock import env_vars
from rock.admin.core.ray_service import RayService
Expand Down
1 change: 1 addition & 0 deletions rock/common/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Switch name passed to nacos_provider.get_switch_status() by the /get_status
# endpoint; when the switch is on (or no provider is configured) the endpoint
# answers with get_status_v2 instead of get_status.
GET_STATUS_SWITCH = "get_status_v2_enabled"
5 changes: 3 additions & 2 deletions rock/deployments/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from rock.deployments.hooks.abstract import CombinedDeploymentHook, DeploymentHook
from rock.deployments.runtime_env import DockerRuntimeEnv, LocalRuntimeEnv, PipRuntimeEnv, UvRuntimeEnv
from rock.deployments.sandbox_validator import DockerSandboxValidator
from rock.deployments.status import ServiceStatus
from rock.deployments.status import PersistedServiceStatus, ServiceStatus
from rock.logger import init_logger
from rock.rocklet import PACKAGE_NAME, REMOTE_EXECUTABLE_NAME
from rock.rocklet.exceptions import DeploymentNotStartedError, DockerPullError
Expand Down Expand Up @@ -62,7 +62,7 @@ def __init__(
self._stop_time = datetime.datetime.now() + datetime.timedelta(minutes=self._config.auto_clear_time)
self._check_stop_task = None
self._container_name = None
self._service_status = ServiceStatus()
self._service_status = PersistedServiceStatus()
if self._config.container_name:
self.set_container_name(self._config.container_name)
if env_vars.ROCK_WORKER_ENV_TYPE == "docker":
Expand Down Expand Up @@ -275,6 +275,7 @@ async def start(self):

if self._container_name is None:
self.set_container_name(self._get_container_name())
self._service_status.init_status_path(sandbox_id=self._container_name)
executor = get_executor()
loop = asyncio.get_running_loop()
await loop.run_in_executor(executor, self._pull_image)
Expand Down
57 changes: 57 additions & 0 deletions rock/deployments/status.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import json
import os
from typing import Any

from pydantic import BaseModel, Field

from rock import env_vars
from rock.deployments.constants import Status


Expand Down Expand Up @@ -68,3 +71,57 @@ def from_dict(cls, data: dict[str, Any]) -> "ServiceStatus":
port_mapping[int(port_value)] = mapping

return cls(phases=phases, port_mapping=port_mapping)

@classmethod
def from_content(cls, content: str) -> "ServiceStatus":
    """Create a ServiceStatus from a JSON string.

    Args:
        content: JSON text (not a file path); it is decoded with
            ``json.loads`` and the result is handed to ``cls.from_dict``.

    Returns:
        The instance produced by ``cls.from_dict``.

    Raises:
        Exception: If decoding or ``from_dict`` fails. The message is
            ``parse service status failed:<original error>`` and the
            original error is attached as ``__cause__``.
    """
    try:
        return cls.from_dict(json.loads(content))
    except Exception as e:
        # The generic Exception type and message format are kept for existing
        # callers; ``from e`` makes the root cause explicit in tracebacks.
        raise Exception(f"parse service status failed:{str(e)}") from e


class PersistedServiceStatus(ServiceStatus):
    """ServiceStatus that rewrites a JSON file after every mutation.

    Once ``init_status_path`` has been called, each call to ``add_phase``,
    ``update_status`` or ``add_port_mapping`` applies the parent behaviour and
    then dumps ``self.to_dict()`` to ``json_path``. Before that call
    ``json_path`` is ``None`` and nothing is written.
    """

    # Destination file for the serialized status; None disables persistence.
    json_path: str | None = None

    def init_status_path(self, sandbox_id: str):
        """Set ``json_path`` for ``sandbox_id`` and create its parent directory."""
        self.json_path = PersistedServiceStatus.gen_service_status_path(sandbox_id)
        os.makedirs(os.path.dirname(self.json_path), exist_ok=True)

    def _save_to_file(self):
        """Write ``self.to_dict()`` as indented JSON to ``json_path``.

        Does nothing when ``json_path`` is unset.

        Raises:
            Exception: If the file cannot be written or the status cannot be
                serialized; the original error is attached as ``__cause__``.
        """
        if not self.json_path:
            return
        try:
            with open(self.json_path, "w", encoding="utf-8") as f:
                json.dump(self.to_dict(), f, indent=2)
        except Exception as e:
            # A failed write is surfaced to the caller (not swallowed), wrapped
            # in a generic Exception with a uniform message.
            raise Exception(f"save service status failed: {str(e)}") from e

    def add_phase(self, phase_name: str, status: PhaseStatus):
        """Add a phase via the parent implementation, then persist."""
        super().add_phase(phase_name, status)
        self._save_to_file()

    def update_status(self, phase_name: str, status: Status, message: str):
        """Update a phase via the parent implementation, then persist."""
        super().update_status(phase_name, status, message)
        self._save_to_file()

    def add_port_mapping(self, local_port: int, container_port: int):
        """Record a port mapping via the parent implementation, then persist."""
        super().add_port_mapping(local_port, container_port)
        self._save_to_file()

    @classmethod
    def from_content(cls, content: str) -> "ServiceStatus":
        """Create an instance from a JSON string.

        Delegates to ``ServiceStatus.from_content``, which decodes the text,
        calls ``cls.from_dict`` and raises ``Exception`` with the message
        ``parse service status failed:<error>`` on failure.
        """
        return super().from_content(content)

    @staticmethod
    def gen_service_status_path(sandbox_id: str) -> str:
        """Return ``<ROCK_SERVICE_STATUS_DIR>/<sandbox_id>.json``."""
        return f"{env_vars.ROCK_SERVICE_STATUS_DIR}/{sandbox_id}.json"
4 changes: 4 additions & 0 deletions rock/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
ROCK_LOGGING_PATH: str | None = None
ROCK_LOGGING_FILE_NAME: str | None = None
ROCK_LOGGING_LEVEL: str | None = None
ROCK_SERVICE_STATUS_DIR: str | None = None
ROCK_CONFIG: str | None = None
ROCK_CONFIG_DIR_NAME: str | None = None
ROCK_BASE_URL: str | None = "http://localhost:8080"
ROCK_WORKER_ROCKLET_PORT: int | None = None
ROCK_SANDBOX_STARTUP_TIMEOUT_SECONDS: int = 180
ROCK_CODE_SANDBOX_BASE_URL: str | None = None
ROCK_ENVHUB_BASE_URL: str | None = "http://localhost:8081"
Expand Down Expand Up @@ -59,9 +61,11 @@
"ROCK_LOGGING_PATH": lambda: os.getenv("ROCK_LOGGING_PATH"),
"ROCK_LOGGING_FILE_NAME": lambda: os.getenv("ROCK_LOGGING_FILE_NAME", "rocklet.log"),
"ROCK_LOGGING_LEVEL": lambda: os.getenv("ROCK_LOGGING_LEVEL", "INFO"),
"ROCK_SERVICE_STATUS_DIR": lambda: os.getenv("ROCK_SERVICE_STATUS_DIR", "/data/service_status"),
"ROCK_CONFIG": lambda: os.getenv("ROCK_CONFIG"),
"ROCK_CONFIG_DIR_NAME": lambda: os.getenv("ROCK_CONFIG_DIR_NAME", "rock-conf"),
"ROCK_BASE_URL": lambda: os.getenv("ROCK_BASE_URL", "http://localhost:8080"),
"ROCK_WORKER_ROCKLET_PORT": lambda: int(val) if (val := os.getenv("ROCK_WORKER_ROCKLET_PORT")) else None,
"ROCK_SANDBOX_STARTUP_TIMEOUT_SECONDS": lambda: int(os.getenv("ROCK_SANDBOX_STARTUP_TIMEOUT_SECONDS", "180")),
"ROCK_CODE_SANDBOX_BASE_URL": lambda: os.getenv("ROCK_CODE_SANDBOX_BASE_URL", ""),
"ROCK_ENVHUB_BASE_URL": lambda: os.getenv("ROCK_ENVHUB_BASE_URL", "http://localhost:8081"),
Expand Down
4 changes: 2 additions & 2 deletions rock/sandbox/base_manager.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import asyncio
import time

import ray
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
import ray

from rock.admin.core.redis_key import ALIVE_PREFIX
from rock.admin.metrics.constants import MetricsConstants
Expand Down Expand Up @@ -125,7 +125,7 @@ async def _collect_system_resource_metrics(self):
available_cpu = available_resources.get("CPU", 0)
available_mem = available_resources.get("memory", 0) / 1024**3
return total_cpu, total_mem, available_cpu, available_mem

async def _collect_sandbox_meta(self) -> tuple[int, dict[str, dict[str, str]]]:
meta: dict = {}
cnt = 0
Expand Down
1 change: 1 addition & 0 deletions rock/sandbox/gem_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
EnvStepRequest,
EnvStepResponse,
)
from rock.admin.core.ray_service import RayService
from rock.admin.proto.response import SandboxStartResponse, SandboxStatusResponse
from rock.config import RockConfig
from rock.deployments.config import DockerDeploymentConfig
Expand Down
Loading
Loading