meta-pytorch
diff --git a/‎.github/workflows/docker-build.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/docker-build.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/environments.md‎
Lines changed: 19 additions & 0 deletions b/‎docs/environments.md‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎docs/environments/chess.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/environments/chess.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎envs/chess_env/README.md‎
Lines changed: 112 additions & 0 deletions b/‎envs/chess_env/README.md‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎envs/chess_env/__init__.py‎
Lines changed: 34 additions & 0 deletions b/‎envs/chess_env/__init__.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎envs/chess_env/client.py‎
Lines changed: 101 additions & 0 deletions b/‎envs/chess_env/client.py‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎envs/chess_env/models.py‎
Lines changed: 69 additions & 0 deletions b/‎envs/chess_env/models.py‎
Lines changed: 69 additions & 0 deletions
@@ -81,6 +81,8 @@ jobs:
             dockerfile: envs/git_env/server/Dockerfile
           - name: connect4_env
             dockerfile: envs/connect4_env/server/Dockerfile
+          - name: chess-env
+            dockerfile: envs/chess_env/server/Dockerfile
           - name: tbench2-env
             dockerfile: envs/tbench2_env/server/Dockerfile
           - name: textarena-env
 
@@ -33,7 +33,7 @@ jobs:
       - name: Install dependencies
         run: |
           uv sync --all-extras
-          uv pip install pytest numpy nltk smolagents
+          uv pip install pytest numpy nltk smolagents python-chess moonfish
 
       - name: Run tests
         run: |
 
@@ -205,6 +205,25 @@ The OpenEnv community has built a catalog of ready-to-run environments that cove
     </div>
   </div>
 
+  <div class="environment-card">
+    <div class="environment-card__body">
+      <span class="environment-card__tag">Chess</span>
+      <p class="environment-card__description">
+        Chess RL environment powered by the moonfish engine with configurable opponents, PSQT evaluation, and full rules support.
+      </p>
+    </div>
+    <div class="environment-card__links">
+      <a class="environment-card__icon" href="/OpenEnv/environments/chess/" aria-label="Chess docs">
+        <svg viewBox="0 0 24 24" aria-hidden="true" focusable="false">
+          <path d="M6 3c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h12c1.1 0 2-.9 2-2V9l-6-6H6zm8 1.5L18.5 9H14V4.5z" fill="currentColor"/>
+        </svg>
+      </a>
+      <a class="environment-card__icon environment-card__icon--hf" href="https://huggingface.co/spaces/luccabb/moonfish_chess" target="_blank" rel="noreferrer noopener" aria-label="Chess on Hugging Face">
+        <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="" aria-hidden="true" />
+      </a>
+    </div>
+  </div>
+
   <div class="environment-card">
     <div class="environment-card__body">
       <span class="environment-card__tag">Unity</span>
 
@@ -0,0 +1,2 @@
+--8<-- "../../envs/chess_env/README.md"
+
@@ -0,0 +1,112 @@
+# Chess Environment
+
+A chess reinforcement learning environment for OpenEnv, powered by the [moonfish](https://github.com/luccabb/moonfish) chess engine.
+
+## Features
+
+- **Full chess rules** via python-chess library
+- **Configurable opponent**: moonfish engine, random moves, or self-play
+- **Position evaluation**: Uses moonfish's PSQT-based evaluation
+- **Standard OpenEnv interface**: reset(), step(), state
+
+## Quick Start
+
+### Using Docker
+
+```bash
+# Build the image
+docker build -t chess-env:latest -f envs/chess_env/server/Dockerfile .
+
+# Run the server
+docker run -p 8000:8000 chess-env:latest
+```
+
+### Using the Client
+
+```python
+from envs.chess_env import ChessEnv, ChessAction
+
+# Connect to server
+with ChessEnv(base_url="http://localhost:8000") as env:
+    # Reset for a new game
+    result = env.reset()
+    print(f"Starting position: {result.observation.fen}")
+    print(f"Legal moves: {result.observation.legal_moves}")
+
+    # Make a move
+    result = env.step(ChessAction(move="e2e4"))
+    print(f"Reward: {result.reward}, Done: {result.done}")
+
+    # Play until game ends
+    while not result.done:
+        # Your policy here
+        move = result.observation.legal_moves[0]
+        result = env.step(ChessAction(move=move))
+
+    print(f"Game result: {result.observation.result}")
+```
+
+## Observation Space
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `fen` | str | Board position in FEN notation |
+| `legal_moves` | List[str] | Legal moves in UCI format |
+| `is_check` | bool | Whether current player is in check |
+| `done` | bool | Whether game has ended |
+| `reward` | float | Reward for last action |
+| `result` | Optional[str] | Game result ("1-0", "0-1", "1/2-1/2") once the game is over; `None` while it is in progress |
+
+## Action Space
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `move` | str | UCI format move (e.g., "e2e4", "e7e8q") |
+
+## Rewards
+
+| Outcome | Reward |
+|---------|--------|
+| Win | +1.0 |
+| Loss | -1.0 |
+| Draw | 0.0 |
+| Illegal move | -0.1 |
+
+## Configuration
+
+The environment supports these configuration options:
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `opponent` | "moonfish" | Opponent type: "moonfish" (engine), "random" (random moves), or None (self-play) |
+| `opponent_depth` | 2 | Search depth for moonfish opponent |
+| `max_moves` | 500 | Maximum half-moves before draw |
+| `agent_color` | None | Agent color: "white", "black", or None (alternate each episode) |
+| `gamma` | 0.99 | Discount factor for temporal credit assignment |
+
+## Temporal Discounting
+
+For RL training, the environment computes temporally discounted rewards at episode end. This helps with credit assignment in long games where only the final outcome is known.
+
+When an episode ends, the terminal observation's `metadata` includes:
+- `discounted_rewards`: List of discounted rewards for each agent move
+- `gamma`: The discount factor used
+
+The formula is `r_t = γ^(T-1-t) × R_final` where:
+- `T` = total agent moves
+- `t` = move index (0-indexed)
+- `R_final` = terminal reward (+1, -1, or 0)
+
+Example for a 5-move win with γ=0.99:
+```
+Move 0: 0.99^4 × 1.0 = 0.961
+Move 1: 0.99^3 × 1.0 = 0.970
+Move 2: 0.99^2 × 1.0 = 0.980
+Move 3: 0.99^1 × 1.0 = 0.990
+Move 4: 0.99^0 × 1.0 = 1.000
+```
+
+## Links
+
+- [moonfish GitHub](https://github.com/luccabb/moonfish)
+- [Play online](https://huggingface.co/spaces/luccabb/moonfish_chess)
@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Chess Environment for OpenEnv.
+
+This module provides OpenEnv integration for chess, using the moonfish
+chess engine for position evaluation and opponent play.
+
+Example:
+    >>> from envs.chess_env import ChessEnv, ChessAction
+    >>>
+    >>> # Connect to a running server or start via Docker
+    >>> env = ChessEnv.from_docker_image("chess-env:latest")
+    >>>
+    >>> # Reset and interact
+    >>> result = env.reset()
+    >>> print(result.observation.fen)
+    >>> print(result.observation.legal_moves)
+    >>>
+    >>> result = env.step(ChessAction(move="e2e4"))
+    >>> print(result.reward, result.done)
+    >>>
+    >>> # Cleanup
+    >>> env.close()
+"""
+
+from .client import ChessEnv
+from .models import ChessAction, ChessObservation, ChessState
+
+__all__ = ["ChessEnv", "ChessAction", "ChessObservation", "ChessState"]
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Chess Environment Client.
+
+This module provides the client for connecting to a Chess Environment server
+via WebSocket for persistent sessions.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from openenv.core.client_types import StepResult
+from openenv.core.env_client import EnvClient
+
+from .models import ChessAction, ChessObservation, ChessState
+
+
+class ChessEnv(EnvClient[ChessAction, ChessObservation, ChessState]):
+    """
+    Client for Chess Environment.
+
+    This client maintains a persistent WebSocket connection to the environment
+    server, enabling efficient multi-step interactions with lower latency.
+
+    Uses the moonfish chess engine for opponent moves and position evaluation.
+
+    Example:
+        >>> with ChessEnv(base_url="http://localhost:8000") as client:
+        ...     result = client.reset()
+        ...     print(result.observation.fen)
+        ...     print(result.observation.legal_moves)
+        ...
+        ...     result = client.step(ChessAction(move="e2e4"))
+        ...     print(result.reward, result.done)
+    """
+
+    def _step_payload(self, action: ChessAction) -> Dict[str, Any]:
+        """
+        Convert ChessAction to JSON payload for step request.
+
+        Args:
+            action: ChessAction instance with UCI move string.
+
+        Returns:
+            Dictionary representation suitable for JSON encoding.
+        """
+        return {
+            "move": action.move,
+        }
+
+    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[ChessObservation]:
+        """
+        Parse server response into StepResult[ChessObservation].
+
+        Args:
+            payload: JSON response from server.
+
+        Returns:
+            StepResult with ChessObservation.
+        """
+        obs_data = payload.get("observation", {})
+
+        observation = ChessObservation(
+            fen=obs_data.get("fen", ""),
+            legal_moves=obs_data.get("legal_moves", []),
+            is_check=obs_data.get("is_check", False),
+            done=obs_data.get("done", False),
+            reward=obs_data.get("reward", 0.0),
+            result=obs_data.get("result"),
+            metadata=obs_data.get("metadata", {}),
+        )
+
+        return StepResult(
+            observation=observation,
+            reward=observation.reward,
+            done=observation.done,
+        )
+
+    def _parse_state(self, payload: Dict[str, Any]) -> ChessState:
+        """
+        Parse server response into ChessState object.
+
+        Args:
+            payload: JSON response from /state endpoint.
+
+        Returns:
+            ChessState object with environment state information.
+        """
+        return ChessState(
+            episode_id=payload.get("episode_id", ""),
+            fen=payload.get("fen", ""),
+            current_player=payload.get("current_player", "white"),
+            move_history=payload.get("move_history", []),
+            step_count=payload.get("step_count", 0),
+        )
@@ -0,0 +1,69 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Data models for Chess Environment.
+
+This module defines the Action, Observation, and State types for chess games
+via the OpenEnv interface. Uses the moonfish chess engine for move search
+and position evaluation.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+from pydantic import Field
+
+from openenv.core.env_server import Action, Observation, State
+
+
+class ChessAction(Action):
+    """
+    Action for Chess environment: a single move submitted by the agent.
+
+    Attributes:
+        move: Required UCI format move string (e.g., "e2e4", "e7e8q" for
+            promotion). The field has no default value.
+    """
+
+    move: str
+
+
+class ChessObservation(Observation):
+    """
+    Observation for Chess environment.
+
+    Attributes:
+        fen: Board position in FEN notation. Defaults to an empty string.
+        legal_moves: List of legal moves in UCI format. Defaults to a fresh
+            empty list per instance.
+        is_check: Whether the current player is in check. Defaults to False.
+        done: Whether the game is over. Not declared in this class;
+            presumably inherited from the Observation base.
+        reward: Reward for the last action. Not declared in this class;
+            presumably inherited from the Observation base.
+        result: Game result string if game is over (e.g., "1-0", "0-1",
+            "1/2-1/2"). Defaults to None.
+    """
+
+    fen: str = ""
+    legal_moves: List[str] = Field(default_factory=list)
+    is_check: bool = False
+    result: Optional[str] = None
+
+
+class ChessState(State):
+    """
+    State for Chess environment.
+
+    Attributes:
+        episode_id: Unique ID for the current game. Not declared in this
+            class; presumably inherited from the State base.
+        fen: Current board position in FEN notation. Defaults to the
+            standard chess starting position.
+        current_player: "white" or "black". Defaults to "white". Typed as a
+            plain str, so the annotation alone does not restrict the value.
+        move_history: List of moves played in UCI format. Defaults to a
+            fresh empty list per instance.
+        step_count: Number of half-moves played. Defaults to 0.
+    """
+
+    fen: str = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"
+    current_player: str = "white"
+    move_history: List[str] = Field(default_factory=list)
+    step_count: int = 0
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+--8<-- "../../envs/chess_env/README.md"`
	`2`	`+`