diff --git a/.github/workflows/badge-examples.yml b/.github/workflows/badge-examples.yml index 9aa38e45..fc17b757 100644 --- a/.github/workflows/badge-examples.yml +++ b/.github/workflows/badge-examples.yml @@ -10,6 +10,7 @@ on: - Examples - Tinker - Examples - Azure - Examples - Claude Code + - Examples - RAG types: [completed] workflow_dispatch: @@ -37,5 +38,6 @@ jobs: { workflow: 'examples-tinker.yml', label: 'examples-tinker.stable', variants: ['stable'] }, { workflow: 'examples-azure.yml', label: 'examples-azure.stable', variants: ['stable'] }, { workflow: 'examples-claude-code.yml', label: 'examples-claude-code.stable', variants: ['stable'] }, + { workflow: 'examples-rag.yml', label: 'examples-rag.stable', variants: ['stable'] }, ]; await badgeAggregation({ github, context, core, dependencies }); diff --git a/.github/workflows/badge-latest.yml b/.github/workflows/badge-latest.yml index b7b2413b..82d088c5 100644 --- a/.github/workflows/badge-latest.yml +++ b/.github/workflows/badge-latest.yml @@ -7,6 +7,8 @@ on: - Examples - Spider - Examples - APO - Examples - Unsloth + - Examples - RAG + - Examples - Claude Code - GPU Test types: [completed] @@ -32,6 +34,8 @@ jobs: { workflow: 'examples-spider.yml', label: 'spider.latest', variants: ['latest'] }, { workflow: 'examples-apo.yml', label: 'apo.latest', variants: ['latest'] }, { workflow: 'examples-unsloth.yml', label: 'unsloth.latest', variants: ['latest'] }, + { workflow: 'examples-claude-code.yml', label: 'claude-code.latest', variants: ['latest'] }, + { workflow: 'examples-rag.yml', label: 'rag.latest', variants: ['latest'] }, { workflow: 'tests-full.yml', label: 'tests-full.latest', variants: ['latest'] }, ]; await badgeAggregation({ github, context, core, dependencies }); diff --git a/.github/workflows/badge-rag.yml b/.github/workflows/badge-rag.yml new file mode 100644 index 00000000..459f0fe2 --- /dev/null +++ b/.github/workflows/badge-rag.yml @@ -0,0 +1,29 @@ +name: Badge - RAG + +on: + workflow_run: + 
workflows: + - Examples - RAG + types: [completed] + + workflow_dispatch: + +permissions: + actions: read + contents: read + +jobs: + badge: + if: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'workflow_run' && github.event.workflow_run.head_branch == 'main') }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/github-script@v8 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const badgeAggregation = require('./scripts/badge_aggregation.js'); + const dependencies = [ + { workflow: 'examples-rag.yml', label: 'rag', variants: ['legacy', 'stable'] }, + ]; + await badgeAggregation({ github, context, core, dependencies }); diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..4734aab3 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,38 @@ +# Repository Guidelines + +## Architecture Overview +Agent Lightning runs through a continuous loop: runners and tracers emit spans, `LightningStore` (`agentlightning/store/`) keeps them synchronized, and algorithms in `agentlightning/algorithm/` consume those traces to improve behavior. + +## Project Structure & Module Organization +- `agentlightning/`: adapters, execution stack, training loop, tracer, reward logic, and the `agl` CLI. +- `docs/` & `examples/`: narrative and procedural docs (assets in `docs/assets/`, navigation in `mkdocs.yml`) plus runnable workflows whose READMEs point to their companion how-to guides. `docs/how-to` covers task-focused instructions, while `docs/tutorials` explains concepts and subsystems. +- `dashboard/`, `scripts/`, `tests/`: UI bundles, release/dataset/CI automation, and mirrored coverage of the runtime tree. Record download steps rather than committing binaries. + +## Build, Test, and Development Commands +- `uv sync --group dev` — provision tooling once per environment. +- `uv run --no-sync pytest -v` — execute the full suite; add a path or `-k expr` to narrow the run. 
+- `uv run --no-sync pyright` — enforce static typing parity with CI. +- `uv run --no-sync pre-commit run --all-files --show-diff-on-failure` and `uv run --no-sync mkdocs build --strict` — keep formatting tidy and documentation valid. +Always commit the refreshed `uv.lock` when dependencies shift, and mention optional groups (VERL, APO, GPU) in PR notes. + +## Coding Style & Naming Conventions +- Target `requires-python >= 3.10`, four-space indentation, 120-character lines (though docstrings may run longer), and formatter-owned diffs (Black + isort, `black` profile). Use `snake_case` for modules, functions, and variables; `PascalCase` for classes and React components; lowercase hyphenation for CLI flags, branch names, and TypeScript filenames. +- Maintain exhaustive type hints (pyright enforces them) and prefer shared dataclasses or Pydantic models from `agentlightning.types`. +- Author Google-style docstrings for new modules or public methods—succinct descriptions, no redundant type info, no redundant `Key features/components` bullet points, and `[][]` syntax for cross-references. +- Writing logs is encouraged, especially for long functions with multiple steps and try-except blocks that catch all exceptions. Use `logging.getLogger(__name__)` to get loggers. Distinguish between DEBUG, INFO, WARNING, and ERROR logs. + +## Testing Guidelines +- Mirror runtime directories under `tests/` and match filenames for quick traceability. +- Parametrize pytest cases and apply markers (`openai`, `gpu`, `agentops`, `mongo`, `llmproxy`) so optional suites can be skipped via selectors like `-m "not mongo"` yet still exercised in CI. +- Lean on fixtures, favor real stores/spans/agents over mocks, and drive coverage across the majority of branches. +- If an imported module is missing from the environment, check whether `uv sync` has been run with the right groups. Do not make stubs for external dependencies unless necessary. 
+ +## Example Contributions +- Ship each example with a README that includes smoke-test instructions so maintainers can validate quickly. The README must contain an "Included Files" section summarizing every file and its role. +- Keep runnable example modules self-contained with a module-level docstring describing CLI usage. Document important or educational classes/functions with targeted docstrings and inline comments where clarity matters. +- Add a CI workflow per example named `examples-<name>.yml` in `.github/workflows/`. Register it in `badge-<name>.yml`, `badge-examples.yml`, and `badge-latest.yml` when applicable so badges stay accurate. + +## Commit & Pull Request Guidelines +- Branch from a fresh `main` using `feature/`, `fix/`, `docs/`, or `chore/`. +- Write imperative, scoped commits, reference issues with `Fixes #123`, and rerun pre-commit plus the relevant pytest/doc builds before pushing. +- Use PR descriptions to summarize intent, list verification commands, call out dependency or docs-navigation updates, and link new docs/examples via `mkdocs.yml` or `examples/README.md`. Include logs for dashboard changes. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 120000 index 00000000..47dc3e3d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/docs/community/contributing.md b/docs/community/contributing.md index 651550a6..83abe06b 100644 --- a/docs/community/contributing.md +++ b/docs/community/contributing.md @@ -46,6 +46,7 @@ Bonus points for examples that: - Ship CI or self-test coverage so we know they still work as the core evolves. **Otherwise, we would have to mark the example as unmaintained because we won't be able to test the examples manually before each release.** - Include a [`docs/how-to/`]({{ src("docs/how-to/") }}) guide (or a detailed README if no how-to exists) without duplicating content in multiple places. - Favor simple, dependency-light code over heavy abstractions. 
+- Ship a README that documents smoke-test instructions and includes an "Included Files" section summarizing every file and its role; keep the runnable module self-contained with a module-level docstring explaining CLI usage, plus targeted docstrings or inline comments for educational functions/classes. !!! warning "Please discuss first" @@ -126,13 +127,13 @@ After `uv sync`, run commands via `uv run ...` (add `--no-sync` once the environ Formatting and linting are enforced through [pre-commit](https://pre-commit.com/). Install once, then run before each push: ```bash -uv run pre-commit install -uv run pre-commit run --all-files --show-diff-on-failure --color=always +uv run --no-sync pre-commit install +uv run --no-sync pre-commit run --all-files --show-diff-on-failure --color=always ``` Once installed, the hooks run automatically on every `git commit`. Running the pre-commit hooks locally keeps CI green and diffs manageable. -### 3. Branch From a Fresh `main` +### 3. Branch from Fresh `main` and Code Start all work from the latest upstream state: @@ -165,20 +166,28 @@ Use lowercase with hyphens, e.g., `feature/async-runner-hooks`. Remember to register new docs in [`mkdocs.yml`]({{ src("mkdocs.yml") }}), add examples to [examples/README]({{ src("examples/README.md") }}), and update the [Examples Catalog](../how-to/examples-catalog.md). +Before you start coding, bring the shared coding conventions with you: + +- Target `requires-python >= 3.10`, four-space indentation, ~120-character lines (docstrings may run longer), and formatter-owned diffs (Black + isort with the `black` profile). +- Use `snake_case` for modules, functions, and variables; `PascalCase` for classes and React components; lowercase hyphenation for CLI flags, branch names, and TypeScript filenames. +- Maintain exhaustive type hints (pyright enforces them) and write succinct Google-style docstrings (with `[][]` cross-references). +- Prefer dataclasses or Pydantic models from `agentlightning.types`. 
+- Log via `logging.getLogger(__name__)` with targeted DEBUG/INFO/WARNING/ERROR calls—especially for long multi-step functions or broad `try/except` blocks. + ### 4. Test and Validate -Most contributions require automated checks. Prefix commands with `uv run` so they use the project environment. +Most contributions require automated checks. Once `uv sync` locks dependencies, prefix commands with `uv run --no-sync ...` so they share the same environment as CI. **Full test suite** ```bash -uv run pytest -v +uv run --no-sync pytest -v ``` **Targeted tests** ```bash -uv run pytest tests/path/to/test_file.py -k test_name +uv run --no-sync pytest tests/path/to/test_file.py -k test_name ``` **Optional/gated tests:** GPU-specific suites or API-dependent tests run automatically when the required hardware or environment variables (such as `OPENAI_API_KEY`) are present. @@ -186,7 +195,7 @@ uv run pytest tests/path/to/test_file.py -k test_name **Static analysis:** ```bash -uv run pyright +uv run --no-sync pyright ``` If you have touched code under `examples/`, you should run the example-specific smoke tests. Each directory includes a README with example-specific smoke tests—run those too. @@ -196,8 +205,8 @@ If you have touched code under `examples/`, you should run the example-specific Keep API references under [docs/reference]({{ src("docs/reference/") }}) up to date. Doc-only changes should still build cleanly: ```bash - uv run mkdocs serve --strict # live reload - uv run mkdocs build --strict # CI-equivalent + uv run --no-sync mkdocs serve --strict # live reload + uv run --no-sync mkdocs build --strict # CI-equivalent ``` `--strict` elevates warnings to errors so you catch issues before CI. @@ -205,7 +214,7 @@ If you have touched code under `examples/`, you should run the example-specific Before opening a PR, double-check the basics: - Run `uv lock` if you changed dependencies. 
-- Run `uv run pre-commit run --all-files` (hooks installed via `pre-commit install` run automatically on `git commit`, but rerun them if you amended history). +- Run `uv run --no-sync pre-commit run --all-files --show-diff-on-failure` (hooks installed via `pre-commit install` run automatically on `git commit`, but rerun them if you amended history). - Execute the relevant commands from the test list above. - Validate each affected example via its README instructions. diff --git a/examples/README.md b/examples/README.md index 3cc41610..6ac25b01 100644 --- a/examples/README.md +++ b/examples/README.md @@ -9,8 +9,8 @@ This catalog highlights the examples shipped with Agent-lightning. | [calc_x](./calc_x) | VERL-powered math reasoning agent training that uses AutoGen with an MCP calculator tool. | [![calc_x workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-calc-x.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-calc-x.yml) | | [claude_code](./claude_code) | Claude Code SWE-bench harness that records Agent-lightning traces across Anthropic, vLLM, and OpenAI-compatible backends. | [![claude_code workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-claude-code.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-claude-code.yml) | | [minimal](./minimal) | Bite-sized programs that demonstrate how individual Agent-lightning building blocks behave in isolation. | [![minimal workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-unit.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/badge-unit.yml) | -| [rag](./rag) | Retrieval-Augmented Generation pipeline targeting the MuSiQue dataset with Wikipedia retrieval. 
| **Unmaintained** — last verified with Agent-lightning v0.1.1 | -| [search_r1](./search_r1) | Framework-free Search-R1 reinforcement learning training workflow with a retrieval backend. | **Unmaintained** — last verified with Agent-lightning v0.1.2 | +| [rag](./rag) | Retrieval-Augmented Generation pipeline targeting the MuSiQue dataset with Wikipedia retrieval. | [![rag workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-rag.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-rag.yml) | +| [search_r1](./search_r1) | Framework-free Search-R1 reinforcement learning training workflow with a retrieval backend. | **Last verified with Agent-lightning v0.1.2** | | [spider](./spider) | Text-to-SQL reinforcement learning training on the Spider dataset using LangGraph. | [![spider workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-spider.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-spider.yml) | | [tinker](./tinker) | Reinforcement learning with Tinker as the backend training service. | [![tinker workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-tinker.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-tinker.yml) | | [unsloth](./unsloth) | Supervised fine-tuning example powered by Unsloth with 4-bit quantization and LoRA. 
| [![unsloth workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/badge-unsloth.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-unsloth.yml) | diff --git a/examples/rag/README.md b/examples/rag/README.md index 205b1158..fc54d120 100644 --- a/examples/rag/README.md +++ b/examples/rag/README.md @@ -1,5 +1,7 @@ # RAG Agent Example +[![rag workflow status](https://github.com/microsoft/agent-lightning/actions/workflows/examples-rag.yml/badge.svg)](https://github.com/microsoft/agent-lightning/actions/workflows/examples-rag.yml) + This example demonstrates training a Retrieval-Augmented Generation (RAG) agent using Agent-Lightning with retrieval capabilities. The agent answers multi-hop questions from a tiny MuSiQue dataset by retrieving and reasoning over Wikipedia passages. ## Overview