fraware
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 10 additions & 10 deletions
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/release-fixture-verify.yml b/.github/workflows/release-fixture-verify.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/risk-coverage-pr.yml b/.github/workflows/risk-coverage-pr.yml
Lines changed: 2 additions & 2 deletions
diff --git a/README.md b/README.md
Lines changed: 23 additions & 54 deletions
diff --git a/docs/benchmarks/hospital_lab_full_pipeline_results_report.md b/docs/benchmarks/hospital_lab_full_pipeline_results_report.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/benchmarks/index.md b/docs/benchmarks/index.md
Lines changed: 2 additions & 0 deletions
diff --git a/docs/benchmarks/paper/README.md b/docs/benchmarks/paper/README.md
Lines changed: 4 additions & 4 deletions
diff --git a/docs/benchmarks/throughput_comparison.md b/docs/benchmarks/throughput_comparison.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/contracts/cli_contract.md b/docs/contracts/cli_contract.md
Lines changed: 1 addition & 1 deletion
@@ -94,8 +94,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package
-        run: pip install -e ".[dev]"
+      - name: Install package (dev + env for policy/schema code that may touch env)
+        run: pip install -e ".[dev,env]"
       - name: Validate policy
         run: labtrust validate-policy
       - name: Validate policy (partner overlay hsl_like)
@@ -118,8 +118,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package
-        run: pip install -e ".[dev]"
+      - name: Install package (dev + env so export/lab_design and pytest collection succeed)
+        run: pip install -e ".[dev,env]"
       - name: Verify ui_fixtures evidence bundle
         run: labtrust verify-bundle --bundle tests/fixtures/ui_fixtures/evidence_bundle/EvidenceBundle.v0.1
       - name: Export bundle from ui_fixtures (for artifact inspection; tests build bundle in memory)
@@ -134,8 +134,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package
-        run: pip install -e ".[dev]"
+      - name: Install package (dev + env for export-risk-register)
+        run: pip install -e ".[dev,env]"
       # Plan completeness checked on every PR so required_bench_plan is runnable.
       - name: Required bench plan completeness
         run: python scripts/required_bench_plan_runs.py > /dev/null
@@ -309,8 +309,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package
-        run: pip install -e ".[dev]"
+      - name: Install package (dev + env for CLI imports)
+        run: pip install -e ".[dev,env]"
       - name: Create minimal artifact and run transparency-log
         run: |
           mkdir -p artifact/_repr/throughput_sla
@@ -328,8 +328,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package with docs extra
-        run: pip install -e ".[docs]"
+      - name: Install package (docs + env so mkdocstrings can import labtrust_gym)
+        run: pip install -e ".[docs,env]"
       - name: Build MkDocs
         run: mkdocs build --strict
 
 
@@ -25,8 +25,8 @@ jobs:
         with:
           python-version: "3.11"
 
-      - name: Install package and docs extra
-        run: pip install -e ".[docs]"
+      - name: Install package (docs + env so mkdocstrings can import labtrust_gym)
+        run: pip install -e ".[docs,env]"
 
       - name: Build MkDocs
         run: mkdocs build --strict
 
@@ -17,8 +17,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package
-        run: pip install -e ".[dev]"
+      - name: Install package (dev + env for verify_release/export code paths)
+        run: pip install -e ".[dev,env]"
       - name: Normalize release fixture and regenerate manifests
         run: python scripts/normalize_release_fixture_manifests.py
       - name: Run release fixture verify test
 
@@ -45,8 +45,8 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-      - name: Install package
-        run: pip install -e ".[dev]"
+      - name: Install package (dev + env for run-benchmark and export-risk-register)
+        run: pip install -e ".[dev,env]"
       - name: Verify fixture evidence (when receipts or SECURITY present)
         run: |
           dirs=$(python scripts/risk_coverage_fixture_dirs.py --dirs-only)
 
@@ -7,37 +7,30 @@
 [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 [![Python 3.11+](https://img.shields.io/badge/python-3.11+-green.svg)](https://www.python.org/downloads/)
 
-**A multi-agent environment (PettingZoo/Gym) for hospital lab automation, with a reference trust skeleton.** The first instance models a pathology lab—specifically a blood sciences lane ([Glossary](docs/reference/glossary.md#lab-terminology-hospital-lab-pathology-lab-blood-sciences-lab)).
-
-**What it provides:** RBAC, signed actions, append-only audit log, invariants, and anomaly throttles—all driven by versioned policy and golden scenarios.
-
-**Trust skeleton (at a glance)**
-
-```mermaid
-flowchart LR
-    Policy["policy/ (YAML)"]
-    Policy --> RBAC["RBAC"]
-    Policy --> Sig["Signed\nactions"]
-    Policy --> Audit["Audit log\n(hash-chained)"]
-    Policy --> Inv["Invariants"]
-    Policy --> Codes["Reason\ncodes"]
-```
+**A multi-agent environment (PettingZoo/Gym) for hospital lab automation, with a reference trust skeleton.**
 
 ---
 
 ## Contents
 
-- [North star](#north-star)
-- [Who is this for?](#who-is-this-for--i-want-to)
-- [Installation](#installation-pip)
-- [Pipelines](#pipelines)
-- [Quick eval](#quick-eval)
-- [CLI](#cli)
-- [Repository structure](#repository-structure)
-- [Golden runner](#golden-runner)
-- [Reproducibility and citation](#reproducibility-and-citation)
-- [Release and contract freeze](#release-and-contract-freeze)
-- [Architecture diagrams](docs/architecture/diagrams.md) (full pipeline and lab topology)
+- [LabTrust-Gym](#labtrust-gym)
+  - [Contents](#contents)
+  - [North star](#north-star)
+  - [Who is this for? / I want to...](#who-is-this-for--i-want-to)
+  - [Installation (pip)](#installation-pip)
+  - [Pipelines](#pipelines)
+  - [Quick eval](#quick-eval)
+  - [CLI](#cli)
+    - [Policy and validation](#policy-and-validation)
+    - [Benchmarking and evaluation](#benchmarking-and-evaluation)
+    - [Export and verification](#export-and-verification)
+    - [Security and safety](#security-and-safety)
+    - [Risk register](#risk-register)
+    - [Coordination and studies](#coordination-and-studies)
+    - [Release and reproducibility](#release-and-reproducibility)
+  - [Repository structure](#repository-structure)
+  - [Reproducibility and citation](#reproducibility-and-citation)
+  - [License](#license)
 
 ---
 
@@ -67,7 +60,7 @@ System and threat model: [Systems and threat model](docs/architecture/systems_an
 | I want to... | First step |
 |--------------|------------|
 | Run benchmarks only | `pip install labtrust-gym[env,plots]` then `labtrust quick-eval` |
-| Add my coordination method (or task) | [Extension development](docs/agents/extension_development.md) + entry_points; see [examples/extension_example](examples/extension_example/) |
+| Add my coordination method (or task) | [Extension development](docs/agents/extension_development.md) + entry_points; see [examples/extension_example](https://github.com/fraware/LabTrust-Gym/tree/main/examples/extension_example) |
 | Fork and customize policy | [Forker guide](docs/getting-started/forkers.md) and `labtrust forker-quickstart` |
 | Use as a library without forking | [Extension development](docs/agents/extension_development.md) + `--profile` + `extension_packages` in a lab profile |
 | Run the full security suite | `labtrust run-security-suite`; needs `.[env]`; use `--skip-system-level` when env is not installed |
@@ -98,9 +91,6 @@ labtrust validate-policy
 pytest -q
 ```
 
-- **Live tests:** Run when `OPENAI_API_KEY` and `LABTRUST_RUN_LLM_LIVE=1` or `LABTRUST_RUN_LLM_ATTACKER=1` are set. Use **`pytest -m 'not slow'`**; avoid `-m 'not slow and not live'` if you want live tests to run.
-- **Policy path:** Run from repo root so `policy/` is found; otherwise **PolicyPathError**. Override with **LABTRUST_POLICY_DIR**. See [Installation](docs/getting-started/installation.md) and [Troubleshooting](docs/getting-started/troubleshooting.md#policy-directory-not-found-policypatherror).
-
 **Full stack** (benchmarks, studies, plots)
 
 ```bash
@@ -121,7 +111,7 @@ labtrust reproduce --profile minimal
 | Extra | Purpose |
 |-------|---------|
 | `[env]` | PettingZoo/Gymnasium (benchmarks and full security suite including coord_pack_ref) |
-| `[plots]` | Matplotlib |
+| `[plots]` | Matplotlib and Pillow (study figures, data tables) |
 | `[llm_openai]` | OpenAI live backend (openai_live) |
 | `[llm_anthropic]` | Anthropic live backend (anthropic_live) |
 | `[marl]` | Stable-Baselines3 (PPO train/eval) |
@@ -139,9 +129,9 @@ Benchmarks run in one of three modes: **deterministic** | **llm_offline** | **ll
 ```mermaid
 flowchart LR
     Run["Run benchmark"]
-    Run --> D["deterministic\n(default)"]
+    Run --> D["deterministic (default)"]
     Run --> O["llm_offline"]
-    Run --> L["llm_live\n+ --allow-network"]
+    Run --> L["llm_live + --allow-network"]
     D --> NoNet["No network"]
     O --> NoNet
     L --> Net["Network / API"]
@@ -155,11 +145,6 @@ flowchart LR
 
 Set mode with `--pipeline-mode`; for live LLM add `--allow-network` or `LABTRUST_ALLOW_NETWORK=1`.
 
-> **Why you saw no OpenAI calls**  
-> Runs are **offline by default**. `quick-eval`, `run-benchmark`, `reproduce`, and `package-release` use `pipeline_mode=deterministic` unless you pass `--pipeline-mode llm_live` and `--allow-network`. The CLI loads `.env` (or `LABTRUST_DOTENV_PATH`); keys there are used for live LLM.  
-> **Live LLM:** `--pipeline-mode llm_live --allow-network --llm-backend openai_live` (or `anthropic_live`, `ollama_live`). The CLI prints **WILL MAKE NETWORK CALLS / MAY INCUR COST**.  
-> Every run records `pipeline_mode`, `llm_backend_id`, `llm_model_id`, and `allow_network` in **results.json** and UI **index.json**; result files also record **non_deterministic** for audit.
-
 ---
 
 ## Quick eval
@@ -270,12 +255,6 @@ Put CLI outputs in `labtrust_runs/` or `--out`. Exit codes, minimal smoke args,
 
 ---
 
-## Golden runner
-
-The golden runner (`labtrust_gym.runner`) runs scenarios from `policy/golden/golden_scenarios.v0.1.yaml` against an environment adapter implementing `LabTrustEnvAdapter` (reset, step, query). Step results must conform to the runner output contract (status, emits, violations, hashchain, etc.); unknown emits fail the suite. Full suite: `LABTRUST_RUN_GOLDEN=1 pytest tests/test_golden_suite.py`.
-
----
-
 ## Reproducibility and citation
 
 Cite using [CITATION.cff](CITATION.cff).
@@ -291,16 +270,6 @@ Cite using [CITATION.cff](CITATION.cff).
 
 ---
 
-## Release and contract freeze
-
-- **Release** — E2E artifacts chain before tagging. [Trust verification](docs/risk-and-security/trust_verification.md), [CONTRIBUTING](CONTRIBUTING.md). **`make verify`** (full battery); **`make paper OUT=<dir>`** (paper artifact); **`labtrust audit-selfcheck --out <dir>`** (Phase A + doctor checks). Paper claims regression: [PAPER_CLAIMS](docs/benchmarks/PAPER_CLAIMS.md).
-- **Version** — `labtrust --version` (version + git SHA). Tag from clean main after checklist.
-- **Contract freeze** — [Frozen contracts](docs/contracts/frozen_contracts.md): runner output, queue, invariant registry, enforcement, receipt, evidence bundle, FHIR, results v0.2; v0.3 extensible only.
-- **Quickstart (paper)** — `bash scripts/quickstart_paper_v0_1.sh` or `scripts/quickstart_paper_v0.1.ps1`: install, validate-policy, quick-eval, package-release paper_v0.1, verify-release. Full release: export-risk-register into release dir, build-release-manifest, verify-release --strict-fingerprints. [Trust verification](docs/risk-and-security/trust_verification.md).
-- **UI** — [tests/fixtures/ui_fixtures/](tests/fixtures/ui_fixtures/). [UI data contract](docs/contracts/ui_data_contract.md).
-
----
-
 ## License
 
 Apache-2.0.
@@ -1,6 +1,6 @@
 # Hospital lab full pipeline – results report
 
-This document summarizes the results from full-pipeline runs for the pathology lab (blood sciences) design: what was run, what succeeded, and how to interpret the artifacts.
+This document summarizes **example** results from full-pipeline runs for the pathology lab (blood sciences) design: what was run, what succeeded, and how to interpret the artifacts. The run directories cited (e.g. `runs/hospital_lab_full_pipeline_smoke`) are from representative runs; regenerate them with [Hospital lab full pipeline](hospital_lab_full_pipeline.md) if needed.
 
 ---
 
 
@@ -21,6 +21,8 @@ Tasks, benchmark cards, official pack, studies, and reproduction.
 | Document | Description |
 |----------|-------------|
 | [Official benchmark pack](official_benchmark_pack.md) | v0.1/v0.2 and run commands. |
+| [Hospital lab full pipeline](hospital_lab_full_pipeline.md) | Full-pipeline script and orchestration. |
+| [Hospital lab full pipeline results](hospital_lab_full_pipeline_results_report.md) | Example results report (regenerate runs as needed). |
 | [Studies and plots](studies.md) | Study runner, make-plots. |
 | [Coordination studies](../coordination/coordination_studies.md) | Coordination study runner and Pareto. |
 | [LLM Coordination Protocol](llm_coordination_protocol.md) | LLM coordination protocol. |
 
@@ -1,4 +1,4 @@
-# Paper figure and table provenance (v0.1.0)
+# Paper figure and table provenance (paper_v0.1 profile)
 
 Figure/table to path, command, and seeds. Aligned with [PAPER_CLAIMS](../PAPER_CLAIMS.md). Update when the paper is written.
 
@@ -21,9 +21,9 @@ A single tarball (e.g. from GitHub Release or Zenodo) should contain or point to
 
 - Wheel/sdist: `pip install labtrust-gym[env,plots]`
 - Policy: bundled in wheel or `policy/` in repo
-- This provenance map: `docs/paper/README.md`
-- CONTRACTS: `docs/frozen_contracts.md`
-- PAPER_CLAIMS: `docs/PAPER_CLAIMS.md`
+- This provenance map: `docs/benchmarks/paper/README.md`
+- CONTRACTS: `docs/contracts/frozen_contracts.md`
+- PAPER_CLAIMS: `docs/benchmarks/PAPER_CLAIMS.md`
 
 Verification: run quick-eval, package-release paper_v0.1, verify-bundle on the produced bundle.
 
 
@@ -7,7 +7,7 @@ When the main metric of interest is **throughput** (number of specimen releases
 1. **Run the benchmark** with the scripted baseline (default for throughput_sla in the baseline registry):
 
    ```bash
-   labtrust run-benchmark --task throughput_sla --num-episodes 10 --out ./out/throughput_sla.json
+   labtrust run-benchmark --task throughput_sla --episodes 10 --out ./out/throughput_sla.json
    ```
 
    The baseline registry maps `throughput_sla` to `scripted_ops_v1` (scripted agents that perform accept, process, and release). No coordination method is used; the task uses a fixed set of scripted agents and an initial state with specimens already in `accepted` status.
 
@@ -26,7 +26,7 @@ This document defines the contract for all LabTrust-Gym CLI commands: exit codes
 | validate-fhir | `--bundle <path> --terminology <path>` [--strict] | 0 or 1 | (none; violations on stderr; exit 1 with --strict if any code outside value set) | Optional; not part of minimal benchmark. See fhir_export.md. |
 | verify-bundle | `--bundle <EvidenceBundle.v0.1 dir>` or `--strict-fingerprints` | 0 | (none; PASS on stderr) | frozen_contracts.md, trust_verification.md |
 | verify-release | `--release-dir <dir>` optional `--strict-fingerprints` | 0 | (none; summary on stderr; validates EvidenceBundles, risk register, RELEASE_MANIFEST hashes) | frozen_contracts.md, trust_verification.md |
-| build-release-manifest | `--release-dir <dir> --out <path>` | 0 | `<path>/RELEASE_MANIFEST.v0.1.json` (or into release-dir) | trust_verification.md |
+| build-release-manifest | `--release-dir <dir>` | 0 | `<release-dir>/RELEASE_MANIFEST.v0.1.json` | trust_verification.md |
 | run-security-suite | `--out <dir> --smoke` | 0 | `<dir>/SECURITY/attack_results.json` | security_attack_suite.md |
 | safety-case | `--out <dir>` | 0 | `<dir>/SAFETY_CASE/safety_case.json`, `safety_case.md` | risk_register.md, trust_verification.md |
 | run-official-pack | `--out <dir> --smoke` | 0 | `<dir>/pack_manifest.json`, `baselines/`, `baselines/results/`, `SECURITY/`, `SAFETY_CASE/` | official_benchmark_pack.md |