31 changes: 31 additions & 0 deletions .claude/hooks/session-start.sh
@@ -1,6 +1,37 @@
#!/bin/bash
INPUT=$(cat)
SESSION_ID=$(echo "$INPUT" | jq -r '.session_id')

if [ -n "$CLAUDE_ENV_FILE" ]; then
  echo "export CLAUDE_SESSION_ID='$SESSION_ID'" >> "$CLAUDE_ENV_FILE"
  echo "export MLFLOW_TAG_USER='$MLFLOW_TAG_USER'" >> "$CLAUDE_ENV_FILE"
  echo "export MLFLOW_TRACKING_URI='http://127.0.0.1:$MLFLOW_PORT'" >> "$CLAUDE_ENV_FILE"
  echo "export MLFLOW_EXPERIMENT_NAME='$MLFLOW_EXPERIMENT_NAME'" >> "$CLAUDE_ENV_FILE"
  echo "export MLFLOW_TRACING_ENABLED='$MLFLOW_TRACING_ENABLED'" >> "$CLAUDE_ENV_FILE"
fi

# Run the tunnel in the background, don't block if it fails, and avoid duplicates
if ! lsof -i ":$MLFLOW_PORT" >/dev/null 2>&1; then
  # Use JUMPBOX_URI if available
  if [ -n "$JUMPBOX_URI" ]; then
    # Connect to the jumpbox and forward remote port 5000 to localhost:$MLFLOW_PORT
    # (JUMPBOX_URI is intentionally unquoted: it may carry extra ssh flags such as -p)
    ssh -f -N -L "$MLFLOW_PORT:localhost:5000" $JUMPBOX_URI
    sleep 5
  fi
fi

# Set up the Python environment and MLflow (use a fixed path)
VENV_DIR="$HOME/.claude/mlflow/.venv"
if [ ! -d "$VENV_DIR" ]; then
  python3 -m venv "$VENV_DIR"
fi
source "$VENV_DIR/bin/activate"

# Install dependencies if needed
if ! pip show mlflow >/dev/null 2>&1; then
  pip install mlflow
fi

# Create the experiment if it doesn't exist (suppress output/errors)
mlflow experiments create -n "$MLFLOW_EXPERIMENT_NAME" 2>/dev/null || true
# Enable autologging - run from home directory so config is written to ~/.claude
mlflow autolog claude -u "$MLFLOW_TRACKING_URI" -n "$MLFLOW_EXPERIMENT_NAME" -t "$MLFLOW_TRACING_ENABLED"
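Claude Code hands the SessionStart hook a JSON payload on stdin; the `jq` call at the top of the script extracts `session_id` from it. A minimal Python sketch of that same parse-and-export step, using a hypothetical payload (field values are illustrative, not taken from the real hook contract):

```python
import json

# Hypothetical SessionStart payload, as the hook would receive it on stdin
payload = '{"session_id": "abc-123", "source": "startup"}'

# Equivalent of: jq -r '.session_id'
session_id = json.loads(payload)["session_id"]

# The export line the shell hook appends to $CLAUDE_ENV_FILE
export_line = f"export CLAUDE_SESSION_ID='{session_id}'"
print(export_line)  # export CLAUDE_SESSION_ID='abc-123'
```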
137 changes: 136 additions & 1 deletion README.md
@@ -69,11 +69,35 @@ Use this skill when you need to:
"SPLUNK_INDEX": "<your-splunk-index>",
"SPLUNK_OCP_APP_INDEX": "<splunk-ocp-app-index>",
"SPLUNK_OCP_INFRA_INDEX": "<splunk-ocp-infra-index>",
"SPLUNK_VERIFY_SSL": "false"
"SPLUNK_VERIFY_SSL": "false",
"JUMPBOX_URI": "<username>.com@<jumpbox> -p <port>",
"MLFLOW_CLAUDE_TRACING_ENABLED": "true",
"MLFLOW_PORT": "<set localhost port>",
"MLFLOW_EXPERIMENT_NAME": "<experiment name or default>"
}
}

```
```json
{
"hooks": {
"SessionStart": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "./.claude/hooks/session-start.sh"
}
]
}
]
}
}
```

**Note:** Run the following to make the script executable: `chmod +x ./.claude/hooks/session-start.sh`

### context-fetcher

Fetch configuration and documentation context via MCP servers:
@@ -146,6 +170,117 @@ Upload the skill folder contents to a Claude project's knowledge base.

Include the skill's `SKILL.md` content in your system prompt.

## Creating a New Skill

1. Create a directory with your skill name (lowercase, hyphen-separated)
2. Add a `SKILL.md` file:

```markdown
---
name: my-skill
description: Brief description of what this skill does
allowed-tools:
- Bash
- Read
---

# MLFlow Tracing Setup Guide for Claude Code

## Step 1: Install Dependencies

1. Create and activate a virtual environment:

```bash
python3 -m venv .venv
source .venv/bin/activate
```

2. Install the package and MLFlow:

```bash
pip install -e .
pip install mlflow
```

3. Enable MLFlow autologging for Claude:

```bash
mlflow autolog claude
```

## Step 2: Configure Claude Settings

Add the following environment variables to your Claude settings file at `~/.claude/settings.json`:

```json
{
"env": {
"JUMPBOX_URI": "<your-username>@<your-jumpbox> -p <port>",
"MLFLOW_CLAUDE_TRACING_ENABLED": "true",
"MLFLOW_PORT": "<set localhost port>",
"MLFLOW_EXPERIMENT_NAME": "<experiment-name>"
}
}
```

**Note**: Replace `<your-username>@<your-jumpbox> -p <port>` with your actual jumpbox connection details.
Replace `<experiment-name>` with the desired experiment name. The experiment will be created automatically if it does not exist.

## Step 3: Add a SessionStart hook to `.claude/settings.json`

```json
{
"hooks": {
"SessionStart": [
{
"matcher": "*",
"hooks": [
{
"type": "command",
"command": "./.claude/hooks/session-start.sh"
}
]
}
]
}
}
```

Make the script executable:
```bash
chmod +x ./.claude/hooks/session-start.sh
```

## Step 4: Change into the `AIOPS-SKILLS/` directory (where Claude will be running)

## Step 5: Enable Claude Autologging

Before starting Claude, run:

```bash
mlflow autolog claude
```

## Step 6: Start Claude

```bash
claude
```

## Step 7: Run a Prompt

Enter any prompt in Claude to generate a trace.

## Step 8: View Traces

Open your browser and navigate to `http://localhost:5000`. Your MLFlow dashboard will display your trace along with any previous traces.

# My Skill

Instructions for Claude...
```

See [template-skill](./template-skill/) for a minimal example and [agent_skills_spec.md](./agent_skills_spec.md) for the full specification.

## Contributing

Contributions welcome. Ensure your skill:
23 changes: 18 additions & 5 deletions skills/context-fetcher/SKILL.md
@@ -10,6 +10,7 @@ allowed-tools:
- mcp__slack__slack_get_channel_history
- Read
- Write
- Bash
---

# Context Fetcher
@@ -22,7 +23,20 @@ Step 2 [MCP] Search GitHub repositories
Step 3 [MCP] Search Confluence pages
Step 4 [MCP] Search Slack channel
Step 5 [Claude] Synthesize and organize findings
Step 6 [Claude] Call feedback-capture skill.
Step 6 [Claude] Log search to MLflow (run scripts/mlflow_context.py)
```

## MLflow Tracing

After synthesizing findings, you MUST log the search to MLflow:

```bash
python scripts/mlflow_context.py \
--query "{search keywords used}" \
--sources "{comma-separated: github,confluence,slack}" \
--job-id "{job ID if applicable}" \
--incident-id "{incident ID if applicable}" \
--results-summary "{brief summary of what was found}"
```
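The `--sources` value is a single comma-separated string; the script splits it into a list before logging (the `sources.split(",")` expression in `mlflow_context.py`). A standalone sketch of that parsing:

```python
def parse_sources(sources: str) -> list[str]:
    """Split the comma-separated --sources value into a list, as the script does."""
    return sources.split(",") if sources else []

print(parse_sources("github,confluence,slack"))  # ['github', 'confluence', 'slack']
print(parse_sources(""))                         # []
```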

## Capabilities
@@ -81,7 +95,7 @@ Search Slack channel:
3. **Search Confluence** for related documentation and runbooks
4. **Search Slack** for relevant messages
5. **Synthesize findings** into organized context
6. **Ask for feedback** Edit the feedback_question.txt, Call feedback-capture skill
6. **Log trace** run scripts/mlflow_context.py


### Example: Finding Job Configuration
@@ -105,9 +119,8 @@ Search Slack channel:
- Query: `"job-name" AND messages`
- Expected: Messages and replies

**Step 5:** Call feedback-capture skill
- Skill: feedback-capture skill
- Expected: The feedback-capture skill is called
**Step 5:** Run mlflow_context.py
- Expected: The input and output of this skill is traced

## Prerequisites

82 changes: 82 additions & 0 deletions skills/context-fetcher/mlflow-context.py
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""MLflow tracing for context-fetcher skill."""

import argparse
import os

import mlflow
from mlflow.entities import SpanType


def log_context_search(
    query: str,
    sources: str,
    job_id: str | None = None,
    incident_id: str | None = None,
    results_summary: str | None = None,
) -> dict:
    """Log context search operation to MLflow trace."""
    search_data = {
        "query": query,
        "sources": sources.split(",") if sources else [],
        "job_id": job_id,
        "incident_id": incident_id,
    }

    with mlflow.start_span(name="Context search", span_type=SpanType.RETRIEVER) as span:
        span.set_inputs(search_data)
        span.set_outputs({"status": "completed", "results_summary": results_summary, **search_data})

    print("Successfully logged context search to MLflow")
    return search_data


def main():
    parser = argparse.ArgumentParser(description="Log context-fetcher operations to MLflow.")
    parser.add_argument("--query", required=True, help="Search query or keywords used")
    parser.add_argument(
        "--sources",
        required=True,
        help="Comma-separated sources searched (github,confluence,slack)",
    )
    parser.add_argument("--job-id", help="Job ID being investigated (if applicable)")
    parser.add_argument("--incident-id", help="Incident ID being investigated (if applicable)")
    parser.add_argument("--results-summary", help="Brief summary of results found")

    args = parser.parse_args()

    with mlflow.start_span(name="Context fetcher", span_type=SpanType.CHAIN) as span:
        # Set trace metadata
        mlflow.update_current_trace(
            metadata={
                "mlflow.trace.session": f"{os.environ.get('CLAUDE_SESSION_ID')}",
                "mlflow.trace.user": os.environ.get("MLFLOW_TAG_USER"),
                "mlflow.source.name": "context-fetcher",
                "mlflow.source.git.repoURL": "https://github.com/redhat-et/aiops-skills/blob/main/skills/context-fetcher/SKILL.md",
            },
        )

        span.set_inputs(
            {
                "query": args.query,
                "sources": args.sources,
                "job_id": args.job_id,
                "incident_id": args.incident_id,
            }
        )

        result = log_context_search(
            query=args.query,
            sources=args.sources,
            job_id=args.job_id,
            incident_id=args.incident_id,
            results_summary=args.results_summary,
        )

        span.set_outputs(result)

    return result


if __name__ == "__main__":
    main()
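The CLI surface above can be exercised without a running MLflow server by rebuilding the same argparse parser in isolation; a sketch (mirroring the flags defined in `main()`, with hypothetical argument values):

```python
import argparse

# Mirror of the parser defined in main() above (help strings omitted)
parser = argparse.ArgumentParser(description="Log context-fetcher operations to MLflow.")
parser.add_argument("--query", required=True)
parser.add_argument("--sources", required=True)
parser.add_argument("--job-id")
parser.add_argument("--incident-id")
parser.add_argument("--results-summary")

# argparse maps --job-id to args.job_id, --incident-id to args.incident_id, etc.
args = parser.parse_args([
    "--query", "etl failure",
    "--sources", "github,slack",
    "--job-id", "job_1234567",
])
print(args.query, args.sources, args.job_id)  # etl failure github,slack job_1234567
```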
35 changes: 32 additions & 3 deletions skills/logs-fetcher/scripts/fetch_logs_by_job.py
@@ -6,13 +6,17 @@
import sys
from pathlib import Path

import mlflow
from mlflow.entities import SpanType

# --- Defaults: adjust if needed ---
REMOTE_HOST = os.environ.get("REMOTE_HOST")
REMOTE_DIR = os.environ.get("REMOTE_DIR")
DEFAULT_LOCAL_DIR = Path.home() / "etl-logs"


def fetch_job_logs(job_numbers: list[str], local_dir: Path) -> None:
@mlflow.trace(name="Fetch job logs by number", span_type=SpanType.RETRIEVER)
def fetch_job_logs(job_numbers: list[str], local_dir: Path) -> dict:
    """
    Fetch specific job log files by job number (e.g., job_1234567).

@@ -72,7 +76,6 @@ def fetch_job_logs(job_numbers: list[str], local_dir: Path) -> None:
    if not files_found:
        print(f"[WARNING] No files found for job numbers: {', '.join(normalized_jobs)}")
        print("[INFO] Make sure the job numbers are correct and files exist on the remote server")
        return

    print(f"[INFO] Found {len(files_found)} file(s):")
    for f in files_found:
@@ -102,8 +105,15 @@ def fetch_job_logs(job_numbers: list[str], local_dir: Path) -> None:
    except subprocess.CalledProcessError as e:
        print("[ERROR] rsync failed")
        raise e
    return {
        "status": "success",
        "local_dir": str(local_dir),
        "job_numbers": normalized_jobs,
        "files_found": len(files_found),
    }


@mlflow.trace(name="Logs fetcher by job", span_type=SpanType.TOOL)
def main(argv=None):
    parser = argparse.ArgumentParser(
        description="Fetch specific AAP2 ETL log files by job number via ssh + rsync."
@@ -122,7 +132,26 @@ def main(argv=None):

    args = parser.parse_args(argv)

    fetch_job_logs(
    span = mlflow.get_current_active_span()
    if span:
        span.set_inputs(
            {
                "request": f"log-fetcher via jobs {args}",
                "job_numbers": args.job_numbers,
                "local_dir": str(args.local_dir),
            }
        )

    mlflow.update_current_trace(
        metadata={
            "mlflow.trace.session": f"{os.environ.get('CLAUDE_SESSION_ID')}",
            "mlflow.trace.user": os.environ.get("MLFLOW_TAG_USER"),
            "mlflow.source.name": "logs-fetcher",
            "mlflow.source.git.repoURL": "https://github.com/redhat-et/aiops-skills/blob/main/skills/logs-fetcher/SKILL.md",
        },
    )

    return fetch_job_logs(
        job_numbers=args.job_numbers,
        local_dir=args.local_dir,
    )
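The function's docstring says job numbers may be given as `1234567` or `job_1234567`, and the body builds a `normalized_jobs` list before searching. A hypothetical sketch of that normalization step (the helper name and exact rules are assumptions, not taken from the diff):

```python
def normalize_job_numbers(job_numbers: list[str]) -> list[str]:
    """Hypothetical normalization: accept '1234567' or 'job_1234567' and
    return each as 'job_<number>'."""
    normalized = []
    for job in job_numbers:
        job = job.strip()
        if not job.startswith("job_"):
            job = f"job_{job}"
        normalized.append(job)
    return normalized

print(normalize_job_numbers(["1234567", "job_7654321"]))
# ['job_1234567', 'job_7654321']
```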