# NOTE(review): the five lines below are GitHub web-UI page chrome that was
# captured along with the workflow source (run title "Gemini-LLM GCS Artifact
# Test #41", "Workflow file for this run"). They are not part of the workflow;
# kept here as comments so the file parses as YAML.
#
# Skip to content
# Gemini-LLM GCS Artifact Test #41
# Gemini-LLM GCS Artifact Test
# Gemini-LLM GCS Artifact Test #41
# Workflow file for this run
---
# Manually-triggered test workflow: optionally seeds notes into BigQuery,
# downloads example crash/ANR/screenshot/Sentry artifacts from GCS, and
# optionally POSTs them to a Cloud Run analysis service.
name: Gemini-LLM GCS Artifact Test

on:
  workflow_dispatch:
    inputs:
      seed_notes:
        description: "Seed notes.json into BigQuery before analysis"
        required: false
        type: boolean
        default: false
      use_prod_service_url:
        description: "Call the Cloud Run service after downloading artifacts"
        required: false
        type: boolean
        default: false
      prompt_file:
        description: "Select a prompt to use for analysis"
        required: true
        default: "multi-analyze.txt"
        type: choice
        options:
          - multi-analyze.txt
          - sentry-release-health.txt

# Least-privilege workflow token: jobs only need to read repository contents.
permissions:
  contents: read

# Shared configuration for both jobs. SERVICE_URL is empty unless the
# LLM_PROD_SERVICE_URL secret is configured for this repository.
env:
  GCP_PROJECT_ID: moz-mobile-tools
  BQ_DATASET: vertex_ai_tool
  SERVICE_URL: ${{ secrets.LLM_PROD_SERVICE_URL }}
  CRASH_URI: gs://testops-llm-artifacts/crashes/minidumps/examples/crash_example.txt
  ANR_URI: gs://testops-llm-artifacts/anr/examples/anr_example.txt
  IMG_URI: gs://testops-llm-artifacts/images/examples/iOS/1.png
  SENTRY_IOS_URI: gs://testops-llm-artifacts/sentry/firefox-ios/latest.json
  LOCAL_ARTIFACT_DIR: artifacts
jobs:
  # Optional job: seeds notes.json into BigQuery via seed_notes.py, then
  # validates that notes and their embeddings were written consistently.
  # Runs only when the `seed_notes` boolean input is checked.
  seed-notes:
    if: ${{ inputs.seed_notes == true }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: pip install -r llm-cloud-run/requirements.txt

      - name: Authenticate to Google Cloud (JSON key)
        uses: google-github-actions/auth@v3
        with:
          credentials_json: ${{ secrets.GCP_SA_VERTEX_AI }}

      - name: Seed notes into BigQuery
        working-directory: llm-cloud-run
        env:
          GCP_PROJECT: ${{ env.GCP_PROJECT_ID }}
          BQ_PROJECT: ${{ env.GCP_PROJECT_ID }}
          BQ_DATASET: ${{ env.BQ_DATASET }}
        run: python seed_notes.py

      - name: Setup gcloud
        uses: google-github-actions/setup-gcloud@v3
        with:
          project_id: ${{ env.GCP_PROJECT_ID }}

      # Sanity-check the seed: both tables non-empty, NULL note fields only
      # warn, and every note must have a matching embedding row (hard fail).
      - name: Validate seeded data
        run: |
          set -euo pipefail
          PROJECT="${GCP_PROJECT_ID}"
          DATASET="${BQ_DATASET}"
          NOTES_TABLE="${PROJECT}.${DATASET}.demo_Notes"
          EMBEDDINGS_TABLE="${PROJECT}.${DATASET}.demo_NoteEmbeddings"

          echo "::group::Checking demo_Notes"
          NOTES_COUNT=$(bq query --nouse_legacy_sql --format=json \
            "SELECT COUNT(*) AS cnt FROM \`${NOTES_TABLE}\`" | jq -r '.[0].cnt')
          echo "Notes count: ${NOTES_COUNT}"
          if [ "${NOTES_COUNT}" -eq 0 ]; then
            echo "::error::demo_Notes is empty — seed did not write any rows"
            exit 1
          fi
          # Missing fields are a warning, not a failure: such notes are still
          # present, just incomplete.
          NOTES_MISSING_FIELDS=$(bq query --nouse_legacy_sql --format=json \
            "SELECT note_id FROM \`${NOTES_TABLE}\`
             WHERE content IS NULL
                OR signature IS NULL
                OR match_regex IS NULL" | jq -r 'length')
          echo "Notes with missing fields: ${NOTES_MISSING_FIELDS}"
          if [ "${NOTES_MISSING_FIELDS}" -gt 0 ]; then
            echo "::warning::${NOTES_MISSING_FIELDS} note(s) have NULL content, signature, or match_regex"
          fi
          echo "::endgroup::"

          echo "::group::Checking demo_NoteEmbeddings"
          EMB_COUNT=$(bq query --nouse_legacy_sql --format=json \
            "SELECT COUNT(*) AS cnt FROM \`${EMBEDDINGS_TABLE}\`" | jq -r '.[0].cnt')
          echo "Embeddings count: ${EMB_COUNT}"
          if [ "${EMB_COUNT}" -eq 0 ]; then
            echo "::error::demo_NoteEmbeddings is empty — seed did not write embeddings"
            exit 1
          fi
          echo "::endgroup::"

          echo "::group::Checking notes ↔ embeddings join"
          ORPHAN_NOTES=$(bq query --nouse_legacy_sql --format=json \
            "SELECT n.note_id
             FROM \`${NOTES_TABLE}\` n
             LEFT JOIN \`${EMBEDDINGS_TABLE}\` e ON n.note_id = e.note_id
             WHERE e.note_id IS NULL" | jq -r 'length')
          echo "Notes without embeddings: ${ORPHAN_NOTES}"
          if [ "${ORPHAN_NOTES}" -gt 0 ]; then
            echo "::error::${ORPHAN_NOTES} note(s) have no matching embedding"
            exit 1
          fi
          echo "::endgroup::"

          {
            echo "### ✅ Seed Validation Passed"
            echo ""
            echo "- **Notes:** ${NOTES_COUNT}"
            echo "- **Embeddings:** ${EMB_COUNT}"
            echo "- **Orphan notes (no embedding):** ${ORPHAN_NOTES}"
          } >> "$GITHUB_STEP_SUMMARY"

  # Downloads the example artifacts from GCS, then optionally posts them to
  # the Cloud Run service when `use_prod_service_url` is checked.
  manual-run:
    needs: [seed-notes]
    # Always run this job, even when seed-notes is skipped (but not when it
    # failed or the run was cancelled).
    if: ${{ always() && !failure() && !cancelled() }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Authenticate to Google Cloud (JSON key)
        uses: google-github-actions/auth@v3
        with:
          credentials_json: ${{ secrets.GCP_SA_VERTEX_AI }}

      - name: Setup gcloud
        uses: google-github-actions/setup-gcloud@v3
        with:
          project_id: ${{ env.GCP_PROJECT_ID }}

      - name: Set gcloud project (quiet)
        run: |
          gcloud --quiet config set project "$GCP_PROJECT_ID"

      - name: Download artifacts from GCS
        run: |
          mkdir -p "${LOCAL_ARTIFACT_DIR}"
          gcloud storage cp "${CRASH_URI}" "${LOCAL_ARTIFACT_DIR}/crash_example.txt"
          gcloud storage cp "${ANR_URI}" "${LOCAL_ARTIFACT_DIR}/anr_example.txt"
          gcloud storage cp "${IMG_URI}" "${LOCAL_ARTIFACT_DIR}/1.png"
          gcloud storage cp "${SENTRY_IOS_URI}" "${LOCAL_ARTIFACT_DIR}/sentry_release_health.json"
          echo "Downloaded files:"
          ls -la "${LOCAL_ARTIFACT_DIR}"

      - name: (Optional) POST artifacts as JSON to Cloud Run
        if: ${{ inputs.use_prod_service_url == true }}
        run: |
          set -euo pipefail
          CRASH_FILE="${LOCAL_ARTIFACT_DIR}/crash_example.txt"
          ANR_FILE="${LOCAL_ARTIFACT_DIR}/anr_example.txt"
          IMG_FILE="${LOCAL_ARTIFACT_DIR}/1.png"
          # Identity token whose audience is pinned to the Cloud Run service.
          TOKEN="$(gcloud auth print-identity-token --audiences="${SERVICE_URL}")"
          PROMPT_FILE=".github/prompts/${{ inputs.prompt_file }}"
          PROMPT=$(<"$PROMPT_FILE")
          # Write combined crash + ANR content to a temp file
          CONTENT_FILE="$(mktemp)"
          {
            printf 'Crash:\n'
            cat "$CRASH_FILE"
            printf '\n\nANR:\n'
            cat "$ANR_FILE"
          } > "$CONTENT_FILE"
          # Post multipart form data: prompt, content (from file), and image.
          # "content=<file" makes curl read the field VALUE from the file
          # (unlike "@", which would attach it as a file upload).
          # NOTE(review): curl -F treats ';' in an inline value as an option
          # separator, so a prompt file containing ';' would be truncated —
          # consider switching to -F "prompt=<${PROMPT_FILE}" if that happens.
          RESPONSE_FILE="$(mktemp)"
          curl --fail-with-body -sS -X POST \
            -H "Authorization: Bearer ${TOKEN}" \
            -F "prompt=$(printf "%s" "$PROMPT")" \
            -F "content=<${CONTENT_FILE};type=text/plain; charset=utf-8" \
            -F "image=@${IMG_FILE};type=image/png" \
            -o "$RESPONSE_FILE" \
            "${SERVICE_URL}/analyze"
          # Extract output from JSON response and write to GitHub summary
          OUTPUT=$(jq -r '.output' "$RESPONSE_FILE")
          {
            echo "### 🧠 LLM Analysis Summary"
            echo ""
            echo "$OUTPUT"
          } >> "$GITHUB_STEP_SUMMARY"
          rm -f "$CONTENT_FILE" "$RESPONSE_FILE"