|
| 1 | +""" |
| 2 | +Scrape DLA Piper Data Protection handbook for global privacy law info. |
| 3 | +Outputs src/data/globalJurisdictions.json with country-level privacy law data. |
| 4 | +
|
| 5 | +Usage: |
| 6 | + python3 scripts/scrape_global_privacy.py |
| 7 | +""" |
| 8 | + |
| 9 | +import json |
| 10 | +import re |
| 11 | +import time |
| 12 | +import logging |
| 13 | +from pathlib import Path |
| 14 | + |
| 15 | +import requests |
| 16 | +from bs4 import BeautifulSoup |
| 17 | + |
# Module-level logger: timestamped, level-tagged lines on stderr.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)

# Landing page of the DLA Piper "Data Protection Laws of the World" handbook.
BASE_URL = "https://www.dlapiperdataprotection.com/"
# Output file: <repo root>/src/data/globalJurisdictions.json
# (parents[1] walks up from scripts/ to the repository root).
OUTPUT = Path(__file__).resolve().parents[1] / "src" / "data" / "globalJurisdictions.json"

# Browser-like User-Agent so the site serves the normal HTML page
# instead of blocking an obvious script client.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
}

# ISO 3166-1 alpha-2 to numeric mapping (for map highlighting).
# Numeric codes are kept as zero-padded strings to match topojson country ids.
ALPHA2_TO_NUMERIC = {
    "AL": "008", "DZ": "012", "AO": "024", "AR": "032", "AM": "051",
    "AU": "036", "AT": "040", "AZ": "031", "BS": "044", "BH": "048",
    "BD": "050", "BB": "052", "BY": "112", "BE": "056", "BJ": "204",
    "BM": "060", "BO": "068", "BA": "070", "BW": "072", "BR": "076",
    "BN": "096", "BG": "100", "BF": "854", "KH": "116", "CA": "124",
    "CV": "132", "KY": "136", "CL": "152", "CN": "156", "CO": "170",
    "CG": "178", "CD": "180", "CR": "188", "HR": "191", "CY": "196",
    "CZ": "203", "DK": "208", "DO": "214", "EC": "218", "EG": "818",
    "SV": "222", "EE": "233", "ET": "231", "FI": "246", "FR": "250",
    "DE": "276", "GH": "288", "GR": "300", "GT": "320", "HK": "344",
    "HU": "348", "IS": "352", "IN": "356", "ID": "360", "IE": "372",
    "IL": "376", "IT": "380", "JM": "388", "JP": "392", "JO": "400",
    "KZ": "398", "KE": "404", "KR": "410", "KW": "414", "LV": "428",
    "LT": "440", "LU": "442", "MO": "446", "MY": "458", "MT": "470",
    "MU": "480", "MX": "484", "MD": "498", "MA": "504", "MZ": "508",
    "NL": "528", "NZ": "554", "NG": "566", "NO": "578", "OM": "512",
    "PK": "586", "PA": "591", "PY": "600", "PE": "604", "PH": "608",
    "PL": "616", "PT": "620", "QA": "634", "RO": "642", "RU": "643",
    "RW": "646", "SA": "682", "SN": "686", "RS": "688", "SG": "702",
    "SK": "703", "SI": "705", "ZA": "710", "ES": "724", "LK": "144",
    "SE": "752", "CH": "756", "TW": "158", "TZ": "834", "TH": "764",
    "TN": "788", "TR": "792", "UG": "800", "UA": "804", "AE": "784",
    "GB": "826", "US": "840", "UY": "858", "VN": "704", "ZM": "894",
    "ZW": "716", "CI": "384", "GN": "324", "MG": "450", "NE": "562",
    "TG": "768", "TD": "148", "MW": "454", "NA": "516", "SZ": "748",
    "LS": "426", "GY": "328", "TT": "780", "BZ": "084", "HN": "340",
    "NI": "558", "CU": "192", "HT": "332", "MM": "104", "LA": "418",
    "NP": "524", "MN": "496", "GE": "268", "UZ": "860", "TM": "795",
    "KG": "417", "TJ": "762", "AF": "004", "IQ": "368", "SY": "760",
    "LB": "422", "LY": "434", "SD": "729", "ER": "232", "DJ": "262",
    "SO": "706", "YE": "887", "IR": "364", "CM": "120", "GA": "266",
    "GQ": "226", "CF": "140", "SS": "728",
}
| 63 | + |
| 64 | + |
def get_country_codes() -> list[dict]:
    """Extract country codes and names from the handbook's main page.

    Returns:
        A list of ``{"code": ..., "name": ...}`` dicts, de-duplicated by code.
    """
    resp = requests.get(BASE_URL, headers=HEADERS, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")

    countries: list[dict] = []
    seen: set[str] = set()

    # Primary strategy: the country <select> dropdown on the landing page.
    for option in soup.select("select option"):
        value = option.get("value", "").strip()
        label = option.get_text(strip=True)
        usable = (
            value
            and label
            and len(value) <= 3
            and value not in seen
            and value != "Select an option"
        )
        if usable:
            seen.add(value)
            countries.append({"code": value, "name": label})

    # Fallback: mine code/name pairs out of inline JavaScript when the
    # dropdown yielded nothing (e.g. markup changed).
    if not countries:
        for match in re.finditer(r'"(\w{2,3})"\s*:\s*"([^"]+)"', resp.text):
            value, label = match.group(1), match.group(2)
            if value not in seen and len(label) > 2:
                seen.add(value)
                countries.append({"code": value, "name": label})

    return countries
| 90 | + |
| 91 | + |
def scrape_country(code: str, name: str) -> dict | None:
    """Scrape a single country page for privacy law data.

    Args:
        code: Handbook country code (usually ISO 3166-1 alpha-2).
        name: Human-readable country name (used only for logging/output).

    Returns:
        A dict with overview/authority/mainLaws/enforcement fields, or
        ``None`` when the page could not be fetched.
    """
    url = f"{BASE_URL}?c={code}"
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        resp.raise_for_status()
    except Exception as e:  # network errors, timeouts, HTTP 4xx/5xx
        log.warning(f"Failed to fetch {name} ({code}): {e}")
        return None

    soup = BeautifulSoup(resp.text, "lxml")
    main = soup.select_one("main.page-content") or soup

    # Group paragraph/list-item text under the nearest preceding h2/h3 heading.
    sections = {}
    current = None
    for el in main.find_all(["h2", "h3", "p", "li"]):
        if el.name in ("h2", "h3"):
            heading = el.get_text(strip=True)
            if 3 < len(heading) < 100:  # skip empty / oversized nav headings
                current = heading.lower()
                sections[current] = []
        elif current and el.name in ("p", "li"):
            text = el.get_text(strip=True)
            if text and len(text) > 15:  # skip tiny boilerplate fragments
                sections[current].append(text)

    # Extract key fields from the grouped sections.
    overview = ""
    authority = ""
    enforcement = ""
    laws = []

    for heading, texts in sections.items():
        combined = " ".join(texts[:5])[:600]
        if not combined:
            continue

        if any(k in heading for k in ("definition of personal data", "law", "authority")):
            if "authority" in heading and not authority:
                authority = combined
            elif not overview and "definition" in heading:
                overview = combined
            # BUG FIX: headings mentioning "law" previously matched this guard
            # but were never collected, so "mainLaws" was always empty.
            if "law" in heading and combined not in laws:
                laws.append(combined)

        if any(k in heading for k in ("enforcement", "sanction", "penalt")):
            enforcement = combined

        if "collection" in heading and not overview:
            overview = combined

    # Build a general overview from the first substantial section if still empty.
    if not overview:
        for texts in sections.values():
            combined = " ".join(texts[:3])
            if len(combined) > 100:
                overview = combined[:600]
                break

    # ISO numeric code for map highlighting; "" when unmapped.
    numeric_code = ALPHA2_TO_NUMERIC.get(code[:2], "")

    return {
        "code": code,
        "name": name,
        "numericCode": numeric_code,
        "overview": overview,
        "authority": authority,
        "mainLaws": laws,
        "enforcementStyle": enforcement,
    }
| 162 | + |
| 163 | + |
def main():
    """Fetch the country list, scrape each country page, write the JSON output."""
    log.info("Fetching country list...")
    countries = get_country_codes()
    total = len(countries)
    log.info(f"Found {total} countries")

    results = []
    for idx, entry in enumerate(countries, start=1):
        code, name = entry["code"], entry["name"]

        # Codes longer than two letters are regional aggregates rather than
        # countries; "BQ1" is the lone exception we still keep.
        if len(code) > 2 and code not in ("BQ1",):
            log.info(f"[{idx}/{total}] Skipping regional entry {name} ({code})")
            continue

        log.info(f"[{idx}/{total}] Scraping {name} ({code})...")
        record = scrape_country(code, name)
        if record:
            results.append(record)

        time.sleep(0.3)  # polite crawl delay between requests

    OUTPUT.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT.write_text(json.dumps(results, indent=2, ensure_ascii=False), encoding="utf-8")
    log.info(f"Wrote {len(results)} countries to {OUTPUT}")


if __name__ == "__main__":
    main()