Skip to content

Commit f96c422

Browse files
viki shiviki shi
authored and committed
remove edpb
1 parent 588b13c commit f96c422

10 files changed

Lines changed: 1912 additions & 60 deletions

scripts/enrich_global_privacy.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""
2+
Use Claude to generate proper jurisdiction info for each country.
3+
Reads globalJurisdictions.json, enriches with AI, writes back.
4+
5+
Usage:
6+
python3 scripts/enrich_global_privacy.py
7+
"""
8+
9+
import json
10+
import logging
11+
import os
12+
import time
13+
from pathlib import Path
14+
15+
import anthropic
16+
from dotenv import load_dotenv
17+
18+
# Repository root: this script lives one directory below it (scripts/).
_PROJECT_ROOT = Path(__file__).resolve().parents[1]

# Only the first .env file that exists is loaded; its values override any
# variables already present in the process environment.
_ENV_CANDIDATES = (_PROJECT_ROOT / "files" / ".env", _PROJECT_ROOT / ".env")
_env = next((candidate for candidate in _ENV_CANDIDATES if candidate.exists()), None)
if _env is not None:
    load_dotenv(_env, override=True)

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)

# JSON file that is read, enriched in place, and written back.
DATA_PATH = _PROJECT_ROOT.joinpath("src", "data", "globalJurisdictions.json")
# Read after the .env file has been loaded so a key defined there is picked up.
API_KEY = os.environ.get("ANTHROPIC_API_KEY")
MODEL = "claude-haiku-4-5"

# Prompt template; "{name}" is filled per country, doubled braces are literal.
PROMPT = """For the country "{name}", provide a concise privacy/data protection jurisdiction summary.

Return ONLY a valid JSON object with these fields:
{{
"overview": "2-3 sentence overview of the country's data protection framework. What laws exist, when were they enacted, what do they cover? If no comprehensive law exists, say so.",
"authority": "Name of the data protection authority/regulator and a 1-sentence description of their role. If none exists, say 'No dedicated data protection authority.'",
"enforcementStyle": "1-2 sentences on how enforcement works — penalties, approach (strict vs lenient), notable actions. If limited enforcement, say so.",
"mainLaws": ["Array of 1-3 main privacy/data protection laws with year, e.g. 'LGPD (2020)', 'PIPEDA (2000)'. Empty array if no specific law."]
}}

Be factual and concise. If a country has no data protection law, still provide useful context about the privacy landscape."""
42+
43+
44+
# Markers of scraped navigation boilerplate that does not count as a real overview.
_BOILERPLATE_PREFIXES = ("Explore DLA", "Data protection laws in")


def _needs_enrichment(country: dict) -> bool:
    """Return True when the entry's overview is missing, short, or boilerplate."""
    existing = country.get("overview") or ""  # tolerate a JSON null overview
    if not isinstance(existing, str):
        return True
    return (
        len(existing) <= 100
        or existing.startswith(_BOILERPLATE_PREFIXES)
        or "Quick links" in existing
    )


def _parse_reply(raw: str) -> dict:
    """Extract the JSON object from a model reply, ignoring fences or stray prose."""
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in model reply")
    return json.loads(raw[start:end + 1])


def _apply_result(country: dict, result: dict) -> bool:
    """Copy usable fields from *result* into *country*; return False if unusable.

    A reply without a non-empty string overview is rejected outright. Other
    fields that are missing or malformed keep their current value instead of
    being blanked.
    """
    overview = result.get("overview")
    if not isinstance(overview, str) or not overview.strip():
        return False

    country["overview"] = overview.strip()
    for key in ("authority", "enforcementStyle"):
        value = result.get(key)
        if isinstance(value, str) and value.strip():
            country[key] = value.strip()
        else:
            country.setdefault(key, "")

    laws = result.get("mainLaws")
    if isinstance(laws, list):
        country["mainLaws"] = [str(law) for law in laws if law]
    else:
        country.setdefault("mainLaws", [])
    return True


def main():
    """Enrich every country in DATA_PATH that lacks a usable overview.

    Entries whose overview is longer than 100 characters and is not scraped
    navigation text are skipped. Each remaining entry triggers one model call.
    A failed call or unusable reply is logged and the entry is left unchanged.
    The file is written back even when the run is interrupted, so replies
    already received are not lost.

    Raises:
        RuntimeError: if ANTHROPIC_API_KEY is not set.
    """
    if not API_KEY:
        raise RuntimeError("ANTHROPIC_API_KEY not set")

    client = anthropic.Anthropic(api_key=API_KEY)
    data = json.loads(DATA_PATH.read_text(encoding="utf-8"))
    total = len(data)
    log.info(f"Enriching {total} countries...")

    enriched = 0
    try:
        for position, country in enumerate(data, start=1):
            name = country["name"]

            if not _needs_enrichment(country):
                log.info(f"[{position}/{total}] {name}: already enriched, skipping")
                continue

            log.info(f"[{position}/{total}] Enriching {name}...")

            # Best effort per country: one bad reply must not stop the run.
            try:
                msg = client.messages.create(
                    model=MODEL,
                    max_tokens=512,
                    system="Return ONLY a valid JSON object — no markdown, no explanation.",
                    messages=[{"role": "user", "content": PROMPT.format(name=name)}],
                )
                result = _parse_reply(msg.content[0].text)
                if _apply_result(country, result):
                    enriched += 1
                else:
                    log.warning(f" {name}: failed — reply had no usable overview")
            except Exception as e:
                log.warning(f" {name}: failed — {e}")

            time.sleep(0.2)  # small pause between API calls
    finally:
        # Persist partial progress too (Ctrl-C, unexpected error mid-run).
        DATA_PATH.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
        log.info(f"Done. Enriched {enriched}/{total} countries. Saved to {DATA_PATH}")


if __name__ == "__main__":
    main()

scripts/scrape_global_privacy.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
"""
2+
Scrape DLA Piper Data Protection handbook for global privacy law info.
3+
Outputs src/data/globalJurisdictions.json with country-level privacy law data.
4+
5+
Usage:
6+
python3 scripts/scrape_global_privacy.py
7+
"""
8+
9+
import json
10+
import re
11+
import time
12+
import logging
13+
from pathlib import Path
14+
15+
import requests
16+
from bs4 import BeautifulSoup
17+
18+
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger(__name__)

# Site root; individual country pages are requested as "<BASE_URL>?c=<code>".
BASE_URL = "https://www.dlapiperdataprotection.com/"

# Output file, resolved relative to the repository root (one level above this script).
_REPO_ROOT = Path(__file__).resolve().parents[1]
OUTPUT = _REPO_ROOT.joinpath("src", "data", "globalJurisdictions.json")

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
}
27+
28+
# Lookup from ISO 3166-1 alpha-2 country code to the zero-padded three-digit
# ISO 3166-1 numeric code (used for map highlighting). Codes missing from this
# table resolve to "" at the lookup site.
ALPHA2_TO_NUMERIC: dict[str, str] = {
    "AL": "008", "DZ": "012", "AO": "024", "AR": "032", "AM": "051",
    "AU": "036", "AT": "040", "AZ": "031", "BS": "044", "BH": "048",
    "BD": "050", "BB": "052", "BY": "112", "BE": "056", "BJ": "204",
    "BM": "060", "BO": "068", "BA": "070", "BW": "072", "BR": "076",
    "BN": "096", "BG": "100", "BF": "854", "KH": "116", "CA": "124",
    "CV": "132", "KY": "136", "CL": "152", "CN": "156", "CO": "170",
    "CG": "178", "CD": "180", "CR": "188", "HR": "191", "CY": "196",
    "CZ": "203", "DK": "208", "DO": "214", "EC": "218", "EG": "818",
    "SV": "222", "EE": "233", "ET": "231", "FI": "246", "FR": "250",
    "DE": "276", "GH": "288", "GR": "300", "GT": "320", "HK": "344",
    "HU": "348", "IS": "352", "IN": "356", "ID": "360", "IE": "372",
    "IL": "376", "IT": "380", "JM": "388", "JP": "392", "JO": "400",
    "KZ": "398", "KE": "404", "KR": "410", "KW": "414", "LV": "428",
    "LT": "440", "LU": "442", "MO": "446", "MY": "458", "MT": "470",
    "MU": "480", "MX": "484", "MD": "498", "MA": "504", "MZ": "508",
    "NL": "528", "NZ": "554", "NG": "566", "NO": "578", "OM": "512",
    "PK": "586", "PA": "591", "PY": "600", "PE": "604", "PH": "608",
    "PL": "616", "PT": "620", "QA": "634", "RO": "642", "RU": "643",
    "RW": "646", "SA": "682", "SN": "686", "RS": "688", "SG": "702",
    "SK": "703", "SI": "705", "ZA": "710", "ES": "724", "LK": "144",
    "SE": "752", "CH": "756", "TW": "158", "TZ": "834", "TH": "764",
    "TN": "788", "TR": "792", "UG": "800", "UA": "804", "AE": "784",
    "GB": "826", "US": "840", "UY": "858", "VN": "704", "ZM": "894",
    "ZW": "716", "CI": "384", "GN": "324", "MG": "450", "NE": "562",
    "TG": "768", "TD": "148", "MW": "454", "NA": "516", "SZ": "748",
    "LS": "426", "GY": "328", "TT": "780", "BZ": "084", "HN": "340",
    "NI": "558", "CU": "192", "HT": "332", "MM": "104", "LA": "418",
    "NP": "524", "MN": "496", "GE": "268", "UZ": "860", "TM": "795",
    "KG": "417", "TJ": "762", "AF": "004", "IQ": "368", "SY": "760",
    "LB": "422", "LY": "434", "SD": "729", "ER": "232", "DJ": "262",
    "SO": "706", "YE": "887", "IR": "364", "CM": "120", "GA": "266",
    "GQ": "226", "CF": "140", "SS": "728",
}
63+
64+
65+
def get_country_codes() -> list[dict]:
    """Fetch the landing page and return its countries as {"code", "name"} dicts.

    Codes come from the values of ``<select>`` options (at most three
    characters, first occurrence wins). If no option qualifies, the raw page
    text is scanned for ``"XX": "Name"`` pairs instead.

    Raises:
        requests.HTTPError: if the landing page responds with an error status.
    """
    response = requests.get(BASE_URL, headers=HEADERS, timeout=30)
    response.raise_for_status()
    page = response.text

    # code -> name; dict insertion order keeps the page order, first hit wins.
    found: dict[str, str] = {}

    for option in BeautifulSoup(page, "lxml").select("select option"):
        code = option.get("value", "").strip()
        name = option.get_text(strip=True)
        if not code or not name or len(code) > 3 or code in found:
            continue
        found[code] = name

    # Fallback: nothing usable in the <select>, so look for pairs embedded in JS.
    if not found:
        for match in re.finditer(r'"(\w{2,3})"\s*:\s*"([^"]+)"', page):
            code, name = match.groups()
            if code in found or len(name) <= 2:
                continue
            found[code] = name

    return [{"code": code, "name": name} for code, name in found.items()]
90+
91+
92+
def _collect_sections(root) -> dict[str, list[str]]:
    """Group paragraph and list-item text under the lower-cased heading before it."""
    sections: dict[str, list[str]] = {}
    current = None
    for el in root.find_all(["h2", "h3", "p", "li"]):
        text = el.get_text(strip=True)
        if el.name in ("h2", "h3"):
            # NOTE(review): headings of 3 characters or fewer (e.g. a bare
            # "Law") are not recorded, so text after them stays attached to
            # the previous section (or is dropped if there is none) — confirm
            # this is intended.
            if 3 < len(text) < 100:
                current = text.lower()
                # setdefault: a repeated heading must not wipe earlier text.
                sections.setdefault(current, [])
            continue
        if current and len(text) > 15:
            sections[current].append(text)
    return sections


def _pick_fields(sections: dict[str, list[str]]) -> tuple[str, str, str]:
    """Choose (overview, authority, enforcement) text; first matching section wins."""
    overview = authority = enforcement = ""

    for heading, texts in sections.items():
        combined = " ".join(texts[:5])[:600]
        if not combined:
            continue

        if any(k in heading for k in ("definition of personal data", "law", "authority")):
            if "authority" in heading and not authority:
                authority = combined
            elif not overview and "definition" in heading:
                overview = combined

        # First match wins, consistent with authority and overview.
        if not enforcement and any(k in heading for k in ("enforcement", "sanction", "penalt")):
            enforcement = combined

        if "collection" in heading and not overview:
            overview = combined

    # Fall back to the first section with a substantial amount of text.
    if not overview:
        for texts in sections.values():
            combined = " ".join(texts[:3])
            if len(combined) > 100:
                overview = combined[:600]
                break

    return overview, authority, enforcement


def scrape_country(code: str, name: str) -> dict | None:
    """Scrape a single country page for privacy law data.

    Args:
        code: Country code passed to the site as the ``c`` query parameter.
        name: Display name, used for logging and copied into the result.

    Returns:
        A dict with keys ``code``, ``name``, ``numericCode``, ``overview``,
        ``authority``, ``mainLaws`` and ``enforcementStyle``, or ``None`` when
        the page could not be fetched. ``mainLaws`` is always an empty list:
        this scraper does not extract law names. Text fields are truncated to
        600 characters and may be empty.
    """
    url = f"{BASE_URL}?c={code}"
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        resp.raise_for_status()
    except requests.RequestException as e:
        log.warning(f"Failed to fetch {name} ({code}): {e}")
        return None

    soup = BeautifulSoup(resp.text, "lxml")
    main = soup.select_one("main.page-content") or soup

    sections = _collect_sections(main)
    overview, authority, enforcement = _pick_fields(sections)

    return {
        "code": code,
        "name": name,
        # Only the first two characters are looked up; unknown codes give "".
        "numericCode": ALPHA2_TO_NUMERIC.get(code[:2], ""),
        "overview": overview,
        "authority": authority,
        "mainLaws": [],
        "enforcementStyle": enforcement,
    }
162+
163+
164+
def main():
    """Scrape every country page and write the results to OUTPUT as JSON.

    Entries whose code is longer than two characters are treated as regional
    and skipped, except "BQ1". Countries whose page cannot be fetched are
    left out of the output.

    Raises:
        SystemExit: with status 1 when no country could be scraped. OUTPUT is
            then left untouched instead of being overwritten with an empty list.
    """
    log.info("Fetching country list...")
    countries = get_country_codes()
    total = len(countries)
    log.info(f"Found {total} countries")

    results = []
    for position, country in enumerate(countries, start=1):
        code = country["code"]
        name = country["name"]

        # Skip special regional entries
        if len(code) > 2 and code not in ("BQ1",):
            log.info(f"[{position}/{total}] Skipping regional entry {name} ({code})")
            continue

        log.info(f"[{position}/{total}] Scraping {name} ({code})...")
        data = scrape_country(code, name)
        if data:
            results.append(data)

        time.sleep(0.3)  # small pause between page requests

    # An empty result means the run failed; do not clobber an existing dataset.
    if not results:
        log.error(f"No country data scraped; leaving {OUTPUT} untouched")
        raise SystemExit(1)

    OUTPUT.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT.write_text(json.dumps(results, indent=2, ensure_ascii=False), encoding="utf-8")
    log.info(f"Wrote {len(results)} countries to {OUTPUT}")


if __name__ == "__main__":
    main()
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import { AlertCircle } from "lucide-react";
2+
3+
/** Data-protection summary for a single country, as shown on its detail page. */
interface GlobalJurisdiction {
  /** Country code; interpolated into the source link as `?c=<code>`. */
  code: string;
  /** Display name, rendered as the page heading. */
  name: string;
  /** Numeric country code; not read by the detail component in this file. */
  numericCode: string;
  /** Free-text overview; its section is omitted when empty. */
  overview: string;
  /** Free-text description of the enforcement authority; section omitted when empty. */
  authority: string;
  /** Names of the main privacy laws, one card each; section omitted when empty. */
  mainLaws: string[];
  /** Free-text description of the enforcement style; section omitted when empty. */
  enforcementStyle: string;
}
12+
13+
/** Props accepted by the GlobalJurisdictionDetail component. */
interface GlobalJurisdictionDetailProps {
  /** The jurisdiction whose details are rendered. */
  country: GlobalJurisdiction;
}
16+
17+
/** Section heading plus divider bar; the title is shown with a leading "\ " marker. */
function SectionHeading({ title }: { title: string }) {
  return (
    <>
      <h3 className="text-lg font-bold tracking-tight mb-2 uppercase">{`\\ ${title}`}</h3>
      <div className="h-[3px] bg-border mb-3" />
    </>
  );
}

/** A titled card holding one paragraph of text; renders nothing when the text is empty. */
function TextSection({ title, text }: { title: string; text: string }) {
  if (!text) {
    return null;
  }
  return (
    <section>
      <SectionHeading title={title} />
      <div className="brutalist-border bg-card p-5">
        <p className="text-sm leading-relaxed">{text}</p>
      </div>
    </section>
  );
}

/**
 * Detail view for a jurisdiction that has no cases yet: header, a "no cases"
 * banner, then the overview, authority, main-laws and enforcement-style
 * sections (each omitted when its data is empty), and a source link.
 */
export default function GlobalJurisdictionDetail({ country }: GlobalJurisdictionDetailProps) {
  const { name, code, overview, authority, enforcementStyle } = country;
  const laws = country.mainLaws ?? [];
  const sourceUrl = `https://www.dlapiperdataprotection.com/?c=${code}`;

  return (
    <div className="space-y-6">
      {/* Header */}
      <div className="brutalist-border bg-card p-6">
        <h2 className="text-3xl font-bold tracking-tight uppercase mb-2">{name}</h2>
        <p className="text-sm font-mono text-muted-foreground">Data protection overview</p>
      </div>

      {/* No Cases Banner (always shown) */}
      <div className="brutalist-border bg-yellow-50 p-6 flex items-start gap-4">
        <AlertCircle className="w-6 h-6 text-yellow-600 shrink-0 mt-0.5" />
        <div>
          <p className="font-bold text-sm uppercase tracking-wide mb-1">
            No cases from this jurisdiction in Privacy Jury yet
          </p>
          <p className="text-sm text-muted-foreground">
            Stay tuned! We're expanding our coverage to include enforcement actions from more jurisdictions worldwide.
          </p>
        </div>
      </div>

      <TextSection title="Overview" text={overview} />
      <TextSection title="Enforcement Authority" text={authority} />

      {/* Main Laws: one highlighted card per law */}
      {laws.length > 0 ? (
        <section>
          <SectionHeading title="Main Privacy Laws" />
          <div className="space-y-2">
            {laws.map((law, index) => (
              <div
                key={index}
                className="brutalist-border bg-card p-4"
                style={{ borderLeftWidth: "4px", borderLeftColor: "#FFD700" }}
              >
                <p className="text-sm font-bold">{law}</p>
              </div>
            ))}
          </div>
        </section>
      ) : null}

      <TextSection title="Enforcement Style" text={enforcementStyle} />

      {/* Source */}
      <p className="text-xs text-muted-foreground font-mono text-center">
        Source:{" "}
        <a
          href={sourceUrl}
          target="_blank"
          rel="noopener noreferrer"
          className="underline hover:text-foreground"
        >
          DLA Piper Data Protection Laws of the World
        </a>
      </p>
    </div>
  );
}
106+
107+
export type { GlobalJurisdiction };

0 commit comments

Comments
 (0)