-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodex-ai.json
More file actions
86 lines (86 loc) · 3.52 KB
/
codex-ai.json
File metadata and controls
86 lines (86 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
"name": "footy-data-kit",
"description": "Tools and scripts to gather, parse, and clean English football data for downstream use in other apps.",
"language": "JavaScript",
"runtime": "Node.js",
"type": "module",
"packageManager": "pnpm",
"framework": null,
"version": "0.6.0",
"projectType": "CLI + Data Processing",
"structure": {
"data": {
"description": "Raw scraped or collected football data before cleaning",
"path": "data"
},
"dataOutput": {
"description": "Processed and cleaned data, ready for use in other apps",
"path": "data-output"
},
"scripts": {
"description": "Data cleaning and transformation utilities",
"path": "scripts"
},
"wikipedia": {
"description": "Scraping and generation scripts for Wikipedia-based datasets",
"path": "wikipedia",
"main": "cli/index.js",
"cli": "cli/index.js"
},
"rsssf": {
"description": "Legacy RSSSF scraping and parsing utilities kept for archive work",
"path": "rsssf",
"main": "cli.js",
"cli": "cli.js"
},
"tests": {
"description": "Jest unit and integration tests covering parser, combiner, and verifier behavior",
"path": "wikipedia/__tests__"
}
},
"build": {
"tool": "pnpm",
"scripts": {
"install": "pnpm i",
"generate": "node wikipedia/cli/index.js build --start 1888 --end 1990 --output ./data-output",
"generate:overview": "node wikipedia/cli/index.js overview --start 1991 --end 2024 --output ./data-output",
"combine": "node wikipedia/data/combine-output-files.js --output ./data-output/all-seasons.json ./data-output/wiki_overview_tables_by_season.json ./data-output/wiki_promotion_relegations_by_season.json",
"verify": "node wikipedia/data/verify-football-data.js --fail-on-issues ./data-output",
"lint": "pnpm run lint",
"format": "pnpm run format",
"test": "pnpm test"
}
},
"dependencies": {
"core": ["cheerio", "commander", "entities", "fast-csv", "wikipedia"],
"dev": ["jest", "cross-env", "eslint", "prettier"]
},
"engines": {
"node": ">=20.0.0"
},
"aiContext": {
"purpose": "Assist with data scraping, parsing, and transformation scripts for English football datasets.",
"preferred_style": "Functional, modular, and reusable Node.js scripts with clear CLI support.",
"focus": [
"Wikipedia scraping and parser maintenance",
"Structured JSON output generation",
"CLI ergonomics and resumable workflows",
"Data validation and merge tooling",
"Deterministic unit testing for parser behavior",
"Clean ESM-based Node CLI design",
"Readable and stable JavaScript data pipelines"
],
"notes": [
"Ensure compatibility with ESM syntax.",
"Keep CLI entry points thin and move parsing or transformation logic into importable functions.",
"Use explicit 'node:'-prefixed imports for Node.js built-ins, and prefer centralized defaults for output names, source IDs, and URLs.",
"Prefer the wikipedia/ pipeline; rsssf/ is legacy and should only be modified when explicitly requested.",
"Preserve reproducibility for historic datasets and avoid silent schema drift.",
"Make scripts idempotent and configurable by season range.",
"Prefer fast, deterministic tests without hitting live Wikipedia.",
"Reuse central domain config files instead of duplicating source IDs, URLs, or output names.",
"Prefer straightforward JavaScript with stable object shapes and explicit normalization at data boundaries."
]
},
"license": "MIT"
}