Skip to content

Commit 605e0ad

Browse files
Improve readme
1 parent 21ab4e0 commit 605e0ad

2 files changed

Lines changed: 7 additions & 34 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@
1212

1313
## Introduction
1414

15-
Chart-extractor provides tools for extracting important information from financial charts, utilizing my [chart-info-detector](https://github.com/StephanAkkerman/chart-info-detector) model and OCR tools.
15+
Chart-extractor provides tools for extracting important information from financial charts, utilizing the [chart-info-detector](https://github.com/StephanAkkerman/chart-info-detector) model and OCR tools. It detects the chart title and the small price "pill" in screenshots, runs OCR on those regions, and applies lightweight parsing heuristics to convert raw OCR text into structured fields such as symbol, exchange, timeframe, price, and trading session. The project is designed primarily for TradingView-style screenshots but also works with similar chart widgets. It supports RapidOCR (ONNX runtime) for fast local inference and can fall back to Tesseract when needed. Use the library programmatically, the small CLI for quick JSON output, or the example script for experiments.
1616

1717
## Example
1818
Given a TradingView chart image like the one below, it will extract information such as price, symbol, exchange, and timeframe.
1919

2020
<details closed>
21-
<summary>Input Image Example</summary>
21+
<summary>Input Image Example 📊</summary>
2222
<img src="img/chart.png" alt="Input Chart">
2323
</details>
2424

chart_extractor/chart_extractor.py

Lines changed: 5 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,19 @@
1-
# chart_extractor.py
2-
from __future__ import annotations
3-
41
import re
52
from dataclasses import dataclass
63
from pathlib import Path
74

85
import cv2
96
import numpy as np
107
from huggingface_hub import hf_hub_download
8+
from rapidocr_onnxruntime import RapidOCR
119
from ultralytics import YOLO
1210

13-
# ---------- Config ----------
14-
# Class IDs (must match your training config)
1511
CLS_SYMBOL_TITLE = 0
1612
CLS_LAST_PRICE_PILL = 1
1713

18-
# Hugging Face model (adjust if you renamed the repo or path)
1914
HF_MODEL_REPO = "StephanAkkerman/chart-info-detector"
20-
HF_MODEL_FILE = "weights/best.pt" # path inside the model repo
15+
HF_MODEL_FILE = "weights/best.pt"
2116

22-
# OCR engine (lazy-loaded): RapidOCR preferred, Tesseract fallback
2317
_OCR = None
2418
_OCR_KIND = None # "rapid" | "tesseract"
2519

@@ -55,7 +49,6 @@ def _download_weights_if_needed(
5549
return hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
5650

5751

58-
# --- NEW helpers: classify OCR text, smart swap ---
5952
def _looks_like_price(text: str) -> bool:
6053
if not text:
6154
return False
@@ -91,26 +84,10 @@ def _ensure_ocr():
9184
global _OCR, _OCR_KIND
9285
if _OCR is not None:
9386
return _OCR
94-
try:
95-
from rapidocr_onnxruntime import RapidOCR
96-
97-
_OCR = RapidOCR()
98-
_OCR_KIND = "rapid"
99-
return _OCR
100-
except Exception:
101-
try:
102-
import pytesseract # requires system tesseract (Windows installer / apt-get on Linux)
10387

104-
_OCR = pytesseract
105-
_OCR_KIND = "tesseract"
106-
return _OCR
107-
except Exception as e2:
108-
raise RuntimeError(
109-
"No OCR engine available. Install one of:\n"
110-
" pip install rapidocr-onnxruntime onnxruntime\n"
111-
"or\n"
112-
" sudo apt-get install tesseract-ocr && pip install pytesseract"
113-
) from e2
88+
_OCR = RapidOCR()
89+
_OCR_KIND = "rapid"
90+
return _OCR
11491

11592

11693
def _read_image(img: str | Path | np.ndarray) -> np.ndarray:
@@ -175,7 +152,6 @@ def _ocr_text(im_bgr: np.ndarray) -> str:
175152
)
176153

177154

178-
# --- REPLACE your title parser with this TradingView-oriented version ---
179155
def _parse_title(text: str) -> tuple[str | None, str | None, str | None]:
180156
"""
181157
Parse (name, exchange, timeframe) from TradingView-style titles like:
@@ -223,7 +199,6 @@ def _parse_title(text: str) -> tuple[str | None, str | None, str | None]:
223199
return name, exchange, timeframe
224200

225201

226-
# --- TIGHTER price parser (only from pill text) ---
227202
def _parse_pill(text: str) -> tuple[float | None, str | None]:
228203
"""
229204
Parse (price, session) from pill text; avoid 'S&P 500' false matches.
@@ -258,7 +233,6 @@ def _parse_pill(text: str) -> tuple[float | None, str | None]:
258233
return price, ("regular" if sess is None else sess)
259234

260235

261-
# ---------- Core pipeline ----------
262236
class ChartExtractor:
263237
"""
264238
Detects chart widgets (YOLO) and extracts info via OCR.
@@ -419,7 +393,6 @@ def analyze(
419393
return result
420394

421395

422-
# ---------- Quick CLI test ----------
423396
if __name__ == "__main__":
424397
import json
425398
import sys

0 commit comments

Comments
 (0)