Skip to content

Commit 0f05b61

Browse files
committed
improve docs performance
1 parent 1919b50 commit 0f05b61

File tree

1 file changed

+74
-52
lines changed

1 file changed

+74
-52
lines changed

gapic/utils/rst.py

Lines changed: 74 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,53 @@
1313
# limitations under the License.
1414

1515
import re
16-
from typing import Optional
16+
from typing import Optional, Dict
1717

1818
import pypandoc # type: ignore
1919

2020
from gapic.utils.lines import wrap
2121

22+
# Cache of raw RST conversion results, keyed by the original input text.
# Entries come from both the pure-Python fast path and the pandoc fallback.
23+
_RAW_RST_CACHE: Dict[str, str] = {}
24+
25+
def _tuned_fast_convert(text: str) -> Optional[str]:
26+
"""
27+
Converts Markdown to RST using pure Python.
28+
Only falls back to Pandoc for Tables and Images.
29+
"""
30+
# --- 1. FALLBACKS ---
31+
# Tables (pipe surrounded by spaces) or Images (![).
32+
# We allow "][" (Reference Links) to be handled by Python now.
33+
if (re.search(r" \| ", text) or re.search(r"\|\n", text)) or "![" in text:
34+
return None
35+
36+
# --- 2. CONVERSION ---
37+
38+
# A. CODE BLOCKS: `code` -> ``code``
39+
# CRITICAL: Run this FIRST. This ensures we handle existing backticks
40+
# before we create NEW backticks for links.
41+
# (?<!:) ensures we don't break Sphinx roles like :class:`MyClass`
42+
converted = re.sub(r"(?<!:|`)`([^`]+)`(?!`)", r"``\1``", text)
43+
44+
# B. REFERENCE LINKS: [Text][Ref] -> `Text <Ref>`__
45+
# We fix the broken documentation by converting these to valid RST links.
46+
# Since step A is done, these new backticks will NOT be doubled.
47+
converted = re.sub(r"\[([^\]]+)\]\[([^\]]+)\]", r"`\1 <\2>`__", converted)
48+
49+
# C. STANDARD LINKS: [Text](URL) -> `Text <URL>`__
50+
converted = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"`\1 <\2>`__", converted)
51+
52+
# D. BOLD/ITALICS:
53+
converted = re.sub(r"(?<!_)\b_([^_]+)_\b(?!_)", r"*\1*", converted)
54+
55+
# E. HEADINGS: # Heading -> Heading\n=======
56+
converted = re.sub(r"^# (.*)$", r"\1\n" + "=" * 10, converted, flags=re.MULTILINE)
57+
converted = re.sub(r"^## (.*)$", r"\1\n" + "-" * 10, converted, flags=re.MULTILINE)
58+
59+
# F. LISTS: Markdown (- item) needs a preceding newline for RST.
60+
converted = re.sub(r"(\n[^-*].*)\n\s*([-*] )", r"\1\n\n\2", converted)
61+
62+
return converted
2263

2364
def rst(
2465
text: str,
@@ -27,60 +68,41 @@ def rst(
2768
nl: Optional[bool] = None,
2869
source_format: str = "commonmark",
2970
):
30-
"""Convert the given text to ReStructured Text.
31-
32-
Args:
33-
text (str): The text to convert.
34-
width (int): The number of columns.
35-
indent (int): The number of columns to indent each line of text
36-
(except the first).
37-
nl (bool): Whether to append a trailing newline.
38-
Defaults to appending a newline if the result is more than
39-
one line long.
40-
source_format (str): The source format. This is ``commonmark`` by
41-
default, which is what is used by convention in protocol buffers.
42-
43-
Returns:
44-
str: The same text, in RST format.
45-
"""
46-
# Quick check: If the text block does not appear to have any formatting,
47-
# do not convert it.
48-
# (This makes code generation significantly faster; calling out to pandoc
49-
# is by far the most expensive thing we do.)
50-
if not re.search(r"[|*`_[\]]", text):
51-
answer = wrap(
52-
text,
53-
indent=indent,
54-
offset=indent + 3,
55-
width=width - indent,
56-
)
71+
# 1. Super Fast Path: No special chars? Just wrap.
72+
if not re.search(r"[|*`_[\]#]", text):
73+
answer = wrap(text, indent=indent, offset=indent + 3, width=width - indent)
74+
return _finalize(answer, nl, indent)
75+
76+
# 2. Check Cache
77+
if text in _RAW_RST_CACHE:
78+
raw_rst = _RAW_RST_CACHE[text]
5779
else:
58-
# Convert from CommonMark to ReStructured Text.
59-
answer = (
60-
pypandoc.convert_text(
61-
text,
62-
"rst",
63-
format=source_format,
64-
verify_format=False,
65-
extra_args=["--columns=%d" % (width - indent)],
66-
)
67-
.strip()
68-
.replace("\n", f"\n{' ' * indent}")
69-
)
70-
71-
# Add a newline to the end of the document if any line breaks are
72-
# already present.
73-
#
74-
# This causes the closing """ to be on the subsequent line only when
75-
# appropriate.
80+
# 3. Try Tuned Python Convert (Fastest)
81+
fast_result = _tuned_fast_convert(text)
82+
83+
if fast_result is not None:
84+
raw_rst = fast_result.strip()
85+
else:
86+
# 4. Fallback to Pandoc (whenever the fast converter declines the text)
87+
raw_rst = pypandoc.convert_text(
88+
text, "rst", format=source_format, extra_args=["--columns=1000"]
89+
).strip()
90+
91+
_RAW_RST_CACHE[text] = raw_rst
92+
93+
# 5. Python Formatting
94+
if "::" in raw_rst or ".. code" in raw_rst:
95+
answer = raw_rst.replace("\n", f"\n{' ' * indent}")
96+
else:
97+
answer = wrap(raw_rst, indent=indent, offset=indent, width=width - indent)
98+
99+
return _finalize(answer, nl, indent)
100+
101+
102+
def _finalize(answer, nl, indent):
103+
"""Helper to handle trailing newlines and quotes."""
76104
if nl or ("\n" in answer and nl is None):
77105
answer += "\n" + " " * indent
78-
79-
# If the text ends in a double-quote, append a period.
80-
# This ensures that we do not get a parse error when this output is
81-
# followed by triple-quotes.
82106
if answer.endswith('"'):
83107
answer += "."
84-
85-
# Done; return the answer.
86108
return answer

0 commit comments

Comments
 (0)