1313# limitations under the License.
1414
1515import re
16- from typing import Optional
16+ from typing import Optional , Dict
1717
1818import pypandoc # type: ignore
1919
2020from gapic .utils .lines import wrap
2121
# Module-level memo of converted text. ``rst()`` consults it before doing any
# conversion and stores the stripped, not-yet-wrapped result on a cache miss,
# keyed by the raw input string only.
# NOTE(review): ``source_format`` is not part of the key, and no eviction is
# visible in this chunk -- confirm both are acceptable for the callers.
_RAW_RST_CACHE: Dict[str, str] = {}
24+
def _tuned_fast_convert(text: str) -> Optional[str]:
    """Convert a small subset of Markdown to reStructuredText with ``re`` only.

    Handles inline code, reference-style and inline links, ``_emphasis_``,
    level-1 and level-2 ATX headings, and the blank line RST requires
    between a paragraph and a following bullet list.

    Args:
        text (str): The Markdown text to convert.

    Returns:
        Optional[str]: The converted text, or ``None`` when the text looks
            like it contains a pipe table or an image; this function does
            not attempt those, so the caller must convert them another way.
    """
    # --- 1. FALLBACKS ---
    # Tables (a pipe surrounded by spaces, or a pipe ending a line) and
    # images ("![") are not handled here.
    if re.search(r" \| |\|\n", text) or "![" in text:
        return None

    # --- 2. CONVERSION ---

    # A. INLINE CODE: `code` -> ``code``
    # Runs first so the single backticks created for links below are never
    # doubled.  The look-behind skips Sphinx roles such as :class:`MyClass`
    # and backticks that already belong to a ``literal``.
    converted = re.sub(r"(?<![:`])`([^`]+)`(?!`)", r"``\1``", text)

    # B. REFERENCE LINKS: [Text][Ref] -> `Text <Ref>`__
    converted = re.sub(
        r"\[([^\]]+)\]\[([^\]]+)\]", r"`\1 <\2>`__", converted
    )

    # C. STANDARD LINKS: [Text](URL) -> `Text <URL>`__
    converted = re.sub(
        r"\[([^\]]+)\]\(([^)]+)\)", r"`\1 <\2>`__", converted
    )

    # D. EMPHASIS: _text_ -> *text*
    # Only rewrite text OUTSIDE backtick-delimited spans.  re.split() with a
    # capturing group puts the protected spans (literals, roles, and the
    # links generated above) at odd indices, so only even indices are
    # touched.  Without this, "`_` and `_`" or a URL containing "/_static_/"
    # would have its underscores turned into asterisks.
    pieces = re.split(r"(``[^`]+``|`[^`]+`_{0,2})", converted)
    pieces[::2] = [
        re.sub(r"(?<!_)\b_([^_]+)_\b(?!_)", r"*\1*", piece)
        for piece in pieces[::2]
    ]
    converted = "".join(pieces)

    # E. HEADINGS: "# Heading" -> "Heading\n=======", "## " uses "-".
    # RST requires the underline to be at least as long as the title, so it
    # grows with the title; 10 is kept as the minimum so short titles render
    # exactly as before.
    min_underline = 10
    for prefix, adornment in (("# ", "="), ("## ", "-")):
        converted = re.sub(
            rf"^{prefix}(.*)$",
            lambda m, ch=adornment: (
                f"{m.group(1)}\n{ch * max(len(m.group(1)), min_underline)}"
            ),
            converted,
            flags=re.MULTILINE,
        )

    # F. LISTS: RST needs a blank line between a paragraph and a list.
    # "(?:^|\n)" lets the paragraph be the very first line of the text, and
    # excluding "\n" from the first-character class stops an existing blank
    # line from being mistaken for paragraph text (which used to push an
    # extra blank line between the first two list items).
    converted = re.sub(
        r"((?:^|\n)[^-*\n].*)\n\s*([-*] )", r"\1\n\n\2", converted
    )

    return converted
2263
2364def rst (
2465 text : str ,
@@ -27,60 +68,41 @@ def rst(
2768 nl : Optional [bool ] = None ,
2869 source_format : str = "commonmark" ,
2970):
30- """Convert the given text to ReStructured Text.
31-
32- Args:
33- text (str): The text to convert.
34- width (int): The number of columns.
35- indent (int): The number of columns to indent each line of text
36- (except the first).
37- nl (bool): Whether to append a trailing newline.
38- Defaults to appending a newline if the result is more than
39- one line long.
40- source_format (str): The source format. This is ``commonmark`` by
41- default, which is what is used by convention in protocol buffers.
42-
43- Returns:
44- str: The same text, in RST format.
45- """
46- # Quick check: If the text block does not appear to have any formatting,
47- # do not convert it.
48- # (This makes code generation significantly faster; calling out to pandoc
49- # is by far the most expensive thing we do.)
50- if not re .search (r"[|*`_[\]]" , text ):
51- answer = wrap (
52- text ,
53- indent = indent ,
54- offset = indent + 3 ,
55- width = width - indent ,
56- )
71+ # 1. Super Fast Path: No special chars? Just wrap.
72+ if not re .search (r"[|*`_[\]#]" , text ):
73+ answer = wrap (text , indent = indent , offset = indent + 3 , width = width - indent )
74+ return _finalize (answer , nl , indent )
75+
76+ # 2. Check Cache
77+ if text in _RAW_RST_CACHE :
78+ raw_rst = _RAW_RST_CACHE [text ]
5779 else :
58- # Convert from CommonMark to ReStructured Text.
59- answer = (
60- pypandoc .convert_text (
61- text ,
62- "rst" ,
63- format = source_format ,
64- verify_format = False ,
65- extra_args = ["--columns=%d" % (width - indent )],
66- )
67- .strip ()
68- .replace ("\n " , f"\n { ' ' * indent } " )
69- )
70-
71- # Add a newline to the end of the document if any line breaks are
72- # already present.
73- #
74- # This causes the closing """ to be on the subsequent line only when
75- # appropriate.
80+ # 3. Try Tuned Python Convert (Fastest)
81+ fast_result = _tuned_fast_convert (text )
82+
83+ if fast_result is not None :
84+ raw_rst = fast_result .strip ()
85+ else :
86+ # 4. Fallback to Pandoc (Only for Tables/Images)
87+ raw_rst = pypandoc .convert_text (
88+ text , "rst" , format = source_format , extra_args = ["--columns=1000" ]
89+ ).strip ()
90+
91+ _RAW_RST_CACHE [text ] = raw_rst
92+
93+ # 5. Python Formatting
94+ if "::" in raw_rst or ".. code" in raw_rst :
95+ answer = raw_rst .replace ("\n " , f"\n { ' ' * indent } " )
96+ else :
97+ answer = wrap (raw_rst , indent = indent , offset = indent , width = width - indent )
98+
99+ return _finalize (answer , nl , indent )
100+
101+
def _finalize(answer: str, nl: Optional[bool], indent: int) -> str:
    """Apply the trailing-newline and trailing-quote rules to converted text.

    Args:
        answer (str): The converted text, already wrapped and indented.
        nl (Optional[bool]): Whether to append a trailing newline. ``None``
            appends one only if ``answer`` already contains a line break;
            ``False`` never appends one.
        indent (int): The number of spaces written after the appended
            newline.

    Returns:
        str: ``answer`` with the trailing adjustments applied.
    """
    # Add a newline to the end of the document if any line breaks are
    # already present.  This causes the closing triple-quote to land on the
    # subsequent line only when appropriate.
    if nl or (nl is None and "\n" in answer):
        answer += "\n" + " " * indent

    # If the text ends in a double-quote, append a period so the output does
    # not cause a parse error when it is followed by triple-quotes.
    if answer.endswith('"'):
        answer += "."

    return answer
0 commit comments