Skip to content

Commit f4fbaa8

Browse files
authored
export a method to onnx in order to export using method generate (#375)
* export a method to onnx * mypy * mypy * fix missing args * add one example * fix * fix * fix * fix * doc * disable
1 parent 7cc8de9 commit f4fbaa8

9 files changed

Lines changed: 562 additions & 8 deletions

File tree

CHANGELOGS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Change Logs
44
0.8.8
55
+++++
66

7+
* :pr:`375`: export a method to onnx in order to export using method generate
78
* :pr:`376`: fix patched lazy_initialization for transformers>=5
89
* :pr:`372`: fix patch on rotary embedding
910
* :pr:`371`: fix make_fake_with_dynamic_dimensions

README.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ Enlightening Examples
7373

7474
* `Export microsoft/phi-2
7575
<https://sdpython.github.io/doc/onnx-diagnostic/dev/auto_examples/plot_export_tiny_phi2.html>`_
76+
* `Export a model through method generate (with Tiny-LLM)
77+
<https://sdpython.github.io/doc/onnx-diagnostic/dev/auto_examples/plot_export_tiny_llm_method_generate.html>`_
7678

7779
**Torch Export**
7880

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""
2+
.. _l-plot-tiny-llm-export-method-generate:
3+
4+
Export a model through method generate (with Tiny-LLM)
5+
======================================================
6+
7+
The main issue when exporting a LLM is the example on HuggingFace is
8+
based on method generate but we only need to export the forward method.
9+
Example :ref:`l-plot-tiny-llm-export` gives details on how to guess
10+
dummy inputs and dynamic shapes to do so.
11+
Let's see how to simplify that.
12+
13+
Dummy Example
14+
+++++++++++++
15+
16+
Let's use the example provided on
17+
`arnir0/Tiny-LLM <https://huggingface.co/arnir0/Tiny-LLM>`_.
18+
"""
19+
20+
from transformers import AutoModelForCausalLM, AutoTokenizer
21+
from onnx_diagnostic import doc
22+
from onnx_diagnostic.export.api import method_to_onnx
23+
24+
25+
MODEL_NAME = "arnir0/Tiny-LLM"
26+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
27+
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
28+
29+
30+
def generate_text(
31+
prompt, model, tokenizer, max_length=50, temperature=1, top_k=50, top_p=0.95
32+
):
33+
inputs = tokenizer.encode(prompt, return_tensors="pt")
34+
35+
outputs = model.generate(
36+
inputs,
37+
max_length=max_length,
38+
temperature=temperature,
39+
top_k=top_k,
40+
top_p=top_p,
41+
do_sample=True,
42+
)
43+
44+
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
45+
return generated_text
46+
47+
# Define your prompt
48+
49+
50+
prompt = "Continue: it rains..."
51+
generated_text = generate_text(prompt, model, tokenizer)
52+
print("-----------------")
53+
print(generated_text)
54+
print("-----------------")
55+
56+
# %%
57+
# Replace forward method
58+
# ++++++++++++++++++++++
59+
#
60+
# We now modify the model to export the model by replacing the forward method.
61+
filename = "plot_export_tiny_llm_method_generate.onnx"
62+
forward_replacement = method_to_onnx(
63+
model,
64+
method_name="forward",
65+
exporter="custom",
66+
filename=filename,
67+
patch_kwargs=dict(patch_transformers=True),
68+
verbose=1,
69+
convert_after_n_calls=3,
70+
skip_kwargs_names={"kwargs", "use_cache", "return_dict", "inputs_embeds"},
71+
dynamic_shapes={
72+
"cache_position": {0: "total_sequence_length"},
73+
"past_key_values": [
74+
{0: "batch_size", 2: "past_sequence_length"},
75+
{0: "batch_size", 2: "past_sequence_length"},
76+
],
77+
"input_ids": {0: "batch_size", 1: "sequence_length"},
78+
},
79+
)
80+
81+
# %%
82+
# The lambda function cannot be skipped as
83+
# forward_replacement is a module.
84+
85+
print(f"type(forward_replacement)={type(forward_replacement)}")
86+
model.forward = lambda *args, **kwargs: forward_replacement(*args, **kwargs)
87+
88+
89+
# %%
90+
# Let's call generate again.
91+
generated_text = generate_text(prompt, model, tokenizer)
92+
print(generated_text)
93+
94+
95+
# %%
96+
97+
doc.plot_legend("Tiny-LLM\nforward inputs\through generate", "torch.export.export", "tomato")

_doc/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Enlightening Examples
8585
**Where to start to export a model**
8686

8787
* :ref:`l-plot-export_tiny_phi2`
88+
* :ref:`l-plot-tiny-llm-export-method-generate`
8889

8990
**Exporter Recipes**
9091

_unittests/ut_export/test_api.py

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@
77
has_transformers,
88
ignore_warnings,
99
requires_transformers,
10+
requires_experimental_experiment,
1011
)
1112
from onnx_diagnostic.helpers import max_diff
1213
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
1314
from onnx_diagnostic.helpers.rt_helper import make_feeds
1415
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
1516
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
1617
from onnx_diagnostic.torch_export_patches import torch_export_patches
17-
from onnx_diagnostic.export.api import to_onnx
18+
from onnx_diagnostic.export.api import to_onnx, method_to_onnx
1819

1920

2021
class TestValidate(ExtTestCase):
@@ -114,6 +115,136 @@ def test_tiny_llm_to_onnx(self):
114115

115116
self.clean_dump()
116117

118+
@requires_experimental_experiment("0.1")
119+
def test_method_to_onnx_args(self):
120+
class Model(torch.nn.Module):
121+
def forward(self, x, y):
122+
return x + y
123+
124+
filename = self.get_dump_file("test_method_to_onnx_args.onnx")
125+
inputs = [
126+
(torch.randn((5, 6)), torch.randn((1, 6))),
127+
(torch.randn((7, 7)), torch.randn((1, 7))),
128+
]
129+
model = Model()
130+
method_to_call = method_to_onnx(model, exporter="custom", filename=filename)
131+
expecteds = []
132+
for args in inputs:
133+
expecteds.append(method_to_call(*args))
134+
self.assertExists(filename)
135+
src = method_to_call._method_src
136+
self.assertIn("f(self, x, y):", src)
137+
self.assertIn("return self._method_call(x=x, y=y)", src)
138+
self.assertEqual(len(list(method_to_call.named_modules())), 2)
139+
sess = self.check_ort(filename)
140+
input_names = [i.name for i in sess.get_inputs()]
141+
for expected, args in zip(expecteds, inputs):
142+
feeds = make_feeds(input_names, args, use_numpy=True)
143+
got = sess.run(None, feeds)
144+
self.assertEqualArray(expected, got[0])
145+
self.clean_dump()
146+
147+
@requires_experimental_experiment("0.1")
148+
def test_method_to_onnx_kwargs(self):
149+
class Model(torch.nn.Module):
150+
def forward(self, x=None, y=None):
151+
return x + y
152+
153+
filename = self.get_dump_file("test_method_to_onnx_kwargs.onnx")
154+
inputs = [
155+
dict(x=torch.randn((5, 6)), y=torch.randn((1, 6))),
156+
dict(x=torch.randn((7, 7)), y=torch.randn((1, 7))),
157+
]
158+
model = Model()
159+
method_to_call = method_to_onnx(model, exporter="custom", filename=filename)
160+
expecteds = []
161+
for kwargs in inputs:
162+
expecteds.append(method_to_call(**kwargs))
163+
self.assertExists(filename)
164+
src = method_to_call._method_src
165+
self.assertIn("f(self, x=None, y=None):", src)
166+
self.assertIn("return self._method_call(x=x, y=y)", src)
167+
self.assertEqual(len(list(method_to_call.named_modules())), 2)
168+
sess = self.check_ort(filename)
169+
input_names = [i.name for i in sess.get_inputs()]
170+
for expected, kwargs in zip(expecteds, inputs):
171+
feeds = make_feeds(input_names, kwargs, use_numpy=True)
172+
got = sess.run(None, feeds)
173+
self.assertEqualArray(expected, got[0])
174+
self.clean_dump()
175+
176+
@requires_experimental_experiment("0.1")
177+
def test_method_to_onnx_kwargs_patch(self):
178+
class Model(torch.nn.Module):
179+
def forward(self, x=None, y=None):
180+
return x + y
181+
182+
filename = self.get_dump_file("test_method_to_onnx_kwargs_patch.onnx")
183+
inputs = [
184+
dict(x=torch.randn((5, 6)), y=torch.randn((1, 6))),
185+
dict(x=torch.randn((7, 7)), y=torch.randn((1, 7))),
186+
]
187+
model = Model()
188+
method_to_call = method_to_onnx(
189+
model,
190+
exporter="custom",
191+
filename=filename,
192+
patch_kwargs=dict(patch_transformers=True),
193+
)
194+
expecteds = []
195+
for kwargs in inputs:
196+
expecteds.append(method_to_call(**kwargs))
197+
self.assertExists(filename)
198+
src = method_to_call._method_src
199+
self.assertIn("f(self, x=None, y=None):", src)
200+
self.assertIn("return self._method_call(x=x, y=y)", src)
201+
self.assertEqual(len(list(method_to_call.named_modules())), 2)
202+
sess = self.check_ort(filename)
203+
input_names = [i.name for i in sess.get_inputs()]
204+
for expected, kwargs in zip(expecteds, inputs):
205+
feeds = make_feeds(input_names, kwargs, use_numpy=True)
206+
got = sess.run(None, feeds)
207+
self.assertEqualArray(expected, got[0])
208+
self.clean_dump()
209+
210+
@requires_experimental_experiment("0.1")
211+
@hide_stdout()
212+
def test_method_to_onnx_mixed(self):
213+
from experimental_experiment.torch_interpreter import ExportOptions
214+
215+
class Model(torch.nn.Module):
216+
def forward(self, x, y=None):
217+
return x + y
218+
219+
filename = self.get_dump_file("test_method_to_onnx_mixed.onnx")
220+
inputs = [
221+
((torch.randn((5, 6)),), dict(y=torch.randn((1, 6)))),
222+
((torch.randn((7, 7)),), dict(y=torch.randn((1, 7)))),
223+
]
224+
model = Model()
225+
method_to_call = method_to_onnx(
226+
model,
227+
exporter="custom",
228+
filename=filename,
229+
verbose=10,
230+
exporter_kwargs=dict(export_options=ExportOptions(backed_size_oblivious=False)),
231+
)
232+
expecteds = []
233+
for args, kwargs in inputs:
234+
expecteds.append(method_to_call(*args, **kwargs))
235+
self.assertExists(filename)
236+
src = method_to_call._method_src
237+
self.assertIn("f(self, x, y=None):", src)
238+
self.assertIn("return self._method_call(x=x, y=y)", src)
239+
self.assertEqual(len(list(method_to_call.named_modules())), 2)
240+
sess = self.check_ort(filename)
241+
input_names = [i.name for i in sess.get_inputs()]
242+
for expected, (args, kwargs) in zip(expecteds, inputs):
243+
feeds = make_feeds(input_names, (args, kwargs), use_numpy=True)
244+
got = sess.run(None, feeds)
245+
self.assertEqualArray(expected, got[0])
246+
self.clean_dump()
247+
117248

118249
if __name__ == "__main__":
119250
unittest.main(verbosity=2)

_unittests/ut_export/test_dynamic_shapes.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def forward(self, **kwargs):
188188
expected = "#2[((),dict(x:T1s5x6)),((),dict(x:T1s6x6))]"
189189
self.assertEqual(expected, string_type(mi.inputs, with_shape=True))
190190
ds = mi.guess_dynamic_shapes()
191-
self.assertEqual(ds, (tuple(), {"x": {0: torch.export.Dim.DYNAMIC}}))
191+
self.assertEqual((tuple(), {"x": {0: torch.export.Dim.DYNAMIC}}), ds)
192192
_a, _kw, ds = mi.move_to_kwargs(*mi.inputs[0], ds)
193193
self.assertEqual(ds, (tuple(), {"kwargs": {"x": {0: torch.export.Dim.DYNAMIC}}}))
194194
self.assertEqual(
@@ -937,6 +937,31 @@ def test_invalid_dimensions_for_export(self):
937937
backed_size_oblivious = cpl.invalid_dimensions_for_export()
938938
self.assertFalse(backed_size_oblivious)
939939

940+
def test_guess_dynamic_shapes_missing(self):
    """Guesses dynamic shapes when an optional input is missing from one call."""

    class Model(torch.nn.Module):
        def forward(self, x, y=None):
            if y is None:
                return x.abs()
            return x.abs() + y

    model = Model()
    x = torch.randn((5, 6))
    # Sanity check: the model runs without the optional input.
    self.assertNotEmpty(model(x=x))

    # First call omits y; the two others provide it with varying batch size.
    recorded_inputs = [
        (tuple(), {"x": x}),
        (tuple(), {"x": torch.randn((6, 6)), "y": torch.randn((6, 6))}),
        (tuple(), {"x": torch.randn((7, 6)), "y": torch.randn((7, 6))}),
    ]

    mi = ModelInputs(model, recorded_inputs)
    DYN = torch.export.Dim.DYNAMIC
    guessed = mi.guess_dynamic_shapes()
    self.assertEqual(guessed, ((), {"x": {0: DYN}, "y": {0: DYN}}))
    _a, _kw, guessed = mi.move_to_kwargs(*mi.inputs[-1], guessed)
    self.assertEqual(guessed, (tuple(), {"x": {0: DYN}, "y": {0: DYN}}))
964+
940965

941966
if __name__ == "__main__":
942967
unittest.main(verbosity=2)

_unittests/ut_xrun_doc/test_documentation_examples.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ def add_test_methods(cls):
102102

103103
if (
104104
not reason
105-
and name in {"plot_export_tiny_phi2.py"}
105+
and name
106+
in {"plot_export_tiny_phi2.py", "plot_export_tiny_llm_method_generate.py"}
106107
and not has_transformers("4.55")
107108
):
108109
reason = "transformers<4.55"
@@ -124,6 +125,7 @@ def add_test_methods(cls):
124125
"plot_export_locate_issue.py",
125126
"plot_export_with_auto.py",
126127
"plot_export_tiny_llm.py",
128+
"plot_export_tiny_llm_method_generate.py",
127129
}
128130
and not has_torch("2.8")
129131
):

0 commit comments

Comments
 (0)