
Commit 8c14c89

feat: more informative error on constructing trainstate with compiled function (#1547)
* feat: more informative error on constructing trainstate with compiled function
* fix: grammatical error
1 parent 0d0558e commit 8c14c89


2 files changed, +60 -0 lines changed


ext/LuxReactantExt/training.jl

Lines changed: 44 additions & 0 deletions
@@ -1,3 +1,47 @@
+# Common mistake that users make is passing in a compiled function
+function Lux.Training.TrainState(
+    ::Reactant.Compiler.Thunk, ps, st, optimizer::Optimisers.AbstractRule
+)
+    throw(
+        ArgumentError(
+            """
+            Invalid TrainState construction using a compiled function.
+
+            `TrainState` is being constructed with a reactant compiled function, i.e. a
+            `Reactant.Compiler.Thunk`. This is likely a mistake as the model should be
+            passed in directly without being compiled first.
+
+            This is likely originating from the following style of usage:
+
+            ```julia
+            using Lux, Reactant, Random, Optimisers
+
+            rdev = reactant_device()
+
+            model = Dense(10, 10)
+            ps, st = Lux.setup(Random.default_rng(), model) |> rdev
+            x = rand(10) |> rdev
+
+            model_compiled = @compile model(x, ps, st)
+
+            train_state = Training.TrainState(model_compiled, ps, st, Adam())
+            ```
+
+            Instead avoid compiling the model and pass it directly to `TrainState`. When
+            `single_train_step` or other functions are called on the `TrainState`, the
+            model will be compiled automatically.
+
+            ```julia
+            train_state = Training.TrainState(model, ps, st, Adam())
+            ```
+
+            For end-to-end usage example refer to the documentation:
+            <https://lux.csail.mit.edu/stable/manual/compiling_lux_models#compile_lux_model_trainstate>
+            """
+        ),
+    )
+end
+
 function objective_function_wrapper(objective_function::F, model, ps, st, data) where {F}
     loss, stₙ, stats = objective_function(model, ps, st, data)
     return loss, Reactant.ignore_derivatives(stₙ), Reactant.ignore_derivatives(stats)
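
The error text above points users toward `single_train_step`, which is not shown in the diff itself. For context, here is a minimal sketch of the intended flow, assuming the standard Lux training API described on the linked manual page (`Training.single_train_step!`, `MSELoss`, the `AutoEnzyme` backend); the target `y`, batch size, and learning rate are illustrative and not part of this commit:

```julia
using Lux, Reactant, Enzyme, Random, Optimisers

rdev = reactant_device()

model = Dense(10, 10)
ps, st = Lux.setup(Random.default_rng(), model) |> rdev

# Illustrative data; shapes and batch size are assumptions, not from the commit.
x = rand(Float32, 10, 32) |> rdev
y = rand(Float32, 10, 32) |> rdev

# Pass the *uncompiled* model; this is the construction the error message recommends.
train_state = Training.TrainState(model, ps, st, Adam(0.001f0))

# The model is compiled on the first step; no manual @compile is needed.
_, loss, _, train_state = Training.single_train_step!(
    AutoEnzyme(), MSELoss(), (x, y), train_state
)
```

Because the new method dispatches on `Reactant.Compiler.Thunk`, the mistake described in the error text now fails at `TrainState` construction rather than deep inside the first training step.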

test/reactant/training_tests.jl

Lines changed: 16 additions & 0 deletions
@@ -167,3 +167,19 @@ end
         @test length(Reactant.XLA.devices(Reactant.XLA.sharding(loss.data))) == 8
     end
 end
+
+@testitem "Reactant.Compiler.Thunk in TrainState" tags = [:reactant] setup = [
+    SharedTestSetup
+] begin
+    using Lux, Random, Reactant, Optimisers
+
+    rdev = reactant_device(; force=true)
+
+    model = Dense(10, 10)
+    ps, st = Lux.setup(Random.default_rng(), model) |> rdev
+    x = rand(10) |> rdev
+
+    model_compiled = @compile model(x, ps, st)
+
+    @test_throws ArgumentError Training.TrainState(model_compiled, ps, st, Adam())
+end
