|
7 | 7 |
|
8 | 8 | import pytensor.scalar as ps |
9 | 9 | from pytensor.compile.function import function |
10 | | -from pytensor.gradient import grad, jacobian |
| 10 | +from pytensor.gradient import grad, grad_not_implemented, jacobian |
11 | 11 | from pytensor.graph.basic import Apply, Constant |
12 | 12 | from pytensor.graph.fg import FunctionGraph |
13 | 13 | from pytensor.graph.op import ComputeMapType, HasInnerGraph, Op, StorageMapType |
14 | 14 | from pytensor.graph.replace import graph_replace |
15 | 15 | from pytensor.graph.traversal import ancestors, truncated_graph_inputs |
| 16 | +from pytensor.scalar import ScalarType, ScalarVariable |
16 | 17 | from pytensor.tensor.basic import ( |
17 | 18 | atleast_2d, |
18 | 19 | concatenate, |
|
22 | 23 | ) |
23 | 24 | from pytensor.tensor.math import dot |
24 | 25 | from pytensor.tensor.slinalg import solve |
| 26 | +from pytensor.tensor.type import DenseTensorType |
25 | 27 | from pytensor.tensor.variable import TensorVariable, Variable |
26 | 28 |
|
27 | 29 |
|
@@ -140,23 +142,19 @@ def _find_optimization_parameters(objective: TensorVariable, x: TensorVariable): |
140 | 142 |
|
141 | 143 |
|
142 | 144 | def _get_parameter_grads_from_vector( |
143 | | - grad_wrt_args_vector: Variable, |
144 | | - x_star: Variable, |
145 | | - args: Sequence[Variable], |
| 145 | + grad_wrt_args_vector: TensorVariable, |
| 146 | + x_star: TensorVariable, |
| 147 | + args: Sequence[TensorVariable | ScalarVariable], |
146 | 148 | output_grad: Variable, |
147 | 149 | ): |
148 | 150 | """ |
149 | 151 | Given a single concatenated vector of objective function gradients with respect to raveled optimization parameters, |
150 | 152 | returns the contribution of each parameter to the total loss function, with the unraveled shape of the parameter. |
151 | 153 | """ |
152 | | - grad_wrt_args_vector = cast(TensorVariable, grad_wrt_args_vector) |
153 | | - x_star = cast(TensorVariable, x_star) |
154 | | - |
155 | 154 | cursor = 0 |
156 | 155 | grad_wrt_args = [] |
157 | 156 |
|
158 | 157 | for arg in args: |
159 | | - arg = cast(TensorVariable, arg) |
160 | 158 | arg_shape = arg.shape |
161 | 159 | arg_size = arg_shape.prod() |
162 | 160 | arg_grad = grad_wrt_args_vector[:, cursor : cursor + arg_size].reshape( |
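
Note (illustration only, not code from this PR): the docstring's slicing convention can be shown with a rough NumPy sketch using made-up shapes — columns of the concatenated gradient matrix are consumed left to right, one raveled parameter at a time, and each slice is reshaped back to that parameter's shape.

import numpy as np

# Hypothetical shapes: x_star has 3 entries; the parameters are a (2, 2) matrix and a length-3 vector.
x_size, arg_shapes = 3, [(2, 2), (3,)]
jac = np.arange(3 * 7).reshape(3, 7)  # 7 columns = 4 + 3 raveled parameter entries

cursor, pieces = 0, []
for shape in arg_shapes:
    size = int(np.prod(shape))
    # Consume this parameter's columns and restore its original shape for every entry of x_star.
    pieces.append(jac[:, cursor : cursor + size].reshape(x_size, *shape))
    cursor += size

assert [p.shape for p in pieces] == [(3, 2, 2), (3, 3)]
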
@@ -375,14 +373,18 @@ def __init__( |
375 | 373 | method: str = "brent", |
376 | 374 | optimizer_kwargs: dict | None = None, |
377 | 375 | ): |
378 | | - if not cast(TensorVariable, x).ndim == 0: |
| 376 | + if not (isinstance(x, TensorVariable) and x.ndim == 0): |
379 | 377 | raise ValueError( |
380 | 378 | "The variable `x` must be a scalar (0-dimensional) tensor for minimize_scalar." |
381 | 379 | ) |
382 | | - if not cast(TensorVariable, objective).ndim == 0: |
| 380 | + if not (isinstance(objective, TensorVariable) and objective.ndim == 0): |
383 | 381 | raise ValueError( |
384 | 382 | "The objective function must be a scalar (0-dimensional) tensor for minimize_scalar." |
385 | 383 | ) |
| 384 | + if x not in ancestors([objective]): |
| 385 | + raise ValueError( |
| 386 | + "The variable `x` must be an input to the computational graph of the objective function." |
| 387 | + ) |
386 | 388 | self.fgraph = FunctionGraph([x, *args], [objective]) |
387 | 389 |
|
388 | 390 | self.method = method |
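
Note: the new `ancestors` membership check can be exercised directly; a minimal sketch with made-up variable names, using the same import path as this file:

import pytensor.tensor as pt
from pytensor.graph.traversal import ancestors

x = pt.scalar("x")
a = pt.scalar("a")
objective = (x - a) ** 2

assert x in ancestors([objective])                    # x feeds the objective, so the check passes
assert pt.scalar("y") not in ancestors([objective])   # an unrelated variable is rejected
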
@@ -416,7 +418,19 @@ def perform(self, node, inputs, outputs): |
416 | 418 | outputs[1][0] = np.bool_(res.success) |
417 | 419 |
|
418 | 420 | def L_op(self, inputs, outputs, output_grads): |
| 421 | + # TODO: Handle disconnected inputs, instead of zeroing them out or failing for unsupported types |
419 | 422 | x, *args = inputs |
| 423 | + if non_supported_types := tuple( |
| 424 | + inp.type |
| 425 | + for inp in inputs |
| 426 | + if not isinstance(inp.type, DenseTensorType | ScalarType) |
| 427 | + ): |
| 428 | + # TODO: Support SparseTensorTypes |
| 429 | + # TODO: Remaining types are likely just disconnected anyway |
| 430 | + msg = f"MinimizeScalarOp gradient not implemented due to inputs of type {non_supported_types}" |
| 431 | + return [ |
| 432 | + grad_not_implemented(self, i, inp, msg) for i, inp in enumerate(inputs) |
| 433 | + ] |
420 | 434 | x_star, _ = outputs |
421 | 435 | output_grad, _ = output_grads |
422 | 436 |
|
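
Note: a sketch of how the new type guard behaves in isolation (the variables below are hypothetical): dense tensor and scalar inputs pass through, and anything else is collected into the unsupported-types tuple.

import pytensor.scalar as ps
import pytensor.tensor as pt
from pytensor.scalar import ScalarType
from pytensor.tensor.type import DenseTensorType

inputs = [pt.dscalar("x"), pt.dvector("beta"), ps.float64("c")]
unsupported = tuple(
    inp.type for inp in inputs if not isinstance(inp.type, DenseTensorType | ScalarType)
)
assert unsupported == ()  # all inputs are dense tensors or scalars, so L_op proceeds

When the tuple is non-empty, the L_op returns `grad_not_implemented(...)` placeholders instead of raising immediately, so the error should only surface if one of those gradients is actually requested.
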
@@ -468,7 +482,6 @@ def minimize_scalar( |
468 | 482 | Symbolic boolean flag indicating whether the minimization routine reported convergence to a minimum |
469 | 483 | value, based on the requested convergence criteria. |
470 | 484 | """ |
471 | | - |
472 | 485 | args = _find_optimization_parameters(objective, x) |
473 | 486 |
|
474 | 487 | minimize_scalar_op = MinimizeScalarOp( |
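
Note: a hedged end-to-end sketch of the public entry point this hunk touches. The import path, argument order, and the use of `x`'s value as the initial guess are assumptions, not taken from this diff.

import pytensor
import pytensor.tensor as pt
from pytensor.tensor.optimize import minimize_scalar  # module path assumed

a = pt.scalar("a")
x = pt.scalar("x")
objective = (x - a) ** 2

x_star, success = minimize_scalar(objective, x)  # symbolic solution + convergence flag
fn = pytensor.function([a, x], [x_star, success])
print(fn(3.0, 0.0))  # expected to report a solution near 3.0 with success=True
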
@@ -499,7 +512,11 @@ def __init__( |
499 | 512 | use_vectorized_jac: bool = False, |
500 | 513 | optimizer_kwargs: dict | None = None, |
501 | 514 | ): |
502 | | - if not cast(TensorVariable, objective).ndim == 0: |
| 515 | + if not (isinstance(x, TensorVariable) and x.ndim in (0, 1)): |
| 516 | + raise ValueError( |
| 517 | + "The variable `x` must be a scalar or vector (0- or 1-dimensional) tensor for minimize." |
| 518 | + ) |
| 519 | + if not (isinstance(objective, TensorVariable) and objective.ndim == 0): |
503 | 520 | raise ValueError( |
504 | 521 | "The objective function must be a scalar (0-dimensional) tensor for minimize." |
505 | 522 | ) |
@@ -570,7 +587,19 @@ def perform(self, node, inputs, outputs): |
570 | 587 | outputs[1][0] = np.bool_(res.success) |
571 | 588 |
|
572 | 589 | def L_op(self, inputs, outputs, output_grads): |
| 590 | + # TODO: Handle disconnected inputs, instead of zeroing them out or failing for unsupported types |
573 | 591 | x, *args = inputs |
| 592 | + if non_supported_types := tuple( |
| 593 | + inp.type |
| 594 | + for inp in inputs |
| 595 | + if not isinstance(inp.type, DenseTensorType | ScalarType) |
| 596 | + ): |
| 597 | + # TODO: Support SparseTensorTypes |
| 598 | + # TODO: Remaining types are likely just disconnected anyway |
| 599 | + msg = f"MinimizeOp gradient not implemented due to inputs of type {non_supported_types}" |
| 600 | + return [ |
| 601 | + grad_not_implemented(self, i, inp, msg) for i, inp in enumerate(inputs) |
| 602 | + ] |
574 | 603 | x_star, _success = outputs |
575 | 604 | output_grad, _ = output_grads |
576 | 605 |
|
@@ -672,13 +701,15 @@ def __init__( |
672 | 701 | hess: bool = False, |
673 | 702 | optimizer_kwargs=None, |
674 | 703 | ): |
675 | | - if not equation.ndim == 0: |
| 704 | + if not (isinstance(variables, TensorVariable) and variables.ndim == 0): |
| 705 | + raise ValueError( |
| 706 | + "The variable `variables` must be a scalar (0-dimensional) tensor for root_scalar." |
| 707 | + ) |
| 708 | + if not (isinstance(equation, TensorVariable) and equation.ndim == 0): |
676 | 709 | raise ValueError( |
677 | 710 | "The equation must be a scalar (0-dimensional) tensor for root_scalar." |
678 | 711 | ) |
679 | | - if not isinstance(variables, Variable) or variables not in ancestors( |
680 | | - [equation] |
681 | | - ): |
| 712 | + if variables not in ancestors([equation]): |
682 | 713 | raise ValueError( |
683 | 714 | "The variable `variables` must be an input to the computational graph of the equation." |
684 | 715 | ) |
@@ -741,7 +772,19 @@ def perform(self, node, inputs, outputs): |
741 | 772 | outputs[1][0] = np.bool_(res.converged) |
742 | 773 |
|
743 | 774 | def L_op(self, inputs, outputs, output_grads): |
| 775 | + # TODO: Handle disconnected inputs, instead of zeroing them out or failing for unsupported types |
744 | 776 | x, *args = inputs |
| 777 | + if non_supported_types := tuple( |
| 778 | + inp.type |
| 779 | + for inp in inputs |
| 780 | + if not isinstance(inp.type, DenseTensorType | ScalarType) |
| 781 | + ): |
| 782 | + # TODO: Support SparseTensorTypes |
| 783 | + # TODO: Remaining types are likely just disconnected anyway |
| 784 | + msg = f"RootScalarOp gradient not implemented due to inputs of type {non_supported_types}" |
| 785 | + return [ |
| 786 | + grad_not_implemented(self, i, inp, msg) for i, inp in enumerate(inputs) |
| 787 | + ] |
745 | 788 | x_star, _ = outputs |
746 | 789 | output_grad, _ = output_grads |
747 | 790 |
|
@@ -833,7 +876,11 @@ def __init__( |
833 | 876 | optimizer_kwargs: dict | None = None, |
834 | 877 | use_vectorized_jac: bool = False, |
835 | 878 | ): |
836 | | - if cast(TensorVariable, variables).ndim != cast(TensorVariable, equations).ndim: |
| 879 | + if not isinstance(variables, TensorVariable): |
| 880 | + raise ValueError("The variable `variables` must be a tensor for root.") |
| 881 | + if not isinstance(equations, TensorVariable): |
| 882 | + raise ValueError("The equations must be a tensor for root.") |
| 883 | + if variables.ndim != equations.ndim: |
837 | 884 | raise ValueError( |
838 | 885 | "The variable `variables` must have the same number of dimensions as the equations." |
839 | 886 | ) |
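
Note: a small sketch of what the stricter validation in `root` expects (the function signature, argument order, and return values are assumed, not taken from this diff): `equations` and `variables` must both be tensors of matching rank, e.g. a vector of two equations in a vector of two unknowns.

import pytensor.tensor as pt
from pytensor.tensor.optimize import root  # module path assumed

v = pt.vector("v")
equations = pt.stack([v[0] ** 2 + v[1] - 1, v[0] - v[1] ** 2])  # ndim matches v.ndim == 1

v_star, success = root(equations, v)  # passes the new isinstance and ndim checks
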
@@ -922,7 +969,19 @@ def L_op( |
922 | 969 | outputs: Sequence[Variable], |
923 | 970 | output_grads: Sequence[Variable], |
924 | 971 | ) -> list[Variable]: |
| 972 | + # TODO: Handle disconnected inputs, instead of zeroing them out or failing for unsupported types |
925 | 973 | x, *args = inputs |
| 974 | + if non_supported_types := tuple( |
| 975 | + inp.type |
| 976 | + for inp in inputs |
| 977 | + if not isinstance(inp.type, DenseTensorType | ScalarType) |
| 978 | + ): |
| 979 | + # TODO: Support SparseTensorTypes |
| 980 | + # TODO: Remaining types are likely just disconnected anyway |
| 981 | + msg = f"RootOp gradient not implemented due to inputs of type {non_supported_types}" |
| 982 | + return [ |
| 983 | + grad_not_implemented(self, i, inp, msg) for i, inp in enumerate(inputs) |
| 984 | + ] |
926 | 985 | x_star, _ = outputs |
927 | 986 | output_grad, _ = output_grads |
928 | 987 |
|
|