DualSet: BLAS3 optimization for IntegralMomentOfDerivative (#210)

pbrubeck · web-flow · commit d2204e828eab · 2026-01-21T19:48:18.000Z
* DualSet: BLAS3 optimization for IntegralMomentOfDerivative
diff --git a/FIAT/dual_set.py b/FIAT/dual_set.py
@@ -7,6 +7,8 @@
 # SPDX-License-Identifier:    LGPL-3.0-or-later
 
 import numpy
+from itertools import chain
+from collections import defaultdict
 
 from FIAT import polynomial_set, functional
 from FIAT.reference_element import compute_unflattening_map
@@ -117,36 +119,37 @@ def to_riesz(self, poly_set):
         riesz_shape = (num_nodes, *tshape, num_exp)
         mat = numpy.zeros(riesz_shape, "d")
 
-        pts = set()
-        dpts = set()
-        Qs_to_ells = dict()
-        for i, ell in enumerate(self.nodes):
-            if len(ell.deriv_dict) > 0:
-                dpts.update(ell.deriv_dict.keys())
-                continue
-            if isinstance(ell, functional.IntegralMoment):
-                Q = ell.Q
-            else:
-                Q = None
-                pts.update(ell.pt_dict.keys())
-            if Q in Qs_to_ells:
+        def map_quadratures_to_points(nodes, deriv=False):
+            Qs_to_ells = defaultdict(list)
+            for i, ell in enumerate(nodes):
+                if deriv and len(ell.deriv_dict) == 0:
+                    continue
+                elif not deriv and len(ell.pt_dict) == 0:
+                    continue
+                if isinstance(ell, (functional.IntegralMoment, functional.IntegralMomentOfDerivative)):
+                    Q = ell.Q
+                else:
+                    Q = None
                 Qs_to_ells[Q].append(i)
-            else:
-                Qs_to_ells[Q] = [i]
-
-        Qs_to_pts = {}
-        if len(pts) > 0:
-            Qs_to_pts[None] = tuple(sorted(pts))
-        for Q in Qs_to_ells:
-            if Q is not None:
-                cur_pts = tuple(map(tuple, Q.pts))
+            pts = set()
+            Qs_to_pts = {}
+            for Q in Qs_to_ells:
+                if Q is None:
+                    if deriv:
+                        cur_pts = chain.from_iterable(nodes[i].deriv_dict.keys() for i in Qs_to_ells[None])
+                    else:
+                        cur_pts = chain.from_iterable(nodes[i].pt_dict.keys() for i in Qs_to_ells[None])
+                    cur_pts = tuple(set(cur_pts))
+                else:
+                    cur_pts = tuple(map(tuple, Q.pts))
                 Qs_to_pts[Q] = cur_pts
                 pts.update(cur_pts)
+            pts = list(sorted(pts))
+            return Qs_to_ells, Qs_to_pts, pts
 
         # Now tabulate the function values
-        pts = list(sorted(pts))
+        Qs_to_ells, Qs_to_pts, pts = map_quadratures_to_points(self.nodes)
         expansion_values = numpy.transpose(es.tabulate(ed, pts))
-
         for Q in Qs_to_ells:
             ells = Qs_to_ells[Q]
             cur_pts = Qs_to_pts[Q]
@@ -171,25 +174,35 @@ def to_riesz(self, poly_set):
         # Tabulate the derivative values that are needed
         max_deriv_order = max(ell.max_deriv_order for ell in self.nodes)
         if max_deriv_order > 0:
-            dpts = list(sorted(dpts))
+            Qs_to_ells, Qs_to_pts, pts = map_quadratures_to_points(self.nodes, deriv=True)
             # It's easiest/most efficient to get derivatives of the
             # expansion set through the polynomial set interface.
             # This is creating a short-lived set to do just this.
             coeffs = numpy.eye(num_exp)
             expansion = polynomial_set.PolynomialSet(self.ref_el, ed, ed, es, coeffs)
-            dexpansion_values = expansion.tabulate(dpts, max_deriv_order)
-
-            ells = [k for k, ell in enumerate(self.nodes) if len(ell.deriv_dict) > 0]
-            wshape = (len(ells), *tshape, len(dpts))
-            dwts = {alpha: numpy.zeros(wshape, "d") for alpha in dexpansion_values if sum(alpha) > 0}
-            for i, k in enumerate(ells):
-                ell = self.nodes[k]
-                for pt, wac_list in ell.deriv_dict.items():
-                    j = dpts.index(pt)
-                    for (w, alpha, c) in wac_list:
-                        dwts[alpha][i][c][j] = w
-            for alpha in dwts:
-                mat[ells] += numpy.dot(dwts[alpha], dexpansion_values[alpha].T)
+            dexpansion_values = expansion.tabulate(pts, max_deriv_order)
+            for Q in Qs_to_ells:
+                ells = Qs_to_ells[Q]
+                cur_pts = Qs_to_pts[Q]
+                indices = list(map(pts.index, cur_pts))
+                wshape = (len(ells), *tshape, len(cur_pts))
+                dwts = {alpha: numpy.zeros(wshape, "d") for alpha in dexpansion_values if sum(alpha) > 0}
+                if Q is None:
+                    for i, k in enumerate(ells):
+                        ell = self.nodes[k]
+                        for pt, wac_list in ell.deriv_dict.items():
+                            j = cur_pts.index(pt)
+                            for (w, alpha, c) in wac_list:
+                                dwts[alpha][i][c][j] = w
+                else:
+                    for i, k in enumerate(ells):
+                        ell = self.nodes[k]
+                        for alpha in ell.weights:
+                            dwts[alpha][i][ell.comp][:] = ell.weights[alpha]
+                for alpha in dwts:
+                    wts = dwts[alpha]
+                    expansion_values = dexpansion_values[alpha].T
+                    mat[ells] += numpy.dot(wts, expansion_values[indices])
         return mat
 
     def get_indices(self, restriction_domain, take_closure=True):
diff --git a/FIAT/functional.py b/FIAT/functional.py
@@ -347,6 +347,8 @@ def __init__(self, ref_el, Q, f_at_qpts, *directions, comp=(), shp=(), nm=""):
 
         points = Q.get_points()
         weights = numpy.multiply(f_at_qpts, Q.get_weights())
+        self.weights = {alpha: weights*tau[alpha] for alpha in tau}
+
         dpt_dict = {tuple(pt): [(wt*tau[alpha], alpha, comp) for alpha in tau]
                     for pt, wt in zip(points, weights)}
 
diff --git a/FIAT/wuxu.py b/FIAT/wuxu.py
@@ -168,7 +168,6 @@ def __init__(self, ref_el, degree):
             Q = FacetQuadratureRule(ref_el, 1, e, Q_ref, avg=True)
             cur = len(nodes)
             nodes.append(IntegralMomentOfDerivative(ref_el, Q, f, n, n))
-
             entity_ids[1][e].extend(range(cur, len(nodes)))
 
         super().__init__(nodes, ref_el, entity_ids)