Skip to content

Commit daa6e41

Browse files
authored
Axpyv axpyf kernel optimizations (#903)
Details: - Implemented new AVX2 AXPYV kernels for complex (c) and double complex (z) data types. - Added a new AXPYF kernel with fuse_factor = 4 and iter_unroll = 4 to improve performance. - Updated kernel registration to include the new implementations.
1 parent 5d9332e commit daa6e41

File tree

12 files changed: +3796 / -1098 lines changed

config/haswell/bli_cntx_init_haswell.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ void bli_cntx_init_haswell( cntx_t* cntx )
8080
// axpyf
8181
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8,
8282
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8,
83+
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_zen_int_5,
84+
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_zen_int_5,
8385
// dotxf
8486
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
8587
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8,
@@ -93,8 +95,10 @@ void bli_cntx_init_haswell( cntx_t* cntx )
9395
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int,
9496
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int,
9597
#else
96-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
97-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
98+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
99+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
100+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
101+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
98102
#endif
99103
// dotv
100104
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int,
@@ -200,7 +204,7 @@ void bli_cntx_init_haswell( cntx_t* cntx )
200204
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
201205
#endif
202206
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
203-
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 );
207+
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 5, 5 );
204208
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 );
205209

206210
// -------------------------------------------------------------------------

config/knl/bli_cntx_init_knl.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ void bli_cntx_init_knl( cntx_t* cntx )
7474
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int,
7575
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int,
7676
#else
77-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
78-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
77+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
78+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
79+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
80+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
7981
#endif
8082

8183
// dotv

config/old/haswellbb/bli_cntx_init_haswell.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,10 @@ void bli_cntx_init_haswell( cntx_t* cntx )
160160
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int,
161161
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int,
162162
#else
163-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
164-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
163+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
164+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
165+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
166+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
165167
#endif
166168
// dotv
167169
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int,

config/skx/bli_cntx_init_skx.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ void bli_cntx_init_skx( cntx_t* cntx )
5555
// axpyf
5656
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8,
5757
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8,
58+
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_zen_int_5,
59+
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_zen_int_5,
5860

5961
// dotxf
6062
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
@@ -71,8 +73,10 @@ void bli_cntx_init_skx( cntx_t* cntx )
7173
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int,
7274
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int,
7375
#else
74-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
75-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
76+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
77+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
78+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
79+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
7680
#endif
7781

7882
// dotv
@@ -115,7 +119,7 @@ void bli_cntx_init_skx( cntx_t* cntx )
115119
bli_blksz_init ( &blkszs[ BLIS_KC ], 384, 256, -1, -1,
116120
480, 320, -1, -1 );
117121
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3752, -1, -1 );
118-
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
122+
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 5, 5 );
119123
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
120124

121125
// Update the context with the current architecture's register and cache

config/zen/bli_cntx_init_zen.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ void bli_cntx_init_zen( cntx_t* cntx )
121121
// axpyf
122122
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8,
123123
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8,
124+
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_zen_int_5,
125+
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_zen_int_5,
124126

125127
// dotxf
126128
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
@@ -131,8 +133,10 @@ void bli_cntx_init_zen( cntx_t* cntx )
131133
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,
132134

133135
// axpyv
134-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
135-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
136+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
137+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
138+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
139+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
136140

137141
// copyv
138142
BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_zen_int,
@@ -254,7 +258,7 @@ void bli_cntx_init_zen( cntx_t* cntx )
254258
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
255259
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 );
256260
#endif
257-
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
261+
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 5, 5 );
258262
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
259263

260264
// Initialize sup thresholds with architecture-appropriate values.

config/zen2/bli_cntx_init_zen2.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ void bli_cntx_init_zen2( cntx_t* cntx )
118118
// axpyf
119119
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_5,
120120
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_5,
121+
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_zen_int_5,
122+
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_zen_int_5,
121123

122124
// dotxf
123125
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
@@ -128,8 +130,10 @@ void bli_cntx_init_zen2( cntx_t* cntx )
128130
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,
129131

130132
// axpyv
131-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
132-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
133+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
134+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
135+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
136+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
133137

134138
// dotv
135139
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int10,
@@ -213,7 +217,7 @@ void bli_cntx_init_zen2( cntx_t* cntx )
213217
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 );
214218
#endif
215219

216-
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 );
220+
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, 5, 5 );
217221
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
218222

219223
// Initialize sup thresholds with architecture-appropriate values.

config/zen3/bli_cntx_init_zen3.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ void bli_cntx_init_zen3( cntx_t* cntx )
9494
// axpyf
9595
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_5,
9696
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_5,
97+
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_zen_int_5,
98+
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_zen_int_5,
9799

98100
// dotxf
99101
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
@@ -104,8 +106,10 @@ void bli_cntx_init_zen3( cntx_t* cntx )
104106
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,
105107

106108
// axpyv
107-
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
108-
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
109+
BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int_10,
110+
BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int_10,
111+
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_zen_int_5,
112+
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_zen_int_5,
109113

110114
// dotv
111115
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int10,
@@ -201,7 +205,7 @@ void bli_cntx_init_zen3( cntx_t* cntx )
201205
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 566 );
202206
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 256 );
203207

204-
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 );
208+
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, 5, 5 );
205209
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
206210

207211
// Initialize sup thresholds with architecture-appropriate values.

0 commit comments

Comments
 (0)