|
725 | 725 | " }\n", |
726 | 726 | " }\n", |
727 | 727 | " STOP(section0,timers)\n", |
728 | | - "}\n", |
729 | | - "\n" |
| 728 | + "}\n" |
730 | 729 | ] |
731 | 730 | } |
732 | 731 | ], |
|
1193 | 1192 | "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n", |
1194 | 1193 | "{\n", |
1195 | 1194 | " float **restrict pr2_vec __attribute__ ((aligned (64)));\n", |
1196 | | - " posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n", |
| 1195 | + " posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)nthreads);\n", |
1197 | 1196 | " float *restrict r0_vec __attribute__ ((aligned (64)));\n", |
1198 | | - " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n", |
| 1197 | + " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n", |
1199 | 1198 | " #pragma omp parallel num_threads(nthreads)\n", |
1200 | 1199 | " {\n", |
1201 | 1200 | " const int tid = omp_get_thread_num();\n", |
1202 | | - " posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y0_blk0_size)));\n", |
| 1201 | + " posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)z_size*(4 + (long)y0_blk0_size));\n", |
1203 | 1202 | " }\n", |
1204 | 1203 | "\n", |
1205 | 1204 | " float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n", |
|
1413 | 1412 | "name": "stdout", |
1414 | 1413 | "output_type": "stream", |
1415 | 1414 | "text": [ |
1416 | | - "posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n" |
| 1415 | + "posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n" |
1417 | 1416 | ] |
1418 | 1417 | } |
1419 | 1418 | ], |
|
1484 | 1483 | "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n", |
1485 | 1484 | "{\n", |
1486 | 1485 | " float **restrict pr2_vec __attribute__ ((aligned (64)));\n", |
1487 | | - " posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n", |
| 1486 | + " posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)nthreads);\n", |
1488 | 1487 | " float *restrict r0_vec __attribute__ ((aligned (64)));\n", |
1489 | | - " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n", |
| 1488 | + " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n", |
1490 | 1489 | " #pragma omp parallel num_threads(nthreads)\n", |
1491 | 1490 | " {\n", |
1492 | 1491 | " const int tid = omp_get_thread_num();\n", |
1493 | | - " posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size)));\n", |
| 1492 | + " posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)z_size*(4 + (long)y_size));\n", |
1494 | 1493 | " }\n", |
1495 | 1494 | "\n", |
1496 | 1495 | " float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n", |
|
1627 | 1626 | "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_x, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int x_size, const int y_size, const int z_size, struct profiler * timers)\n", |
1628 | 1627 | "{\n", |
1629 | 1628 | " float *restrict r0_vec __attribute__ ((aligned (64)));\n", |
1630 | | - " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n", |
| 1629 | + " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n", |
1631 | 1630 | " float *restrict r3_vec __attribute__ ((aligned (64)));\n", |
1632 | | - " posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n", |
| 1631 | + " posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)z_size*(4 + (long)y_size)*(4 + (long)x_size));\n", |
1633 | 1632 | " float *restrict r4_vec __attribute__ ((aligned (64)));\n", |
1634 | | - " posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n", |
| 1633 | + " posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)z_size*(4 + (long)y_size)*(4 + (long)x_size));\n", |
1635 | 1634 | "\n", |
1636 | 1635 | " float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n", |
1637 | 1636 | " float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n", |
|
0 commit comments