|
725 | 725 | " }\n", |
726 | 726 | " }\n", |
727 | 727 | " STOP(section0,timers)\n", |
728 | | - "}\n" |
| 728 | + "}\n", |
| 729 | + "\n" |
729 | 730 | ] |
730 | 731 | } |
731 | 732 | ], |
|
1192 | 1193 | "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n", |
1193 | 1194 | "{\n", |
1194 | 1195 | " float **restrict pr2_vec __attribute__ ((aligned (64)));\n", |
1195 | | - " posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n", |
| 1196 | + " posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n", |
1196 | 1197 | " float *restrict r0_vec __attribute__ ((aligned (64)));\n", |
1197 | | - " posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n", |
| 1198 | + " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n", |
1198 | 1199 | " #pragma omp parallel num_threads(nthreads)\n", |
1199 | 1200 | " {\n", |
1200 | 1201 | " const int tid = omp_get_thread_num();\n", |
1201 | | - " posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y0_blk0_size + 4)*sizeof(float));\n", |
| 1202 | + " posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y0_blk0_size)));\n", |
1202 | 1203 | " }\n", |
1203 | 1204 | "\n", |
1204 | 1205 | " float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n", |
|
1412 | 1413 | "name": "stdout", |
1413 | 1414 | "output_type": "stream", |
1414 | 1415 | "text": [ |
1415 | | - "posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n" |
| 1416 | + "posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n" |
1416 | 1417 | ] |
1417 | 1418 | } |
1418 | 1419 | ], |
|
1483 | 1484 | "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n", |
1484 | 1485 | "{\n", |
1485 | 1486 | " float **restrict pr2_vec __attribute__ ((aligned (64)));\n", |
1486 | | - " posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n", |
| 1487 | + " posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)(nthreads));\n", |
1487 | 1488 | " float *restrict r0_vec __attribute__ ((aligned (64)));\n", |
1488 | | - " posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n", |
| 1489 | + " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n", |
1489 | 1490 | " #pragma omp parallel num_threads(nthreads)\n", |
1490 | 1491 | " {\n", |
1491 | 1492 | " const int tid = omp_get_thread_num();\n", |
1492 | | - " posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y_size + 4)*sizeof(float));\n", |
| 1493 | + " posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size)));\n", |
1493 | 1494 | " }\n", |
1494 | 1495 | "\n", |
1495 | 1496 | " float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n", |
|
1626 | 1627 | "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_x, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int x_size, const int y_size, const int z_size, struct profiler * timers)\n", |
1627 | 1628 | "{\n", |
1628 | 1629 | " float *restrict r0_vec __attribute__ ((aligned (64)));\n", |
1629 | | - " posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n", |
| 1630 | + " posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)(z_size)*(long)(y_size)*(long)(x_size));\n", |
1630 | 1631 | " float *restrict r3_vec __attribute__ ((aligned (64)));\n", |
1631 | | - " posix_memalign((void**)(&r3_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n", |
| 1632 | + " posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n", |
1632 | 1633 | " float *restrict r4_vec __attribute__ ((aligned (64)));\n", |
1633 | | - " posix_memalign((void**)(&r4_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n", |
| 1634 | + " posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)(z_size)*(4 + (long)(y_size))*(4 + (long)(x_size)));\n", |
1634 | 1635 | "\n", |
1635 | 1636 | " float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n", |
1636 | 1637 | " float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n", |
|
0 commit comments