Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions .github/workflows/builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- run: make
- run: make check
- run: make install
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: generic
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
working-directory: out
- run: make check
working-directory: out
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: out-of-tree
Expand Down Expand Up @@ -85,7 +85,7 @@ jobs:
HWLOC_CPUID_PATH: ${{ github.workspace }}/cpuid
VALGRIND_SUPPRESSIONS_FILES: ${{ github.workspace }}/.valgrind.supp
OMP_NUM_THREADS: 1
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: valgrind
Expand Down Expand Up @@ -114,7 +114,7 @@ jobs:
HWLOC_CPUID_PATH: ${{ github.workspace }}/cpuid
VALGRIND_SUPPRESSIONS_FILES: ${{ github.workspace }}/.valgrind.supp
OMP_NUM_THREADS: 1
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: valgrind
Expand Down Expand Up @@ -147,7 +147,7 @@ jobs:
mkdir build
./configure --prefix=`pwd`/build --without-rocm
- run: make distcheck
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: distcheck
Expand All @@ -173,7 +173,7 @@ jobs:
- run: make CFLAGS=-std=c99
- run: make check
- run: make install
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: rocm
Expand All @@ -182,3 +182,40 @@ jobs:
tests/*.log
benchmarks/*.log
doc/tutorials/*.log
nvhpc:
env:
CFLAGS: "-std=c99"
runs-on: ubuntu-latest
defaults:
run:
shell: bash
container:
image: nvcr.io/nvidia/nvhpc:24.7-devel-cuda12.5-ubuntu22.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 0
- run: apt-get update
- run: apt-get install -y make autoconf automake libtool pkgconf libhwloc-dev
- run: |
echo "0.8.0" > .tarball-version
- name: build
run: |
source /usr/share/lmod/6.6/init/bash
module load nvhpc
./autogen.sh
mkdir build
./configure --prefix=`pwd`/build --with-cuda CUDA_HOME=$NVHPC_ROOT/cuda
make
make check
make install
- uses: actions/upload-artifact@v4
if: failure()
with:
name: nvhpc
path: |
config.log
tests/*.log
benchmarks/*.log
doc/tutorials/*.log
2 changes: 1 addition & 1 deletion .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/$INSTALL_PATH/lib:$LD_LIBRARY_PATH
cd xsbench/openmp-threading
test -n "$(./XSBench -s small | grep 'Verification checksum' | grep -i valid)"
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v4
if: failure()
with:
name: generic
Expand Down
9 changes: 6 additions & 3 deletions benchmarks/blas/l1_kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,18 @@ double ddot(size_t n, double *a, double *b, double *c, double scalar)
(void)*c;
(void)scalar;
size_t i;
long double dot = 0.0;
/* Should be a long double for overflow checks, but some compilers (nvc)
* do not support OpenMP reductions on long double as of 2024.
*/
double dot = 0.0;

#pragma omp parallel for reduction(+ : dot)
for (i = 0; i < n; i++) {
long double temp;
double temp;
temp = a[i] * b[i];
dot += temp;
}
return (double)dot;
return dot;
}

double dnrm2(size_t n, double *a, double *b, double *c, double scalar)
Expand Down
4 changes: 4 additions & 0 deletions doc/tutorials/area/1_custom_interleave_area.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ test_custom_area(const size_t size)
int
main(void)
{
/* These checks cannot be performed consistently in a CI environment. */
if (!strcmp(getenv("CI"), "true"))
exit(77);

const size_t size = (2 << 16); // 16 pages

test_custom_area(size);
Expand Down
4 changes: 4 additions & 0 deletions doc/tutorials/area/2_aml_area_hwloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ int max_bandwidth_area()

int main(int argc, char **argv)
{
/* These checks cannot be performed consistently in a CI environment. */
if (!strcmp(getenv("CI"), "true"))
exit(77);

if (aml_init(&argc, &argv) != 0)
return 1;

Expand Down
54 changes: 52 additions & 2 deletions include/aml/higher/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ struct aml_allocator {
struct aml_allocator_ops *ops;
};

/** Chunk information internal to an allocator. */
struct aml_allocator_chunk {
/** Memory allocated for the user (read-only). */
void *ptr;
/** Size of the chunk, greater than or equal to the size requested by
* the user (read-only). */
size_t size;
/** An opaque object that the user can attach to the chunk
* (read/write). */
void *user_data;
};

/**
* Allocator methods.
* The design pattern of the aml allocator is designed to meet simplicity and
Expand Down Expand Up @@ -104,6 +116,20 @@ struct aml_allocator_ops {
* @return AML_SUCCESS on success or an appropriate aml error code.
*/
int (*free)(struct aml_allocator_data *data, void *ptr);

/**
* Optional method.
* @see aml_allocator_alloc_chunk()
*/
struct aml_allocator_chunk *(*alloc_chunk)(
struct aml_allocator_data *data, size_t size);

/**
* Optional method.
* @see aml_allocator_free_chunk()
*/
int (*free_chunk)(struct aml_allocator_data *data,
struct aml_allocator_chunk *chunk);
};

/**
Expand All @@ -118,14 +144,38 @@ struct aml_allocator_ops {
void *aml_alloc(struct aml_allocator *allocator, size_t size);

/**
* Release memory associated with a pointer obtained with an
* allocator.
* Release memory associated with a pointer obtained from a call to
* aml_alloc().
*
* @param[in, out] allocator: The allocator used to allocate pointer.
* @param[in, out] ptr: The pointer allocated with the same allocator.
* @return AML_SUCCESS on success or an appropriate aml error code.
*/
int aml_free(struct aml_allocator *allocator, void *ptr);

/**
* Allocate memory with an allocator.
*
* @param[in, out] allocator: The allocator to use.
* @param[in] size: The minimum allocation size.
* @return NULL on error with aml_errno set to the appropriate error
* code.
* @return The chunk of memory allocated.
*/
struct aml_allocator_chunk *
aml_allocator_alloc_chunk(struct aml_allocator *allocator, size_t size);

/**
* Release memory associated with the chunk obtained from a call to
* aml_allocator_alloc_chunk().
*
* @param[in, out] allocator: The allocator used to allocate the chunk.
* @param[in, out] chunk: The chunk allocated with the same allocator.
* @return AML_SUCCESS on success or an appropriate aml error code.
*/
int aml_allocator_free_chunk(struct aml_allocator *allocator,
struct aml_allocator_chunk *chunk);

/**
* @}
**/
Expand Down
9 changes: 4 additions & 5 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,10 @@ endif
# Cuda sources

if HAVE_CUDA
libcuda_la_SOURCES=area/cuda.c dma/cuda.c
noinst_LTLIBRARIES+=libcuda.la
libcuda_la_CPPFLAGS=$(AM_CPPFLAGS) $(CUDA_CFLAGS)
libcuda_la_LDFLAGS=$(AM_LDFLAGS) $(CUDA_LIBS)
libaml_la_LIBADD=libcuda.la
AM_CPPFLAGS += $(CUDA_CFLAGS)
AM_LDFLAGS += $(CUDA_LIBS)
libaml_la_SOURCES+=area/cuda.c
libaml_la_SOURCES+=dma/cuda.c
endif

#############################################
Expand Down
41 changes: 35 additions & 6 deletions src/allocator/allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,58 @@
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

#include <assert.h>

#include "aml.h"

#include "aml/higher/allocator.h"

void *aml_alloc(struct aml_allocator *allocator, size_t size)
{
if (allocator == NULL || allocator->data == NULL ||
allocator->ops == NULL || allocator->ops->alloc == NULL) {
allocator->ops == NULL) {
aml_errno = AML_EINVAL;
return NULL;
}

assert(allocator->ops->alloc);
return allocator->ops->alloc(allocator->data, size);
}

int aml_free(struct aml_allocator *allocator, void *ptr)
struct aml_allocator_chunk *
aml_allocator_alloc_chunk(struct aml_allocator *allocator, size_t size)
{
if (allocator == NULL || allocator->data == NULL ||
allocator->ops == NULL || allocator->ops->free == NULL)
return -AML_EINVAL;
allocator->ops == NULL) {
aml_errno = AML_EINVAL;
return NULL;
}
if (allocator->ops->alloc_chunk == NULL) {
aml_errno = AML_ENOTSUP;
return NULL;
}
return allocator->ops->alloc_chunk(allocator->data, size);
}

/*
 * Hand `ptr` back to the allocator that produced it.
 *
 * A NULL `ptr` is reported as success before the allocator is even looked
 * at. Otherwise the allocator, its data and its method table must all be
 * set, or -AML_EINVAL is returned. The `free` method itself is checked
 * with an assertion rather than an error return.
 */
int aml_free(struct aml_allocator *allocator, void *ptr)
{
	/* Nothing to release. */
	if (!ptr)
		return AML_SUCCESS;

	/* Evaluated left to right, so no field of a NULL allocator is read. */
	if (!(allocator && allocator->data && allocator->ops))
		return -AML_EINVAL;

	assert(allocator->ops->free);
	return allocator->ops->free(allocator->data, ptr);
}

/*
 * Release `chunk` through the allocator's `free_chunk` method.
 *
 * A NULL `chunk` is reported as success before the allocator is even
 * looked at. Otherwise the allocator, its data and its method table must
 * all be set, or -AML_EINVAL is returned. When the allocator does not
 * provide `free_chunk`, -AML_ENOTSUP is returned.
 */
int aml_allocator_free_chunk(struct aml_allocator *allocator,
                             struct aml_allocator_chunk *chunk)
{
	/* Nothing to release. */
	if (!chunk)
		return AML_SUCCESS;

	/* Evaluated left to right, so no field of a NULL allocator is read. */
	if (!(allocator && allocator->data && allocator->ops))
		return -AML_EINVAL;

	/* Dispatch only when the method is actually provided. */
	return allocator->ops->free_chunk ?
	               allocator->ops->free_chunk(allocator->data, chunk) :
	               -AML_ENOTSUP;
}
3 changes: 2 additions & 1 deletion src/allocator/area.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
struct aml_allocator_ops aml_allocator_area_ops = {
.alloc = aml_allocator_area_alloc,
.free = aml_allocator_area_free,
};
.alloc_chunk = NULL,
.free_chunk = NULL};

struct aml_allocator_area_chunk {
void *ptr;
Expand Down
Loading