Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 1 addition & 47 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,52 +135,6 @@ def create(
Lifecycle:
Maturing.
"""
index_column_schema = []
index_column_data = {}
ndim = len(shape)

for dim_idx, dim_shape in enumerate(shape):
dim_name = f"soma_dim_{dim_idx}"

pa_field = pa.field(dim_name, pa.int64())
index_column_schema.append(pa_field)

# Here is our Arrow data API for communicating schema info between
# Python/R and C++ libtiledbsoma:
#
# [0] core max domain lo
# [1] core max domain hi
# [2] core extent parameter
# If present, these next two signal to use the current-domain feature:
# [3] core current domain lo
# [4] core current domain hi

if dim_shape is None:
raise ValueError("DenseNDArray shape slots must be numeric")

dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
ndim,
TileDBCreateOptions.from_platform_config(platform_config),
)

if dim_shape == 0:
raise ValueError("DenseNDArray shape slots must be at least 1")

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]

index_column_info = pa.RecordBatch.from_pydict(index_column_data, schema=pa.schema(index_column_schema))

carrow_type = pyarrow_to_carrow_type(type)
plt_cfg = build_clib_platform_config(platform_config)
context, tiledb_timestamp = _update_context_and_timestamp(context, tiledb_timestamp)
Expand All @@ -189,7 +143,7 @@ def create(
clib.SOMADenseNDArray.create(
uri,
format=carrow_type,
index_column_info=index_column_info,
shape=shape,
ctx=context._handle,
platform_config=plt_cfg,
timestamp=(0, timestamp_ms),
Expand Down
52 changes: 3 additions & 49 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,58 +171,12 @@ def create(
# about. It's resizeable (up to max_domain anyway), reads and writes are bounds-checked
# against it, etc.

index_column_schema = []
index_column_data = {}

if any(col_size is None for col_size in shape):
new_shape = tuple(1 if col_size is None else col_size for col_size in shape)
if None in shape:
warnings.warn(
f"Using ``None`` in the shape is deprecated. Updating shape={shape} to shape={new_shape}.",
f"Using ``None`` in the shape is deprecated. Updating shape={shape} to shape={tuple(1 if col_size is None else col_size for col_size in shape)}.",
DeprecationWarning,
stacklevel=2,
)
shape = new_shape

for dim_idx, dim_shape in enumerate(shape):
dim_name = f"soma_dim_{dim_idx}"

pa_field = pa.field(dim_name, pa.int64())
index_column_schema.append(pa_field)

# Here is our Arrow data API for communicating schema info between
# Python/R and C++ libtiledbsoma:
#
# [0] core max domain lo
# [1] core max domain hi
# [2] core extent parameter
# If present, these next two signal to use the current-domain feature:
# [3] core current domain lo
# [4] core current domain hi

dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)

if dim_shape == 0:
raise ValueError("SparseNDArray shape slots must be at least 1")
if dim_shape is None:
dim_shape = 1

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]

index_column_info = pa.RecordBatch.from_pydict(index_column_data, schema=pa.schema(index_column_schema))

carrow_type = pyarrow_to_carrow_type(type)
plt_cfg = build_clib_platform_config(platform_config)
Expand All @@ -232,7 +186,7 @@ def create(
clib.SOMASparseNDArray.create(
uri,
format=carrow_type,
index_column_info=index_column_info,
shape=shape,
ctx=context._handle,
platform_config=plt_cfg,
timestamp=(0, timestamp_ms),
Expand Down
26 changes: 26 additions & 0 deletions apis/python/src/tiledbsoma/soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,32 @@ void load_soma_dense_ndarray(py::module& m) {
"platform_config"_a,
"timestamp"_a = py::none())

.def_static(
"create",
[](std::string_view uri,
std::string format,
const std::vector<std::optional<int64_t>>& shape,
std::shared_ptr<SOMAContext> context,
PlatformConfig platform_config,
std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
try {
SOMADenseNDArray::create(uri, format, shape, context, platform_config, timestamp);
} catch (const std::out_of_range& e) {
throw py::type_error(e.what());
} catch (const std::range_error& e) {
throw py::value_error(e.what());
} catch (const std::exception& e) {
TPY_ERROR_LOC(e.what());
}
},
"uri"_a,
py::kw_only(),
"format"_a,
"shape"_a,
"ctx"_a,
"platform_config"_a,
"timestamp"_a = py::none())

.def_static(
"open",
py::overload_cast<
Expand Down
26 changes: 26 additions & 0 deletions apis/python/src/tiledbsoma/soma_sparse_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,32 @@ void load_soma_sparse_ndarray(py::module& m) {
"platform_config"_a,
"timestamp"_a = py::none())

.def_static(
"create",
[](std::string_view uri,
std::string format,
const std::vector<std::optional<int64_t>>& shape,
std::shared_ptr<SOMAContext> context,
PlatformConfig platform_config,
std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
try {
SOMASparseNDArray::create(uri, format, shape, context, platform_config, timestamp);
} catch (const std::out_of_range& e) {
throw py::type_error(e.what());
} catch (const std::range_error& e) {
throw py::value_error(e.what());
} catch (const std::exception& e) {
TPY_ERROR_LOC(e.what());
}
},
"uri"_a,
py::kw_only(),
"format"_a,
"shape"_a,
"ctx"_a,
"platform_config"_a,
"timestamp"_a = py::none())

.def_static(
"open",
py::overload_cast<
Expand Down
4 changes: 4 additions & 0 deletions apis/r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ createSchemaFromArrow <- function(uri, nasp, nadimap, nadimsp, sparse, datatype,
invisible(.Call(`_tiledbsoma_createSchemaFromArrow`, uri, nasp, nadimap, nadimsp, sparse, datatype, pclst, ctxxp, tsvec))
}

# R-level entry point for the compiled routine `_tiledbsoma_createSchemaForNDArray`.
# Every argument is forwarded unchanged through .Call(); `tsvec` defaults to NULL.
# The call's result is wrapped in invisible(), so nothing is auto-printed.
# NOTE(review): this looks like an Rcpp-generated wrapper (RcppExports.R) -- confirm
# whether hand-written comments here survive regeneration before relying on them.
createSchemaForNDArray <- function(uri, format, shape, soma_type, pclst, ctxxp, tsvec = NULL) {
invisible(.Call(`_tiledbsoma_createSchemaForNDArray`, uri, format, shape, soma_type, pclst, ctxxp, tsvec))
}

# R-level entry point for the compiled routine `_tiledbsoma_writeArrayFromArrow`.
# Every argument is forwarded unchanged through .Call(). Defaults: `arraytype = ""`,
# `config = NULL`, `tsvec = NULL`. The result is returned invisibly.
# NOTE(review): this looks like an Rcpp-generated wrapper (RcppExports.R) -- confirm
# whether hand-written comments here survive regeneration before relying on them.
writeArrayFromArrow <- function(uri, naap, nasp, ctxxp, arraytype = "", config = NULL, tsvec = NULL) {
invisible(.Call(`_tiledbsoma_writeArrayFromArrow`, uri, naap, nasp, ctxxp, arraytype, config, tsvec))
}
Expand Down
25 changes: 4 additions & 21 deletions apis/r/R/SOMANDArrayBase.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,6 @@ SOMANDArrayBase <- R6::R6Class(
# typed, queryable data structure.
tiledb_create_options <- TileDBCreateOptions$new(platform_config)

## we transfer to the arrow table via a pair of array and schema pointers
dnaap <- nanoarrow::nanoarrow_allocate_array()
dnasp <- nanoarrow::nanoarrow_allocate_schema()
arrow::as_record_batch(dom_ext_tbl)$export_to_c(dnaap, dnasp)

## we need a schema pointer to transfer the schema information
## so we first embed the (single column) 'type' into a schema and
## combine it with domain schema
schema <- arrow::unify_schemas(
arrow::schema(dom_ext_tbl),
arrow::schema(arrow::field("soma_data", type))
)
nasp <- nanoarrow::nanoarrow_allocate_schema()
schema$export_to_c(nasp)

## create array
# ctxptr <- self$tiledbsoma_ctx$context()
sparse <- if (inherits(self, "SOMASparseNDArray")) {
Expand All @@ -80,13 +65,11 @@ SOMANDArrayBase <- R6::R6Class(
} else {
stop("Unknown SOMA array type: ", self$class(), call. = FALSE)
}
createSchemaFromArrow(
createSchemaForNDArray(
uri = self$uri,
nasp = nasp,
nadimap = dnaap,
nadimsp = dnasp,
sparse = sparse,
datatype = if (sparse) "SOMASparseNDArray" else "SOMADenseNDArray",
format = as_nanoarrow_schema(arrow::schema(arrow::field("soma_data", type)))$children$soma_data$format,
shape = as.integer64(shape),
soma_type = if (sparse) "SOMASparseNDArray" else "SOMADenseNDArray",
pclst = tiledb_create_options$to_list(FALSE),
ctxxp = private$.context$handle,
tsvec = self$.tiledb_timestamp_range
Expand Down
17 changes: 17 additions & 0 deletions apis/r/src/RcppExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions apis/r/src/arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,65 @@ void createSchemaFromArrow(
}
}

// [[Rcpp::export]]
void createSchemaForNDArray(
const std::string& uri,
const std::string& format,
Rcpp::NumericVector shape,
const std::string& soma_type,
Rcpp::List pclst,
Rcpp::XPtr<somactx_wrap_t> ctxxp,
Rcpp::Nullable<Rcpp::DatetimeVector> tsvec = R_NilValue) {
tdbs::PlatformConfig pltcfg;
pltcfg.dataframe_dim_zstd_level = Rcpp::as<int>(pclst["dataframe_dim_zstd_level"]);
pltcfg.sparse_nd_array_dim_zstd_level = Rcpp::as<int>(pclst["sparse_nd_array_dim_zstd_level"]);
pltcfg.dense_nd_array_dim_zstd_level = Rcpp::as<int>(pclst["dense_nd_array_dim_zstd_level"]);
pltcfg.write_X_chunked = Rcpp::as<bool>(pclst["write_X_chunked"]);
pltcfg.goal_chunk_nnz = Rcpp::as<double>(pclst["goal_chunk_nnz"]);
pltcfg.capacity = Rcpp::as<double>(pclst["capacity"]);
pltcfg.offsets_filters = Rcpp::as<std::string>(pclst["offsets_filters"]);
pltcfg.validity_filters = Rcpp::as<std::string>(pclst["validity_filters"]);
pltcfg.allows_duplicates = Rcpp::as<bool>(pclst["allows_duplicates"]);
pltcfg.cell_order = Rcpp::as<std::string>(pclst["cell_order"]);
pltcfg.tile_order = Rcpp::as<std::string>(pclst["tile_order"]);
pltcfg.attrs = Rcpp::as<std::string>(pclst["attrs"]);
pltcfg.dims = Rcpp::as<std::string>(pclst["dims"]);

// shared pointer to SOMAContext from external pointer wrapper
std::shared_ptr<tdbs::SOMAContext> sctx = ctxxp->ctxptr;
// shared pointer to TileDB Context from SOMAContext
std::shared_ptr<tiledb::Context> ctx = sctx->tiledb_ctx();

// optional timestamp range
std::optional<tdbs::TimestampRange> tsrng = makeTimestampRange(tsvec);

bool exists = false;
if (soma_type == "SOMASparseNDArray") {
exists = tdbs::SOMASparseNDArray::exists(uri, sctx);
} else if (soma_type == "SOMADenseNDArray") {
exists = tdbs::SOMADenseNDArray::exists(uri, sctx);
} else {
Rcpp::stop(tfm::format("Error: Invalid SOMA type_argument '%s'", soma_type));
}

if (exists) {
Rcpp::stop(tfm::format("Error: Array '%s' already exists", uri));
}

std::vector<std::optional<int64_t>> cpp_shape;
for (size_t i = 0; i < shape.length(); ++i) {
cpp_shape.push_back(std::make_optional<int64_t>(*reinterpret_cast<int64_t*>(&shape[i])));
}

if (soma_type == "SOMASparseNDArray") {
tdbs::SOMASparseNDArray::create(uri, format, cpp_shape, sctx, pltcfg, tsrng);
} else if (soma_type == "SOMADenseNDArray") {
tdbs::SOMADenseNDArray::create(uri, format, cpp_shape, sctx, pltcfg, tsrng);
} else {
Rcpp::stop(tfm::format("Error: Invalid SOMA type_argument '%s'", soma_type));
}
}

// [[Rcpp::export]]
void writeArrayFromArrow(
const std::string& uri,
Expand Down
29 changes: 29 additions & 0 deletions libtiledbsoma/src/soma/soma_dense_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@
*/
#include "soma_dense_ndarray.h"
#include "../utils/arrow_adapter.h"
#include "soma_attribute.h"
#include "soma_coordinates.h"
#include "soma_dimension.h"

#include "common/arrow/utils.h"
#include "common/logging/impl/logger.h"

#include <limits>

namespace tiledbsoma {
using namespace tiledb;
Expand Down Expand Up @@ -70,6 +77,28 @@ void SOMADenseNDArray::create(
SOMAArray::create(ctx, uri, tiledb_schema, "SOMADenseNDArray", std::nullopt, timestamp);
}

void SOMADenseNDArray::create(
std::string_view uri,
std::string_view format,
std::span<const std::optional<int64_t>> shape,
std::shared_ptr<SOMAContext> ctx,
PlatformConfig platform_config,
std::optional<TimestampRange> timestamp) {
std::vector<int64_t> sanitized_shape;
std::transform(shape.begin(), shape.end(), std::back_inserter(sanitized_shape), [](const auto& dim_shape) {
if (!dim_shape) {
throw std::range_error("[SOMADenseNDArray][create] Shape slots must be numeric");
}

return dim_shape.value();
});

tiledb::ArraySchema schema = utils::create_nd_array_schema(
"SOMADenseNDArray", false, format, sanitized_shape, ctx->tiledb_ctx(), platform_config, timestamp);

SOMAArray::create(ctx, uri, schema, "SOMADenseNDArray", std::nullopt, timestamp);
}

std::unique_ptr<SOMADenseNDArray> SOMADenseNDArray::open(
std::string_view uri, OpenMode mode, std::shared_ptr<SOMAContext> ctx, std::optional<TimestampRange> timestamp) {
auto array = std::make_unique<SOMADenseNDArray>(mode, uri, ctx, timestamp);
Expand Down
Loading