Skip to content

Commit 9fc759c

Browse files
Merge pull request #308 from NCAS-CMS/interface_type
Switch from `storage_type` to `interface_type`
2 parents f213d66 + 57195a0 commit 9fc759c

16 files changed

+126
-99
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ There are some (relatively obsolete) documents from our exploration of zarr inte
6969
## Storage types
7070

7171
PyActiveStorage is designed to interact with various storage backends.
72-
The storage backend is automatically detected, but can still be specified using the `storage_type` argument to the `Active` constructor.
72+
The storage backend is automatically detected, but can still be specified using the `interface_type` argument to the `Active` constructor.
7373
There are two main integration points for a storage backend:
7474

7575
#. Load netCDF metadata
@@ -78,7 +78,7 @@ There are two main integration points for a storage backend:
7878
### Local file
7979

8080
The default storage backend is a local file.
81-
To use a local file, use a `storage_type` of `None`, which is its default value.
81+
To use a local file, use an `interface_type` of `None`, which is its default value.
8282
netCDF metadata is loaded using the [netCDF4](https://pypi.org/project/netCDF4/) library.
8383
The chunk reductions are implemented in `activestorage.storage` using NumPy.
8484

@@ -87,7 +87,7 @@ The chunk reductions are implemented in `activestorage.storage` using NumPy.
8787
We now have support for Active runs with netCDF4 files on S3, from [PR 89](https://github.com/NCAS-CMS/PyActiveStorage/pull/89).
8888
To achieve this we integrate with [Reductionist](https://github.com/stackhpc/reductionist-rs), an S3 Active Storage Server.
8989
Reductionist is typically deployed "near" to an S3-compatible object store and provides an API to perform numerical reductions on object data.
90-
To use Reductionist, use a `storage_type` of `s3`.
90+
To use Reductionist, use an `interface_type` of `s3`.
9191

9292
To load metadata, netCDF files are opened using `s3fs`, with `h5netcdf` used to put the open file (which is nothing more than a memory view of the netCDF file) into an hdf5/netCDF-like object format.
9393
Chunk reductions are implemented in `activestorage.reductionist`, with each operation resulting in an API request to the Reductionist server.

activestorage/active.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from activestorage.storage import reduce_chunk, reduce_opens3_chunk
2121

2222

23-
def return_storage_type(uri):
23+
def return_interface_type(uri):
2424
"""
2525
Extract the gateway-protocol to infer what type of storage
2626
"""
@@ -187,7 +187,7 @@ def __init__(self,
187187
dataset: Optional[str | Path | object],
188188
ncvar: str = None,
189189
axis: tuple = None,
190-
storage_type: str = None,
190+
interface_type: str = None,
191191
max_threads: int = 100,
192192
storage_options: dict = None,
193193
active_storage_url: str = None) -> None:
@@ -218,9 +218,9 @@ def __init__(self,
218218
self.ds = dataset
219219
self.uri = dataset
220220

221-
# determine the storage_type
221+
# determine the interface_type
222222
# based on what we have available
223-
if not storage_type:
223+
if not interface_type:
224224
if not input_variable:
225225
check_uri = self.uri
226226
else:
@@ -236,20 +236,20 @@ def __init__(self,
236236
else:
237237
check_uri = os.path.join(base_url,
238238
self.ds.id._filename)
239-
storage_type = return_storage_type(check_uri)
239+
interface_type = return_interface_type(check_uri)
240240

241-
# still allow for a passable storage_type
241+
# still allow for a passable interface_type
242242
# for special cases eg "special-POSIX" ie DDN
243-
if not storage_type and storage_options is not None:
244-
storage_type = urllib.parse.urlparse(dataset).scheme
245-
self.storage_type = storage_type
243+
if not interface_type and storage_options is not None:
244+
interface_type = urllib.parse.urlparse(dataset).scheme
245+
self.interface_type = interface_type
246246

247247
# set correct filename attr
248-
if input_variable and not self.storage_type:
248+
if input_variable and not self.interface_type:
249249
self.filename = self.ds
250-
elif input_variable and self.storage_type == "s3":
250+
elif input_variable and self.interface_type == "s3":
251251
self.filename = self.ds.id._filename
252-
elif input_variable and self.storage_type == "https":
252+
elif input_variable and self.interface_type == "https":
253253
self.filename = self.ds
254254

255255
# get storage_options
@@ -258,7 +258,7 @@ def __init__(self,
258258

259259
# basic check on file
260260
if not input_variable:
261-
if not os.path.isfile(self.uri) and not self.storage_type:
261+
if not os.path.isfile(self.uri) and not self.interface_type:
262262
raise ValueError(
263263
f"Must use existing file for uri. {self.uri} not found")
264264

@@ -294,11 +294,11 @@ def __load_nc_file(self):
294294
and `_filename` attribute.
295295
"""
296296
ncvar = self.ncvar
297-
if self.storage_type is None:
297+
if self.interface_type is None:
298298
nc = pyfive.File(self.uri)
299-
elif self.storage_type == "s3":
299+
elif self.interface_type == "s3":
300300
nc = load_from_s3(self.uri, self.storage_options)
301-
elif self.storage_type == "https":
301+
elif self.interface_type == "https":
302302
nc = load_from_https(self.uri, self.storage_options)
303303
self.filename = self.uri
304304
self.ds = nc[ncvar]
@@ -512,7 +512,7 @@ def _from_storage(self, ds, indexer, chunks, out_shape, out_dtype,
512512
out = np.ma.empty(out_shape, dtype=out_dtype, order=ds._order)
513513

514514
# Create a shared session object.
515-
if self.storage_type == "s3" and self._version == 2:
515+
if self.interface_type == "s3" and self._version == 2:
516516
if self.storage_options is not None:
517517
key, secret = None, None
518518
if self.storage_options.get("anon", None) is True:
@@ -533,7 +533,7 @@ def _from_storage(self, ds, indexer, chunks, out_shape, out_dtype,
533533
session = reductionist.get_session(S3_ACCESS_KEY,
534534
S3_SECRET_KEY,
535535
S3_ACTIVE_STORAGE_CACERT)
536-
elif self.storage_type == "https" and self._version == 2:
536+
elif self.interface_type == "https" and self._version == 2:
537537
username, password = None, None
538538
if self.storage_options is not None:
539539
username = self.storage_options.get("username", None)
@@ -660,7 +660,7 @@ def _process_chunk(self,
660660
# Axes over which to apply a reduction
661661
axis = self._axis
662662

663-
if self.storage_type == 's3' and self._version == 1:
663+
if self.interface_type == 's3' and self._version == 1:
664664
tmp, count = reduce_opens3_chunk(ds._fh,
665665
offset,
666666
size,
@@ -674,7 +674,7 @@ def _process_chunk(self,
674674
axis=axis,
675675
method=self.method)
676676

677-
elif self.storage_type == "s3" and self._version == 2:
677+
elif self.interface_type == "s3" and self._version == 2:
678678
# S3: pass in pre-configured storage options (credentials)
679679
parsed_url = urllib.parse.urlparse(self.filename)
680680
bucket = parsed_url.netloc
@@ -723,7 +723,7 @@ def _process_chunk(self,
723723
chunk_selection,
724724
axis,
725725
operation=self._method)
726-
elif self.storage_type == "https" and self._version == 2:
726+
elif self.interface_type == "https" and self._version == 2:
727727
tmp, count = reductionist.reduce_chunk(session,
728728
self.active_storage_url,
729729
f"{self.uri}",
@@ -738,9 +738,9 @@ def _process_chunk(self,
738738
chunk_selection,
739739
axis,
740740
operation=self._method,
741-
storage_type="https")
741+
interface_type="https")
742742

743-
elif self.storage_type == 'ActivePosix' and self.version == 2:
743+
elif self.interface_type == 'ActivePosix' and self.version == 2:
744744
# This is where the DDN Fuse and Infinia wrappers go
745745
raise NotImplementedError
746746
else:

activestorage/reductionist.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def reduce_chunk(session,
4848
chunk_selection,
4949
axis,
5050
operation,
51-
storage_type=None):
51+
interface_type=None):
5252
"""Perform a reduction on a chunk using Reductionist.
5353
5454
:param server: Reductionist server URL
@@ -70,7 +70,7 @@ def reduce_chunk(session,
7070
obtained or operated upon.
7171
:param axis: tuple of the axes to be reduced (non-negative integers)
7272
:param operation: name of operation to perform
73-
:param storage_type: optional testing flag to allow HTTPS reduction
73+
:param interface_type: optional testing flag to allow HTTPS reduction
7474
:returns: the reduced data as a numpy array or scalar
7575
:raises ReductionistError: if the request to Reductionist fails
7676
"""
@@ -86,7 +86,7 @@ def reduce_chunk(session,
8686
order,
8787
chunk_selection,
8888
axis,
89-
storage_type=storage_type)
89+
interface_type=interface_type)
9090
if DEBUG:
9191
print(f"Reductionist request data dictionary: {request_data}")
9292
api_operation = "sum" if operation == "mean" else operation or "select"
@@ -184,10 +184,10 @@ def build_request_data(url: str,
184184
order,
185185
selection,
186186
axis,
187-
storage_type=None) -> dict:
187+
interface_type=None) -> dict:
188188
"""Build request data for Reductionist API."""
189189
request_data = {
190-
'interface_type': storage_type if storage_type else "s3",
190+
'interface_type': interface_type if interface_type else "s3",
191191
'url': url,
192192
'dtype': dtype.name,
193193
'byte_order': encode_byte_order(dtype),

tests/s3_exploratory/test_s3_performance.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_Active_s3_v0():
115115
"""
116116
# run Active on s3 file
117117
s3_file = "s3://pyactivestorage/s3_test_bizarre_large.nc"
118-
active = Active(s3_file, "data", storage_type="s3")
118+
active = Active(s3_file, "data", interface_type="s3")
119119
active._version = 0
120120
active.components = True
121121
result1 = active[0:2, 4:6, 7:9]
@@ -127,7 +127,7 @@ def test_Active_s3_v1():
127127
"""
128128
# run Active on s3 file
129129
s3_file = "s3://pyactivestorage/s3_test_bizarre_large.nc"
130-
active = Active(s3_file, "data", storage_type="s3")
130+
active = Active(s3_file, "data", interface_type="s3")
131131
active._version = 1
132132
active.method = "mean"
133133
active.components = True
@@ -140,7 +140,7 @@ def test_Active_s3_v2():
140140
"""
141141
# run Active on s3 file
142142
s3_file = "s3://pyactivestorage/s3_test_bizarre_large.nc"
143-
active = Active(s3_file, "data", storage_type="s3")
143+
active = Active(s3_file, "data", interface_type="s3")
144144
active._version = 2
145145
active.method = "mean"
146146
active.components = True

tests/s3_exploratory/test_s3_reduction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def test_Active():
6868
print("S3 file uri", s3_testfile_uri)
6969

7070
# run Active on s3 file
71-
active = Active(s3_testfile_uri, "data", storage_type="s3")
71+
active = Active(s3_testfile_uri, "data", interface_type="s3")
7272
active.method = "mean"
7373
result1 = active[0:2, 4:6, 7:9]
7474
print(result1)
@@ -116,7 +116,7 @@ def test_with_valid_netCDF_file(test_data_path):
116116
print("S3 file uri", s3_testfile_uri)
117117

118118
# run Active on s3 file
119-
active = Active(s3_testfile_uri, "TREFHT", storage_type="s3")
119+
active = Active(s3_testfile_uri, "TREFHT", interface_type="s3")
120120
active._version = 2
121121
active.method = "mean"
122122
active.components = True

0 commit comments

Comments
 (0)