Skip to content

Commit f68bfbf

Browse files
authored
More robust testing and operations on compressed lists (#8)
- Renamed `CompressedBiocFrameList` to `CompressedSplitBiocFrameList` since the current implementation can only handle dataframes with the same number and names of columns. - Fixed some of the annoying type hint errors. - `to_list` and `unlist` now perform the same operations as the R implementation. - `splitAsCompressedList` uses a more abstract implementation if no generic is implemented. If that fails, it returns an error.
1 parent be997ba commit f68bfbf

18 files changed

+469
-65
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## Version 0.3.0
4+
5+
- Renamed `CompressedBiocFrameList` to `CompressedSplitBiocFrameList` since the current implementation can only handle dataframes with the same number and names of columns.
6+
- Fixed some of the annoying type hint errors.
7+
- `to_list` and `unlist` now perform the same operations as the R implementation.
8+
- `splitAsCompressedList` uses a more abstract implementation if no generic is implemented. If that fails, it returns an error.
9+
310
## Version 0.2.0
411

512
- Major changes to the package; Switch to typed lists from the biocutils package.

docs/tutorial.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,4 @@ class CompressedMyObjectList(CompressedList):
192192
# ...
193193
```
194194

195-
Check out the `CompressedBiocFrameList` for a complete example of this usecase.
195+
Check out the `CompressedSplitBiocFrameList` for a complete example of this use case.

src/compressed_lists/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@
2222
from .bool_list import CompressedBooleanList
2323
from .float_list import CompressedFloatList
2424
from .numpy_list import CompressedNumpyList
25-
from .biocframe_list import CompressedBiocFrameList
25+
from .biocframe_list import CompressedSplitBiocFrameList
2626
from .split_generic import splitAsCompressedList

src/compressed_lists/base.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,13 @@ def __repr__(self) -> str:
144144
output = f"{type(self).__name__}(number_of_elements={len(self)}"
145145
output += ", unlist_data=" + ut.print_truncated_list(self._unlist_data)
146146
output += ", partitioning=" + self._partitioning.__repr__()
147-
output += ", element_type=" + self._element_type
147+
148+
_etype_name = "__unknown_class__"
149+
if isinstance(self._element_type, str):
150+
_etype_name = self._element_type
151+
elif hasattr(self._element_type, "__name__"):
152+
_etype_name = self._element_type.__name__
153+
output += ", element_type=" + _etype_name
148154

149155
if len(self._element_metadata) > 0:
150156
output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata)
@@ -162,7 +168,13 @@ def __str__(self) -> str:
162168
"""
163169
output = f"class: {type(self).__name__}\n"
164170

165-
output += f"number of elements: ({len(self)}) of type: {self._element_type}\n"
171+
_etype_name = "__unknown_class__"
172+
if isinstance(self._element_type, str):
173+
_etype_name = self._element_type
174+
elif hasattr(self._element_type, "__name__"):
175+
_etype_name = self._element_type.__name__
176+
177+
output += f"number of elements: ({len(self)}) of type: {_etype_name}\n"
166178

167179
output += f"unlist_data: {ut.print_truncated_list(self._unlist_data)}\n"
168180

@@ -203,11 +215,11 @@ def paritioning(self) -> Partitioning:
203215
######>> names <<######
204216
#######################
205217

206-
def get_names(self) -> Optional[ut.NamedList]:
218+
def get_names(self) -> Optional[ut.Names]:
207219
"""Get the names of list elements."""
208220
return self._partitioning.get_names()
209221

210-
def set_names(self, names: List[str], in_place: bool = False) -> "CompressedList":
222+
def set_names(self, names: Sequence[str], in_place: bool = False) -> "CompressedList":
211223
"""Set the names of list elements.
212224
213225
names:
@@ -227,7 +239,7 @@ def set_names(self, names: List[str], in_place: bool = False) -> "CompressedList
227239
return output
228240

229241
@property
230-
def names(self) -> Optional[ut.NamedList]:
242+
def names(self) -> Optional[ut.Names]:
231243
"""Alias for :py:attr:`~get_names`."""
232244
return self._partitioning.get_names()
233245

@@ -460,7 +472,7 @@ def extract_range(self, start: int, end: int) -> Any:
460472

461473
@classmethod
462474
def from_list(
463-
cls, lst: List[Any], names: Optional[Sequence[str]] = None, metadata: Optional[dict] = None
475+
cls, lst: Any, names: Optional[Union[ut.Names, Sequence[str]]] = None, metadata: Optional[dict] = None
464476
) -> "CompressedList":
465477
"""Create a CompressedList from a regular list.
466478
@@ -497,27 +509,40 @@ def from_list(
497509
######>> coercions <<######
498510
###########################
499511

500-
def to_list(self) -> List[Any]:
512+
def to_list(self) -> List[List[Any]]:
501513
"""Convert to a regular Python list.
502514
503515
Returns:
504516
A regular Python list with all elements.
505517
"""
506-
return list(self)
518+
result = []
519+
for i in range(len(self)):
520+
_subset = list(self[i])
521+
if len(_subset) == 0:
522+
_subset = [None]
523+
result.append(_subset)
524+
525+
return result
507526

508-
def unlist(self, use_names: bool = True) -> Any:
527+
def as_list(self) -> List[List[Any]]:
528+
"""Alias to :py:meth:`~to_list`"""
529+
return self.to_list()
530+
531+
def unlist(self, use_names: bool = False) -> Any:
509532
"""Get the underlying unlisted data.
510533
511534
Args:
512535
use_names:
513536
Whether to include names in the result if applicable.
514537
515-
Currently not used.
516-
517538
Returns:
518539
The unlisted data.
519540
"""
520-
return self._unlist_data
541+
return (
542+
self._unlist_data
543+
if use_names is False
544+
else self._unlist_data.set_names(self.get_partitioning().get_names(), in_place=False)
545+
)
521546

522547
def relist(self, unlist_data: Any) -> "CompressedList":
523548
"""Create a new `CompressedList` with the same partitioning but different data.

src/compressed_lists/biocframe_list.py

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
__license__ = "MIT"
1313

1414

15-
class CompressedBiocFrameList(CompressedList):
16-
"""CompressedList for BiocFrames."""
15+
class CompressedSplitBiocFrameList(CompressedList):
16+
"""CompressedList for BiocFrames.
17+
18+
All BiocFrames are expected to have the same number and names of columns."""
1719

1820
def __init__(
1921
self,
@@ -23,7 +25,7 @@ def __init__(
2325
metadata: Optional[dict] = None,
2426
**kwargs,
2527
):
26-
"""Initialize a CompressedBiocFrameList.
28+
"""Initialize a CompressedSplitBiocFrameList.
2729
2830
Args:
2931
unlist_data:
@@ -50,16 +52,21 @@ def __init__(
5052

5153
@classmethod
5254
def from_list(
53-
cls, lst: List[BiocFrame], names: Optional[Sequence[str]] = None, metadata: Optional[dict] = None
54-
) -> "CompressedBiocFrameList":
55-
"""Create a `CompressedBiocFrameList` from a regular list.
55+
cls,
56+
lst: List[BiocFrame],
57+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
58+
metadata: Optional[dict] = None,
59+
) -> "CompressedSplitBiocFrameList":
60+
"""Create a `CompressedSplitBiocFrameList` from a regular list.
5661
5762
This concatenates the list of `BiocFrame` objects.
5863
5964
Args:
6065
lst:
6166
List of `BiocFrame` objects.
6267
68+
Must have the same number and names of columns.
69+
6370
names:
6471
Optional names for list elements.
6572
@@ -106,21 +113,68 @@ def extract_range(self, start: int, end: int) -> BiocFrame:
106113
"Custom classes should implement their own `extract_range` method for slice operations"
107114
) from e
108115

116+
##########################
117+
######>> Printing <<######
118+
##########################
119+
120+
def __repr__(self) -> str:
121+
"""
122+
Returns:
123+
A string representation.
124+
"""
125+
output = f"{type(self).__name__}(number_of_elements={len(self)}"
126+
output += ", unlist_data=" + self._unlist_data.__repr__()
127+
output += ", partitioning=" + self._partitioning.__repr__()
128+
output += (
129+
", element_type=" + self._element_type.__name__
130+
if not isinstance(self._element_type, str)
131+
else self._element_type
132+
)
133+
134+
if len(self._element_metadata) > 0:
135+
output += ", element_metadata=" + ut.print_truncated_dict(self._element_metadata)
136+
137+
if len(self._metadata) > 0:
138+
output += ", metadata=" + ut.print_truncated_dict(self._metadata)
139+
140+
output += ")"
141+
return output
142+
143+
def __str__(self) -> str:
144+
"""
145+
Returns:
146+
A pretty-printed string containing the contents of this object.
147+
"""
148+
output = f"class: {type(self).__name__}\n"
149+
150+
output += f"number of elements: ({len(self)}) of type: {self._element_type.__name__ if not isinstance(self._element_type, str) else self._element_type}\n"
151+
152+
output += f"unlist_data: {ut.print_truncated_list(self._unlist_data.get_column_names())}\n"
153+
154+
output += f"partitioning: {ut.print_truncated_list(self._partitioning)}\n"
155+
156+
output += f"element_metadata({str(len(self._element_metadata))}): {ut.print_truncated_list(list(self._element_metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
157+
output += f"metadata({str(len(self._metadata))}): {ut.print_truncated_list(list(self._metadata.keys()), sep=' ', include_brackets=False, transform=lambda y: y)}\n"
158+
159+
return output
160+
109161

110162
@splitAsCompressedList.register
111163
def _(
112164
data: BiocFrame,
113165
groups_or_partitions: Union[list, Partitioning],
114-
names: Optional[Sequence[str]] = None,
166+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
115167
metadata: Optional[dict] = None,
116-
) -> CompressedBiocFrameList:
168+
) -> CompressedSplitBiocFrameList:
117169
"""Handle lists of BiocFrame objects."""
118170

119171
partitioned_data, groups_or_partitions = _generic_register_helper(
120172
data=data, groups_or_partitions=groups_or_partitions, names=names
121173
)
122174

123-
if not isinstance(partitioned_data, BiocFrame):
175+
if not isinstance(partitioned_data, BiocFrame) and len(partitioned_data) != 0:
124176
partitioned_data = ut.relaxed_combine_rows(*partitioned_data)
125177

126-
return CompressedBiocFrameList(unlist_data=partitioned_data, partitioning=groups_or_partitions, metadata=metadata)
178+
return CompressedSplitBiocFrameList(
179+
unlist_data=partitioned_data, partitioning=groups_or_partitions, metadata=metadata
180+
)

src/compressed_lists/bool_list.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(
5858
def _(
5959
data: ut.BooleanList,
6060
groups_or_partitions: Union[list, Partitioning],
61-
names: Optional[Sequence[str]] = None,
61+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
6262
metadata: Optional[dict] = None,
6363
) -> CompressedBooleanList:
6464
"""Handle lists of booleans."""
@@ -67,7 +67,7 @@ def _(
6767
data=data, groups_or_partitions=groups_or_partitions, names=names
6868
)
6969

70-
if not isinstance(partitioned_data, ut.BooleanList):
70+
if not isinstance(partitioned_data, ut.BooleanList) and len(partitioned_data) != 0:
7171
partitioned_data = ut.combine_sequences(*partitioned_data)
7272

7373
return CompressedBooleanList(unlist_data=partitioned_data, partitioning=groups_or_partitions, metadata=metadata)

src/compressed_lists/float_list.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(
5858
def _(
5959
data: ut.FloatList,
6060
groups_or_partitions: Union[list, Partitioning],
61-
names: Optional[Sequence[str]] = None,
61+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
6262
metadata: Optional[dict] = None,
6363
) -> CompressedFloatList:
6464
"""Handle lists of floats."""
@@ -67,7 +67,7 @@ def _(
6767
data=data, groups_or_partitions=groups_or_partitions, names=names
6868
)
6969

70-
if not isinstance(partitioned_data, ut.FloatList):
70+
if not isinstance(partitioned_data, ut.FloatList) and len(partitioned_data) != 0:
7171
partitioned_data = ut.combine_sequences(*partitioned_data)
7272

7373
return CompressedFloatList(unlist_data=partitioned_data, partitioning=groups_or_partitions, metadata=metadata)

src/compressed_lists/integer_list.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(
5858
def _(
5959
data: ut.IntegerList,
6060
groups_or_partitions: Union[list, Partitioning],
61-
names: Optional[Sequence[str]] = None,
61+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
6262
metadata: Optional[dict] = None,
6363
) -> CompressedIntegerList:
6464
"""Handle lists of integers."""
@@ -67,7 +67,7 @@ def _(
6767
data=data, groups_or_partitions=groups_or_partitions, names=names
6868
)
6969

70-
if not isinstance(partitioned_data, ut.IntegerList):
70+
if not isinstance(partitioned_data, ut.IntegerList) and len(partitioned_data) != 0:
7171
partitioned_data = ut.combine_sequences(*partitioned_data)
7272

7373
return CompressedIntegerList(unlist_data=partitioned_data, partitioning=groups_or_partitions, metadata=metadata)

src/compressed_lists/numpy_list.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,25 @@ def __init__(
4444
"""
4545

4646
if not isinstance(unlist_data, np.ndarray):
47-
try:
48-
warn("trying to concatenate/coerce 'unlist_data' to a `np.ndarray`..")
49-
unlist_data = np.concatenate(unlist_data)
50-
except Exception as e:
51-
raise TypeError("'unlist_data' must be an `np.ndarray`, provided ", type(unlist_data)) from e
47+
if len(unlist_data) == 0:
48+
unlist_data = np.asarray([])
49+
else:
50+
try:
51+
warn("trying to concatenate/coerce 'unlist_data' to a `np.ndarray`..")
52+
unlist_data = np.concatenate(unlist_data)
53+
except Exception as e:
54+
raise TypeError("'unlist_data' must be an `np.ndarray`, provided ", type(unlist_data)) from e
5255

5356
super().__init__(
5457
unlist_data, partitioning, element_type=np.ndarray, element_metadata=element_metadata, metadata=metadata
5558
)
5659

5760
@classmethod
5861
def from_list(
59-
cls, lst: List[np.ndarray], names: Optional[Sequence[str]] = None, metadata: Optional[dict] = None
62+
cls,
63+
lst: List[np.ndarray],
64+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
65+
metadata: Optional[dict] = None,
6066
) -> "CompressedNumpyList":
6167
"""
6268
Create a `CompressedNumpyList` from a list of NumPy vectors.
@@ -88,7 +94,7 @@ def from_list(
8894
def _(
8995
data: np.ndarray,
9096
groups_or_partitions: Union[list, Partitioning],
91-
names: Optional[Sequence[str]] = None,
97+
names: Optional[Union[ut.Names, Sequence[str]]] = None,
9298
metadata: Optional[dict] = None,
9399
) -> CompressedNumpyList:
94100
"""Handle NumPy arrays."""
@@ -97,7 +103,7 @@ def _(
97103
data=data, groups_or_partitions=groups_or_partitions, names=names
98104
)
99105

100-
if not isinstance(partitioned_data, np.ndarray):
106+
if not isinstance(partitioned_data, np.ndarray) and len(partitioned_data) != 0:
101107
partitioned_data = ut.combine_sequences(*partitioned_data)
102108

103109
return CompressedNumpyList(unlist_data=partitioned_data, partitioning=groups_or_partitions, metadata=metadata)

0 commit comments

Comments
 (0)