@@ -148,3 +148,45 @@ def read_with_cache_clear() -> None:
148148 getitem (data , indexer )
149149
150150 benchmark (read_with_cache_clear )
151+
152+
@pytest.mark.parametrize("store", ["memory"], indirect=["store"])
@pytest.mark.parametrize("shards", large_morton_shards, ids=str)
def test_sharded_morton_single_chunk(
    store: Store,
    shards: tuple[int, ...],
    benchmark: BenchmarkFixture,
) -> None:
    """Benchmark reading a single chunk from a large shard.

    This isolates the Morton order computation overhead by minimizing I/O.
    Reading one chunk from a shard with 32^3 = 32768 chunks still requires
    computing the full Morton order, making the optimization impact clear.
    The Morton order cache is cleared before each iteration.
    """
    from zarr.core.indexing import _morton_order

    # Derive dimensionality from the parametrized shard shape instead of
    # hard-coding 3-D, so the benchmark stays correct if non-3-D shard
    # shapes are ever added to `large_morton_shards`.
    ndim = len(shards)

    # 1x1x...x1 chunks means chunks_per_shard equals the shard shape.
    shape = tuple(s * 2 for s in shards)  # 2 shards per dimension
    chunks = (1,) * ndim

    data = create_array(
        store=store,
        shape=shape,
        dtype="uint8",
        chunks=chunks,
        shards=shards,
        compressors=None,
        filters=None,
        fill_value=0,
    )

    data[:] = 1
    # Read only a single unit chunk from one shard.
    indexer = (slice(1),) * ndim

    def read_with_cache_clear() -> None:
        # Drop the memoized Morton order so every benchmark round pays the
        # full recomputation cost rather than hitting a warm cache.
        _morton_order.cache_clear()
        getitem(data, indexer)

    benchmark(read_with_cache_clear)
0 commit comments