Skip to content

Commit 977e70f

Browse files
committed
added tests
1 parent 36f4a02 commit 977e70f

2 files changed

Lines changed: 241 additions & 0 deletions

File tree

paimon-python/pypaimon/tests/file_io_test.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,5 +489,62 @@ def test_path_on_windows(self):
489489
self.assertNotIn("\\", call[0][0], f"backslash in path: {call[0][0]}")
490490

491491

492+
class GCSFileIOPathTest(unittest.TestCase):
493+
"""Unit tests for PyArrowFileIO.to_filesystem_path with GCS (no credentials required)."""
494+
495+
def setUp(self):
496+
self.file_io = PyArrowFileIO("gs://my-bucket/warehouse", Options({}))
497+
498+
def test_gcs_filesystem_type(self):
499+
"""GCS warehouse path should produce a GcsFileSystem."""
500+
self.assertIsInstance(self.file_io.filesystem, pafs.GcsFileSystem)
501+
502+
def test_gcs_path_conversion(self):
503+
"""gs://bucket/key should map to bucket/key (no leading slash)."""
504+
self.assertEqual(
505+
self.file_io.to_filesystem_path("gs://my-bucket/path/to/file.parquet"),
506+
"my-bucket/path/to/file.parquet"
507+
)
508+
self.assertEqual(
509+
self.file_io.to_filesystem_path("gs://my-bucket/warehouse/db.db/tbl/data-0.parquet"),
510+
"my-bucket/warehouse/db.db/tbl/data-0.parquet"
511+
)
512+
513+
def test_gcs_path_bucket_only(self):
514+
"""gs://bucket with no path should return just the bucket name."""
515+
self.assertEqual(
516+
self.file_io.to_filesystem_path("gs://my-bucket"),
517+
"my-bucket"
518+
)
519+
520+
def test_gcs_path_normalization(self):
521+
"""Multiple consecutive slashes in the path should be collapsed."""
522+
self.assertEqual(
523+
self.file_io.to_filesystem_path("gs://my-bucket///path///to///file.parquet"),
524+
"my-bucket/path/to/file.parquet"
525+
)
526+
527+
def test_gcs_path_idempotency(self):
528+
"""Already-converted bucket/key paths should pass through unchanged."""
529+
converted = "my-bucket/path/to/file.parquet"
530+
self.assertEqual(self.file_io.to_filesystem_path(converted), converted)
531+
parent = str(Path(converted).parent)
532+
self.assertEqual(self.file_io.to_filesystem_path(parent), parent)
533+
534+
def test_gcs_path_no_leading_slash(self):
535+
"""to_filesystem_path must never return a path starting with '/'."""
536+
cases = [
537+
"gs://my-bucket/path/to/file.parquet",
538+
"gs://my-bucket",
539+
"gs://my-bucket///path",
540+
]
541+
for uri in cases:
542+
result = self.file_io.to_filesystem_path(uri)
543+
self.assertFalse(
544+
result.startswith("/"),
545+
f"Path must not start with '/' for GcsFileSystem, got: {result!r} (input: {uri!r})"
546+
)
547+
548+
492549
if __name__ == '__main__':
493550
unittest.main()
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
################################################################################
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
################################################################################
18+
import os
19+
import unittest
20+
import uuid
21+
22+
import pyarrow.fs as pafs
23+
24+
from pypaimon.common.options import Options
25+
from pypaimon.filesystem.pyarrow_file_io import PyArrowFileIO
26+
27+
28+
class GCSFileIOTest(unittest.TestCase):
29+
"""Integration tests for PyArrowFileIO with GCS.
30+
31+
Requires the following environment variable to be set:
32+
GCS_TEST_BUCKET — name of the GCS bucket to use (without gs:// prefix)
33+
34+
Credentials are picked up automatically via Application Default Credentials
35+
(GOOGLE_APPLICATION_CREDENTIALS, GCP metadata server, or Workload Identity).
36+
All tests are skipped when GCS_TEST_BUCKET is not configured.
37+
"""
38+
39+
def setUp(self):
40+
self.bucket = os.environ.get("GCS_TEST_BUCKET")
41+
if not self.bucket:
42+
self.skipTest("GCS_TEST_BUCKET is not configured")
43+
return
44+
45+
self.root_path = f"gs://{self.bucket}/"
46+
self.file_io = PyArrowFileIO(self.root_path, Options({}))
47+
self.test_prefix = f"test-{uuid.uuid4().hex[:8]}/"
48+
49+
def tearDown(self):
50+
if not hasattr(self, 'file_io'):
51+
return
52+
test_dir = f"{self.root_path}{self.test_prefix}"
53+
try:
54+
if self.file_io.exists(test_dir):
55+
self.file_io.delete(test_dir, recursive=True)
56+
except Exception:
57+
pass
58+
59+
def _path(self, name: str) -> str:
60+
return f"{self.root_path}{self.test_prefix}{name}"
61+
62+
def test_gcs_filesystem_type(self):
63+
"""PyArrowFileIO with gs:// should use GcsFileSystem."""
64+
self.assertIsInstance(self.file_io.filesystem, pafs.GcsFileSystem)
65+
66+
def test_exists(self):
67+
"""exists() returns False for non-existent paths."""
68+
self.assertFalse(self.file_io.exists(self._path("nonexistent.txt")))
69+
with self.assertRaises(FileNotFoundError):
70+
self.file_io.get_file_status(self._path("nonexistent.txt"))
71+
72+
def test_write_and_read_file(self):
73+
"""write_file() and read_file_utf8() round-trip."""
74+
test_file = self._path("write_read_test.txt")
75+
76+
self.file_io.write_file(test_file, "hello gcs")
77+
self.assertTrue(self.file_io.exists(test_file))
78+
self.assertEqual(self.file_io.read_file_utf8(test_file), "hello gcs")
79+
80+
def test_write_file_overwrite(self):
81+
"""write_file() respects the overwrite flag."""
82+
test_file = self._path("overwrite_test.txt")
83+
84+
self.file_io.write_file(test_file, "first")
85+
with self.assertRaises(FileExistsError):
86+
self.file_io.write_file(test_file, "second", overwrite=False)
87+
self.assertEqual(self.file_io.read_file_utf8(test_file), "first")
88+
89+
self.file_io.write_file(test_file, "overwritten", overwrite=True)
90+
self.assertEqual(self.file_io.read_file_utf8(test_file), "overwritten")
91+
92+
def test_new_input_stream_read(self):
93+
"""new_output_stream() / new_input_stream() round-trip."""
94+
test_data = b"Hello, GCS! This is a test file."
95+
test_file = self._path("input_stream_test.bin")
96+
97+
with self.file_io.new_output_stream(test_file) as out:
98+
out.write(test_data)
99+
100+
with self.file_io.new_input_stream(test_file) as inp:
101+
self.assertEqual(inp.read(), test_data)
102+
103+
with self.assertRaises(FileNotFoundError):
104+
self.file_io.new_input_stream(self._path("nonexistent.bin"))
105+
106+
def test_get_file_status_directory(self):
107+
"""get_file_status() returns Directory type for a directory."""
108+
test_dir = self._path("test-dir/")
109+
self.file_io.mkdirs(test_dir)
110+
status = self.file_io.get_file_status(test_dir)
111+
self.assertIsNotNone(status)
112+
self.assertEqual(status.type, pafs.FileType.Directory)
113+
114+
def test_get_file_status_file(self):
115+
"""get_file_status() returns File type and non-None size for a file."""
116+
test_file = self._path("status_test.txt")
117+
self.file_io.write_file(test_file, "content")
118+
status = self.file_io.get_file_status(test_file)
119+
self.assertEqual(status.type, pafs.FileType.File)
120+
self.assertIsNotNone(status.size)
121+
122+
def test_delete_returns_false_when_not_exists(self):
123+
"""delete() returns False when the path does not exist."""
124+
self.assertFalse(self.file_io.delete(self._path("nonexistent.txt")))
125+
self.assertFalse(self.file_io.delete(self._path("nonexistent_dir"), recursive=False))
126+
127+
def test_delete_non_empty_directory_raises_error(self):
128+
"""delete() without recursive=True raises OSError for non-empty directory."""
129+
test_dir = self._path("nonempty-dir/")
130+
test_file = self._path("nonempty-dir/file.txt")
131+
self.file_io.mkdirs(test_dir)
132+
with self.file_io.new_output_stream(test_file) as out:
133+
out.write(b"data")
134+
135+
with self.assertRaises(OSError) as ctx:
136+
self.file_io.delete(test_dir, recursive=False)
137+
self.assertIn("is not empty", str(ctx.exception))
138+
139+
def test_rename_returns_false_when_dst_exists(self):
140+
"""rename() returns False when the destination already exists."""
141+
src = self._path("src.txt")
142+
dst = self._path("dst.txt")
143+
with self.file_io.new_output_stream(src) as out:
144+
out.write(b"src")
145+
with self.file_io.new_output_stream(dst) as out:
146+
out.write(b"dst")
147+
148+
self.assertFalse(self.file_io.rename(src, dst))
149+
150+
def test_copy_file(self):
151+
"""copy_file() copies content and respects the overwrite flag."""
152+
src = self._path("copy_src.txt")
153+
dst = self._path("copy_dst.txt")
154+
with self.file_io.new_output_stream(src) as out:
155+
out.write(b"source content")
156+
with self.file_io.new_output_stream(dst) as out:
157+
out.write(b"target content")
158+
159+
with self.assertRaises(FileExistsError) as ctx:
160+
self.file_io.copy_file(src, dst, overwrite=False)
161+
self.assertIn("already exists", str(ctx.exception))
162+
163+
self.file_io.copy_file(src, dst, overwrite=True)
164+
with self.file_io.new_input_stream(dst) as inp:
165+
self.assertEqual(inp.read(), b"source content")
166+
167+
def test_try_to_write_atomic(self):
168+
"""try_to_write_atomic() writes a file and returns True on success."""
169+
normal_file = self._path("atomic_file.txt")
170+
self.assertTrue(self.file_io.try_to_write_atomic(normal_file, "atomic content"))
171+
self.assertEqual(self.file_io.read_file_utf8(normal_file), "atomic content")
172+
173+
def test_mkdirs_raises_error_when_path_is_file(self):
174+
"""mkdirs() raises FileExistsError when the path is an existing file."""
175+
test_file = self._path("existing_file.txt")
176+
self.file_io.write_file(test_file, "data")
177+
178+
with self.assertRaises(FileExistsError) as ctx:
179+
self.file_io.mkdirs(test_file)
180+
self.assertIn("is not a directory", str(ctx.exception))
181+
182+
183+
if __name__ == '__main__':
184+
unittest.main()

0 commit comments

Comments
 (0)