-
Notifications
You must be signed in to change notification settings - Fork 6.1k
mydump: read row group data at once for small parquet files (#66071) #66574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: release-8.5
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -110,6 +110,51 @@ func TestMemStoreBasic(t *testing.T) { | |||||||||||||||||
| require.True(t, exists) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| func TestMemStoreOpenRangeSeek(t *testing.T) { | ||||||||||||||||||
| store := NewMemStorage() | ||||||||||||||||||
| ctx := context.Background() | ||||||||||||||||||
|
|
||||||||||||||||||
| fileName := "/seek-range.txt" | ||||||||||||||||||
| require.NoError(t, store.WriteFile(ctx, fileName, []byte("0123456789"))) | ||||||||||||||||||
|
|
||||||||||||||||||
| start, end := int64(2), int64(5) | ||||||||||||||||||
| r, err := store.Open(ctx, fileName, &storeapi.ReaderOption{ | ||||||||||||||||||
| StartOffset: &start, | ||||||||||||||||||
| EndOffset: &end, | ||||||||||||||||||
| }) | ||||||||||||||||||
|
Comment on lines
+121
to
+124
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's verify the file exists and check its size
cd /tmp && find . -name "memstore_test.go" -type f 2>/dev/null | head -5
wc -l br/pkg/storage/memstore_test.go 2>/dev/null || echo "File path needs adjustment"Repository: pingcap/tidb Length of output: 82 🏁 Script executed: # Search for ReaderOption definitions
rg -n 'type\s+ReaderOption\s+struct' --type=goRepository: pingcap/tidb Length of output: 112 🏁 Script executed: # Check for storeapi package references
rg -n 'storeapi' --type=go | head -20Repository: pingcap/tidb Length of output: 764 🏁 Script executed: # Read the actual memstore_test.go file around the mentioned lines
head -160 br/pkg/storage/memstore_test.go | tail -50Repository: pingcap/tidb Length of output: 1154 🏁 Script executed: # Check the beginning of memstore_test.go to see package declaration and imports
head -30 br/pkg/storage/memstore_test.goRepository: pingcap/tidb Length of output: 843 🏁 Script executed: # Search for import statements in memstore_test.go
rg -n '^import|^package' br/pkg/storage/memstore_test.go -A 15 | head -40Repository: pingcap/tidb Length of output: 398 🏁 Script executed: # Check what ReaderOption is in storage.go to understand the type
rg -A 10 'type ReaderOption struct' br/pkg/storage/storage.goRepository: pingcap/tidb Length of output: 378 🏁 Script executed: # Verify that storeapi is not imported in memstore_test.go
grep -n "import\|storeapi" br/pkg/storage/memstore_test.go | head -30Repository: pingcap/tidb Length of output: 132 🏁 Script executed: # Double-check if there's any storeapi import that we may have missed
rg -n 'storeapi' br/pkg/storage/memstore_test.goRepository: pingcap/tidb Length of output: 120 🏁 Script executed: # Verify the complete import section of memstore_test.go
sed -n '14,26p' br/pkg/storage/memstore_test.goRepository: pingcap/tidb Length of output: 205 🏁 Script executed: # Check what the Open method signature expects for the third parameter
rg -B 5 -A 10 'func.*Open.*ReaderOption' br/pkg/storage/storage.go | head -30Repository: pingcap/tidb Length of output: 38 🏁 Script executed: # Search for the Open method definition in memstore.go or storage.go
rg -n 'func.*Open' br/pkg/storage/storage.go -A 3 | head -20Repository: pingcap/tidb Length of output: 38 🏁 Script executed: # Check memstore.go for the Open implementation
rg -n 'func.*Open' br/pkg/storage/memstore.go -A 3 | head -20Repository: pingcap/tidb Length of output: 252 🏁 Script executed: # Final verification: confirm storeapi is NOT defined/aliased in storage package context
rg -n 'storeapi' br/pkg/storage/storage.goRepository: pingcap/tidb Length of output: 38 Use
💡 Proposed fix- r, err := store.Open(ctx, fileName, &storeapi.ReaderOption{
+ r, err := store.Open(ctx, fileName, &ReaderOption{
StartOffset: &start,
EndOffset: &end,
})📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||
| require.NoError(t, err) | ||||||||||||||||||
| defer func() { | ||||||||||||||||||
| require.NoError(t, r.Close()) | ||||||||||||||||||
| }() | ||||||||||||||||||
|
|
||||||||||||||||||
| offs, err := r.Seek(0, io.SeekCurrent) | ||||||||||||||||||
| require.NoError(t, err) | ||||||||||||||||||
| require.Equal(t, int64(2), offs) | ||||||||||||||||||
|
|
||||||||||||||||||
| offs, err = r.Seek(0, io.SeekEnd) | ||||||||||||||||||
| require.NoError(t, err) | ||||||||||||||||||
| require.Equal(t, int64(10), offs) | ||||||||||||||||||
|
|
||||||||||||||||||
| b := make([]byte, 2) | ||||||||||||||||||
| n, err := r.Read(b) | ||||||||||||||||||
| require.Equal(t, 0, n) | ||||||||||||||||||
| require.ErrorIs(t, err, io.EOF) | ||||||||||||||||||
|
|
||||||||||||||||||
| offs, err = r.Seek(1, io.SeekStart) | ||||||||||||||||||
| require.NoError(t, err) | ||||||||||||||||||
| require.Equal(t, int64(1), offs) | ||||||||||||||||||
|
|
||||||||||||||||||
| b = make([]byte, 10) | ||||||||||||||||||
| n, err = r.Read(b) | ||||||||||||||||||
| require.NoError(t, err) | ||||||||||||||||||
| require.Equal(t, 4, n) | ||||||||||||||||||
| require.Equal(t, "1234", string(b[:n])) | ||||||||||||||||||
|
|
||||||||||||||||||
| n, err = r.Read(b) | ||||||||||||||||||
| require.Equal(t, 0, n) | ||||||||||||||||||
| require.ErrorIs(t, err, io.EOF) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| type iterFileInfo struct { | ||||||||||||||||||
| Name string | ||||||||||||||||||
| Size int64 | ||||||||||||||||||
|
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ go_library( | |
| "loader.go", | ||
| "parquet_parser.go", | ||
| "parquet_type_converter.go", | ||
| "parquet_wrapper.go", | ||
| "parquet_writer.go", | ||
| "parser.go", | ||
| "parser_generated.go", | ||
|
|
@@ -22,6 +23,11 @@ go_library( | |
| deps = [ | ||
| "//br/pkg/storage", | ||
| "//pkg/config", | ||
| <<<<<<< HEAD | ||
| ======= | ||
| "//pkg/errno", | ||
| "//pkg/lightning/backend/external", | ||
| >>>>>>> 1d9393d0cd7 (mydump: read row group data at once for small parquet files (#66071)) | ||
|
Comment on lines
+26
to
+30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Resolve merge-conflict markers in deps list. The Also applies to: 58-62 🤖 Prompt for AI Agents |
||
| "//pkg/lightning/common", | ||
| "//pkg/lightning/config", | ||
| "//pkg/lightning/log", | ||
|
|
@@ -47,7 +53,13 @@ go_library( | |
| "@com_github_apache_arrow_go_v18//parquet", | ||
| "@com_github_apache_arrow_go_v18//parquet/compress", | ||
| "@com_github_apache_arrow_go_v18//parquet/file", | ||
| "@com_github_apache_arrow_go_v18//parquet/metadata", | ||
| "@com_github_apache_arrow_go_v18//parquet/schema", | ||
| <<<<<<< HEAD | ||
| ======= | ||
| "@com_github_docker_go_units//:go-units", | ||
| "@com_github_go_sql_driver_mysql//:mysql", | ||
| >>>>>>> 1d9393d0cd7 (mydump: read row group data at once for small parquet files (#66071)) | ||
| "@com_github_pingcap_errors//:errors", | ||
| "@com_github_pingcap_failpoint//:failpoint", | ||
| "@com_github_spkg_bom//:bom", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Resolve merge-conflict markers before merge.
The conflict markers around
shard_countmake this BUILD file invalid and will break Bazel parsing/CI.🤖 Prompt for AI Agents