Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions br/pkg/storage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ go_test(
],
embed = [":storage"],
flaky = True,
<<<<<<< HEAD:br/pkg/storage/BUILD.bazel
shard_count = 50,
=======
shard_count = 45,
>>>>>>> 1d9393d0cd7 (mydump: read row group data at once for small parquet files (#66071)):pkg/objstore/BUILD.bazel
Comment on lines +107 to +111
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Resolve merge-conflict markers before merge.

The conflict markers around shard_count make this BUILD file invalid and will break Bazel parsing/CI.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@br/pkg/storage/BUILD.bazel` around lines 107 - 111, The BUILD file contains
unresolved Git conflict markers (<<<<<<<, =======, >>>>>>>) around the
shard_count setting which breaks Bazel; remove the conflict markers and leave a
single valid shard_count assignment (choose the intended value—either 45 or
50—or confirm with the author), e.g., replace the whole conflicted block with a
single line "shard_count = <chosen_value>," and ensure there are no leftover
markers or duplicate entries for shard_count.

deps = [
"//br/pkg/mock",
"//pkg/util/intest",
Expand Down
32 changes: 24 additions & 8 deletions br/pkg/storage/memstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,20 +153,23 @@ func (s *MemStorage) Open(ctx context.Context, filePath string, o *ReaderOption)
return nil, errors.Errorf("cannot find the file: %s", filePath)
}
data := theFile.GetData()
// just for simplicity, different from other implementation, MemStorage can't
// seek beyond [o.StartOffset, o.EndOffset)
start, end := 0, len(data)
start, end := int64(0), int64(len(data))
if o != nil {
if o.StartOffset != nil {
start = int(*o.StartOffset)
start = *o.StartOffset
}
if o.EndOffset != nil {
end = int(*o.EndOffset)
end = *o.EndOffset
}
}
r := bytes.NewReader(data[start:end])
r := bytes.NewReader(data)
if _, err := r.Seek(start, io.SeekStart); err != nil {
return nil, errors.Trace(err)
}
return &memFileReader{
br: r,
pos: start,
end: end,
size: int64(len(data)),
}, nil
}
Expand Down Expand Up @@ -269,6 +272,8 @@ func (s *MemStorage) Close() {
// memFileReader is the struct to read data from an opend mem storage file
type memFileReader struct {
br *bytes.Reader
pos int64
end int64
size int64
isClosed atomic.Bool
}
Expand All @@ -279,7 +284,13 @@ func (r *memFileReader) Read(p []byte) (int, error) {
if r.isClosed.Load() {
return 0, io.EOF
}
return r.br.Read(p)
maxCnt := min(r.end-r.pos, int64(len(p)))
if maxCnt <= 0 {
return 0, io.EOF
}
n, err := r.br.Read(p[:maxCnt])
r.pos += int64(n)
return n, err
}

// Close closes the mem storage file data
Expand All @@ -295,7 +306,12 @@ func (r *memFileReader) Seek(offset int64, whence int) (int64, error) {
if r.isClosed.Load() {
return 0, errors.New("reader closed")
}
return r.br.Seek(offset, whence)
n, err := r.br.Seek(offset, whence)
if err != nil {
return 0, errors.Trace(err)
}
r.pos = n
return n, nil
}

func (r *memFileReader) GetFileSize() (int64, error) {
Expand Down
45 changes: 45 additions & 0 deletions br/pkg/storage/memstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,51 @@ func TestMemStoreBasic(t *testing.T) {
require.True(t, exists)
}

func TestMemStoreOpenRangeSeek(t *testing.T) {
store := NewMemStorage()
ctx := context.Background()

fileName := "/seek-range.txt"
require.NoError(t, store.WriteFile(ctx, fileName, []byte("0123456789")))

start, end := int64(2), int64(5)
r, err := store.Open(ctx, fileName, &storeapi.ReaderOption{
StartOffset: &start,
EndOffset: &end,
})
Comment on lines +121 to +124
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, let's verify the file exists and check its size
cd /tmp && find . -name "memstore_test.go" -type f 2>/dev/null | head -5
wc -l br/pkg/storage/memstore_test.go 2>/dev/null || echo "File path needs adjustment"

Repository: pingcap/tidb

Length of output: 82


🏁 Script executed:

# Search for ReaderOption definitions
rg -n 'type\s+ReaderOption\s+struct' --type=go

Repository: pingcap/tidb

Length of output: 112


🏁 Script executed:

# Check for storeapi package references
rg -n 'storeapi' --type=go | head -20

Repository: pingcap/tidb

Length of output: 764


🏁 Script executed:

# Read the actual memstore_test.go file around the mentioned lines
head -160 br/pkg/storage/memstore_test.go | tail -50

Repository: pingcap/tidb

Length of output: 1154


🏁 Script executed:

# Check the beginning of memstore_test.go to see package declaration and imports
head -30 br/pkg/storage/memstore_test.go

Repository: pingcap/tidb

Length of output: 843


🏁 Script executed:

# Search for import statements in memstore_test.go
rg -n '^import|^package' br/pkg/storage/memstore_test.go -A 15 | head -40

Repository: pingcap/tidb

Length of output: 398


🏁 Script executed:

# Check what ReaderOption is in storage.go to understand the type
rg -A 10 'type ReaderOption struct' br/pkg/storage/storage.go

Repository: pingcap/tidb

Length of output: 378


🏁 Script executed:

# Verify that storeapi is not imported in memstore_test.go
grep -n "import\|storeapi" br/pkg/storage/memstore_test.go | head -30

Repository: pingcap/tidb

Length of output: 132


🏁 Script executed:

# Double-check if there's any storeapi import that we may have missed
rg -n 'storeapi' br/pkg/storage/memstore_test.go

Repository: pingcap/tidb

Length of output: 120


🏁 Script executed:

# Verify the complete import section of memstore_test.go
sed -n '14,26p' br/pkg/storage/memstore_test.go

Repository: pingcap/tidb

Length of output: 205


🏁 Script executed:

# Check what the Open method signature expects for the third parameter
rg -B 5 -A 10 'func.*Open.*ReaderOption' br/pkg/storage/storage.go | head -30

Repository: pingcap/tidb

Length of output: 38


🏁 Script executed:

# Search for the Open method definition in memstore.go or storage.go
rg -n 'func.*Open' br/pkg/storage/storage.go -A 3 | head -20

Repository: pingcap/tidb

Length of output: 38


🏁 Script executed:

# Check memstore.go for the Open implementation
rg -n 'func.*Open' br/pkg/storage/memstore.go -A 3 | head -20

Repository: pingcap/tidb

Length of output: 252


🏁 Script executed:

# Final verification: confirm storeapi is NOT defined/aliased in storage package context
rg -n 'storeapi' br/pkg/storage/storage.go

Repository: pingcap/tidb

Length of output: 38


Use ReaderOption from the current storage package, not storeapi.ReaderOption.

storeapi is not imported in this file, making storeapi.ReaderOption undefined at line 121. The Open method expects *ReaderOption from the same storage package.

💡 Proposed fix
-	r, err := store.Open(ctx, fileName, &storeapi.ReaderOption{
+	r, err := store.Open(ctx, fileName, &ReaderOption{
 		StartOffset: &start,
 		EndOffset:   &end,
 	})
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
r, err := store.Open(ctx, fileName, &storeapi.ReaderOption{
StartOffset: &start,
EndOffset: &end,
})
r, err := store.Open(ctx, fileName, &ReaderOption{
StartOffset: &start,
EndOffset: &end,
})
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@br/pkg/storage/memstore_test.go` around lines 121 - 124, The test is using
store.Open with a wrong type reference: replace the undefined
storeapi.ReaderOption with the local storage package type by passing
&ReaderOption{ StartOffset: &start, EndOffset: &end } (i.e., use ReaderOption
from the same package as Open); update the call at r, err := store.Open(ctx,
fileName, ...) to construct the correct *ReaderOption type so it matches the
Open signature.

require.NoError(t, err)
defer func() {
require.NoError(t, r.Close())
}()

offs, err := r.Seek(0, io.SeekCurrent)
require.NoError(t, err)
require.Equal(t, int64(2), offs)

offs, err = r.Seek(0, io.SeekEnd)
require.NoError(t, err)
require.Equal(t, int64(10), offs)

b := make([]byte, 2)
n, err := r.Read(b)
require.Equal(t, 0, n)
require.ErrorIs(t, err, io.EOF)

offs, err = r.Seek(1, io.SeekStart)
require.NoError(t, err)
require.Equal(t, int64(1), offs)

b = make([]byte, 10)
n, err = r.Read(b)
require.NoError(t, err)
require.Equal(t, 4, n)
require.Equal(t, "1234", string(b[:n]))

n, err = r.Read(b)
require.Equal(t, 0, n)
require.ErrorIs(t, err, io.EOF)
}

type iterFileInfo struct {
Name string
Size int64
Expand Down
4 changes: 2 additions & 2 deletions br/pkg/storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ type WriterOption struct {
}

type ReaderOption struct {
// StartOffset is inclusive. And it's incompatible with Seek.
// StartOffset is inclusive.
StartOffset *int64
// EndOffset is exclusive. And it's incompatible with Seek.
// EndOffset is exclusive.
EndOffset *int64
// PrefetchSize will switch to NewPrefetchReader if value is positive.
PrefetchSize int
Expand Down
12 changes: 12 additions & 0 deletions pkg/lightning/mydump/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ go_library(
"loader.go",
"parquet_parser.go",
"parquet_type_converter.go",
"parquet_wrapper.go",
"parquet_writer.go",
"parser.go",
"parser_generated.go",
Expand All @@ -22,6 +23,11 @@ go_library(
deps = [
"//br/pkg/storage",
"//pkg/config",
<<<<<<< HEAD
=======
"//pkg/errno",
"//pkg/lightning/backend/external",
>>>>>>> 1d9393d0cd7 (mydump: read row group data at once for small parquet files (#66071))
Comment on lines +26 to +30
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Resolve merge-conflict markers in deps list.

The <<<<<<< / ======= / >>>>>>> blocks make the Bazel file invalid and block build/test execution.

Also applies to: 58-62

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@pkg/lightning/mydump/BUILD.bazel` around lines 26 - 30, Remove the Git
merge-conflict markers (<<<<<<<, =======, >>>>>>>) and merge the dependency
entries so the deps list contains the intended entries (e.g. include both
"//pkg/errno" and "//pkg/lightning/backend/external" as normal strings) without
any conflict markers; update the dependency block in BUILD.bazel where the
markers appear and apply the same fix to the other occurrence noted (lines
58-62) so the file is valid for Bazel parsing.

"//pkg/lightning/common",
"//pkg/lightning/config",
"//pkg/lightning/log",
Expand All @@ -47,7 +53,13 @@ go_library(
"@com_github_apache_arrow_go_v18//parquet",
"@com_github_apache_arrow_go_v18//parquet/compress",
"@com_github_apache_arrow_go_v18//parquet/file",
"@com_github_apache_arrow_go_v18//parquet/metadata",
"@com_github_apache_arrow_go_v18//parquet/schema",
<<<<<<< HEAD
=======
"@com_github_docker_go_units//:go-units",
"@com_github_go_sql_driver_mysql//:mysql",
>>>>>>> 1d9393d0cd7 (mydump: read row group data at once for small parquet files (#66071))
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_spkg_bom//:bom",
Expand Down
Loading
Loading