@@ -15,8 +15,11 @@ import (
1515 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
1616 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
1717 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
18+ "github.com/cockroachdb/cockroach/pkg/roachpb"
1819 "github.com/cockroachdb/cockroach/pkg/roachprod/install"
20+ "github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
1921 "github.com/cockroachdb/cockroach/pkg/util/timeutil"
22+ "github.com/stretchr/testify/require"
2023)
2124
2225func registerClearRange (r registry.Registry ) {
@@ -82,32 +85,18 @@ func runClearRange(ctx context.Context, t test.Test, c cluster.Cluster, aggressi
8285
8386 t .Status ()
8487
85- // Set up a convenience function that we can call to learn the number of
86- // ranges for the bigbank.bank table (even after it's been dropped).
87- numBankRanges := func () func () int {
88- conn := c .Conn (ctx , t .L (), 1 )
89- defer conn .Close ()
90-
91- var startHex string
92- if err := conn .QueryRow (
93- `SELECT to_hex(raw_start_key)
94- FROM [SHOW RANGES FROM TABLE bigbank.bank WITH KEYS]
95- ORDER BY raw_start_key ASC LIMIT 1` ,
96- ).Scan (& startHex ); err != nil {
97- t .Fatal (err )
98- }
99- return func () int {
100- conn := c .Conn (ctx , t .L (), 1 )
101- defer conn .Close ()
102- var n int
103- if err := conn .QueryRow (
104- `SELECT count(*) FROM crdb_internal.ranges_no_leases WHERE substr(to_hex(start_key), 1, length($1::string)) = $1` , startHex ,
105- ).Scan (& n ); err != nil {
106- t .Fatal (err )
107- }
108- return n
109- }
110- }()
88+ bigBankSpan , err := getKeyspanForTable (ctx , t , c , 1 , "bigbank.bank" )
89+ require .NoError (t , err )
90+ t .L ().Printf ("bigbank DB ID: %s (%x - %x)" , bigBankSpan , bigBankSpan .Key , bigBankSpan .EndKey )
91+ getBigBankStats := func () spanStats {
92+ stats := getSpanStats (ctx , t , c , 1 , bigBankSpan )
93+ t .L ().Printf ("bigbank: %d ranges, %s disk, %s live, %s total" ,
94+ stats .rangeCount ,
95+ humanizeutil .IBytes (stats .approximateDiskBytes ),
96+ humanizeutil .IBytes (stats .liveBytes ),
97+ humanizeutil .IBytes (stats .totalBytes ))
98+ return stats
99+ }
111100
112101 m .Go (func (ctx context.Context ) error {
113102 c .Run (ctx , option .WithNodes (c .Node (1 )), `./cockroach workload init kv {pgurl:1}` )
@@ -127,6 +116,10 @@ ORDER BY raw_start_key ASC LIMIT 1`,
127116 return err
128117 }
129118
119+ // Collect the stats before dropping the table. getBigBankStats will
120+ // print them out.
121+ _ = getBigBankStats ()
122+
130123 t .WorkerStatus ("dropping table" )
131124 defer t .WorkerStatus ()
132125
@@ -136,26 +129,39 @@ ORDER BY raw_start_key ASC LIMIT 1`,
136129 return err
137130 }
138131
139- t .WorkerStatus ("computing number of ranges " )
140- initialBankRanges := numBankRanges ()
132+ t .WorkerStatus ("computing span stats " )
133+ preDropBankStats := getBigBankStats ()
141134
142135 t .WorkerStatus ("dropping bank table" )
143136 if _ , err := conn .ExecContext (ctx , `DROP TABLE bigbank.bank` ); err != nil {
144137 return err
145138 }
146139
147- // Spend some time reading data with a timeout to make sure the
148- // DROP above didn't brick the cluster. At the time of writing,
149- // clearing all of the table data takes ~6min, so we want to run
150- // for at least a multiple of that duration.
151- const minDuration = 45 * time .Minute
152- deadline := timeutil .Now ().Add (minDuration )
153- curBankRanges := numBankRanges ()
154- t .WorkerStatus ("waiting for ~" , curBankRanges , " merges to complete (and for at least " , minDuration , " to pass)" )
155- for timeutil .Now ().Before (deadline ) || curBankRanges > 1 {
156- after := time .After (5 * time .Minute )
157- curBankRanges = numBankRanges () // this call takes minutes, unfortunately
158- t .WorkerProgress (1 - float64 (curBankRanges )/ float64 (initialBankRanges ))
140+ curBankStats := getBigBankStats ()
141+ progressFn := func () float64 {
142+ // Compute progress as a float [0, 1.0].
143+ //
144+ // We compute the progress in terms of the number of ranges and the
145+ // amount of disk space, relative to the stats we computed
146+ // immediately before dropping the table. That is:
147+ //
148+ // 1 - (current / initial)
149+ //
150+ // The range count progress subtracts 1 from each count to account
151+ // for the expectation that one range will always remain.
152+ //
153+ // We compute the overall progress as the minimum of the two metrics'
154+ // progress.
155+ mergeProgress := 1 - (float64 (curBankStats .rangeCount - 1 ) /
156+ float64 (preDropBankStats .rangeCount - 1 ))
157+ diskProgress := 1 - (float64 (curBankStats .approximateDiskBytes ) /
158+ float64 (preDropBankStats .approximateDiskBytes ))
159+ return max (0 , min (mergeProgress , diskProgress ))
160+ }
161+ // Terminate when progress is 0.975 or greater. That is, we've reclaimed
162+ // 97.5% of the disk space and merged 97.5% of the ranges.
163+ for progress := progressFn (); progress < 0.975 ; progress = progressFn () {
164+ t .WorkerProgress (progress )
159165
160166 var count int
161167 // NB: context cancellation in QueryRowContext does not work as expected.
@@ -168,16 +174,65 @@ ORDER BY raw_start_key ASC LIMIT 1`,
168174 return err
169175 }
170176
171- t .WorkerStatus ("waiting for ~" , curBankRanges , " merges to complete (and for at least " , timeutil .Until (deadline ), " to pass)" )
177+ t .WorkerStatus ("progress " , progress , " (" , curBankStats .rangeCount , " ranges, " ,
178+ humanizeutil .IBytes (curBankStats .approximateDiskBytes ), " disk usage)" )
172179 select {
173- case <- after :
180+ case <- time . After ( time . Minute ) :
174181 case <- ctx .Done ():
175182 return ctx .Err ()
176183 }
184+ curBankStats = getBigBankStats ()
177185 }
178- // TODO(tschottdorf): verify that disk space usage drops below to <some small amount>, but that
179- // may not actually happen (see https://github.com/cockroachdb/cockroach/issues/29290).
186+ t .WorkerStatus ("reclamation condition met" )
180187 return nil
181188 })
182189 m .Wait ()
183190}
191+
192+ func getKeyspanForTable (
193+ ctx context.Context , t test.Test , c cluster.Cluster , n int , tbl string ,
194+ ) (roachpb.Span , error ) {
195+ conn := c .Conn (ctx , t .L (), n )
196+ defer conn .Close ()
197+ var startKey , endKey roachpb.Key
198+ err := conn .QueryRow (`SELECT ` +
199+ `crdb_internal.table_span($1::regclass::oid::int)[1] AS start_key, ` +
200+ `crdb_internal.table_span($1::regclass::oid::int)[2] AS end_key` ,
201+ tbl ).Scan (& startKey , & endKey )
202+ return roachpb.Span {
203+ Key : startKey ,
204+ EndKey : endKey ,
205+ }, err
206+ }
207+
// spanStats is the subset of crdb_internal.tenant_span_stats output that
// getSpanStats extracts for a single key span.
type spanStats struct {
	// rangeCount is read from the "range_count" field of the stats JSON.
	rangeCount int

	// Byte counters, all read from the same stats JSON:
	//   - approximateDiskBytes: the "approximate_disk_bytes" field.
	//   - liveBytes: "live_bytes" under "approximate_total_stats".
	//   - totalBytes: the sum of "key_bytes" and "val_bytes" under
	//     "approximate_total_stats".
	approximateDiskBytes, liveBytes, totalBytes int64
}
214+
215+ func getSpanStats (
216+ ctx context.Context , t test.Test , c cluster.Cluster , n int , span roachpb.Span ,
217+ ) spanStats {
218+ conn := c .Conn (ctx , t .L (), n )
219+ defer conn .Close ()
220+
221+ var stats spanStats
222+ err := conn .QueryRow (
223+ `SELECT ` +
224+ `(stats->'range_count')::int AS range_count, ` +
225+ `(stats->'approximate_disk_bytes')::int AS approximate_disk_bytes, ` +
226+ `(stats->'approximate_total_stats'->'live_bytes')::int AS live_bytes, ` +
227+ `(stats->'approximate_total_stats'->'key_bytes')::int + (stats->'approximate_total_stats'->'val_bytes')::int AS total_bytes ` +
228+ `FROM crdb_internal.tenant_span_stats(ARRAY(SELECT($1::bytes, $2::bytes)))` ,
229+ span .Key , span .EndKey ).
230+ Scan (
231+ & stats .rangeCount ,
232+ & stats .approximateDiskBytes ,
233+ & stats .liveBytes ,
234+ & stats .totalBytes ,
235+ )
236+ require .NoError (t , err )
237+ return stats
238+ }
0 commit comments