Skip to content

Commit 372b9ab

Browse files
committed
roachprod: add GCE hyperdisk-balanced support and misc improvements
Add support for GCE hyperdisk-balanced disk type with new flags:
- --gce-pd-volume-provisioned-iops: Required for hyperdisk-balanced, optional for pd-extreme
- --gce-pd-volume-provisioned-throughput: Required for hyperdisk-balanced

Additional improvements:
- Move google_compute_engine to the front of the SSH key list to avoid passphrase prompts when connecting to GCE VMs (the key is typically not passphrase-protected)
- Increase AWS gp3 max IOPS validation from 16000 to 80000
- Add AWS m8g/m8gd ARM instance type detection

Release note: None
Epic: None
1 parent 54d1211 commit 372b9ab

File tree

3 files changed

+36
-20
lines changed

3 files changed

+36
-20
lines changed

pkg/roachprod/config/config.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,16 @@ var localClusterRegex = regexp.MustCompile(`^local(|-[a-zA-Z0-9\-]+)$`)
190190
// establishing a remote session.
191191
// See https://github.com/openssh/openssh-portable/blob/86bdd385/ssh_config.5#L1123-L1130
192192
var DefaultPubKeyNames = []string{
193+
// google_compute_engine is listed first because it's typically not
194+
// passphrase-protected, avoiding passphrase prompts when connecting to GCE
195+
// VMs if the ssh-agent is slow to respond.
196+
"google_compute_engine",
193197
"id_rsa",
194198
"id_ecdsa",
195199
"id_ecdsa_sk",
196200
"id_ed25519",
197201
"id_ed25519_sk",
198202
"id_dsa",
199-
"google_compute_engine",
200203
}
201204

202205
// SSHPublicKeyPath returns the path to the default public key expected by

pkg/roachprod/vm/aws/aws.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ func (d *ebsDisk) Set(s string) error {
166166
case "gp2":
167167
// Nothing -- size checked above.
168168
case "gp3":
169-
if d.IOPs > 16000 {
170-
return errors.AssertionFailedf("Iops required for gp3 disk: [3000, 16000]")
169+
if d.IOPs > 80000 {
170+
return errors.AssertionFailedf("Iops required for gp3 disk: [3000, 80000]")
171171
}
172172
if d.IOPs == 0 {
173173
// 3000 is a base IOPs for gp3.
@@ -1378,7 +1378,8 @@ func (p *Provider) runInstance(
13781378
}
13791379
imageID := withFlagOverride(az.Region.AMI_X86_64, &providerOpts.ImageAMI)
13801380
useArmAMI := strings.Index(machineType, "6g.") == 1 || strings.Index(machineType, "6gd.") == 1 ||
1381-
strings.Index(machineType, "7g.") == 1 || strings.Index(machineType, "7gd.") == 1
1381+
strings.Index(machineType, "7g.") == 1 || strings.Index(machineType, "7gd.") == 1 ||
1382+
strings.Index(machineType, "8g.") == 1 || strings.Index(machineType, "8gd.") == 1
13821383
if useArmAMI && (opts.Arch != "" && opts.Arch != string(vm.ArchARM64)) {
13831384
return nil, errors.Errorf("machine type %s is arm64, but requested arch is %s", machineType, opts.Arch)
13841385
}

pkg/roachprod/vm/gce/gcloud.go

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -347,16 +347,18 @@ type ProviderOpts struct {
347347
// projects represent the GCE projects to operate on. Accessed through
348348
// GetProject() or GetProjects() depending on whether the command accepts
349349
// multiple projects or a single one.
350-
MachineType string
351-
MinCPUPlatform string
352-
BootDiskType string
353-
Zones []string
354-
Image string
355-
SSDCount int
356-
PDVolumeType string
357-
PDVolumeSize int
358-
PDVolumeCount int
359-
UseMultipleDisks bool
350+
MachineType string
351+
MinCPUPlatform string
352+
BootDiskType string
353+
Zones []string
354+
Image string
355+
SSDCount int
356+
PDVolumeType string
357+
PDVolumeSize int
358+
PDVolumeCount int
359+
PDVolumeProvisionedIOPS int
360+
PDVolumeProvisionedThroughput int
361+
UseMultipleDisks bool
360362
// use spot instances (i.e., latest version of preemptibles which can run > 24 hours)
361363
UseSpot bool
362364
// Use an instance template and a managed instance group to create VMs. This
@@ -740,12 +742,12 @@ func (p *Provider) CreateVolume(
740742
}
741743

742744
switch vco.Type {
743-
case "local-ssd", "pd-balanced", "pd-extreme", "pd-ssd", "pd-standard":
745+
case "local-ssd", "pd-balanced", "pd-extreme", "pd-ssd", "pd-standard", "hyperdisk-balanced":
744746
args = append(args, "--type", vco.Type)
745747
case "":
746748
// use the default
747749
default:
748-
return vol, errors.Newf("Expected type to be one of local-ssd, pd-balanced, pd-extreme, pd-ssd, pd-standard got %s\n", vco.Type)
750+
return vol, errors.Newf("Expected type to be one of local-ssd, pd-balanced, pd-extreme, pd-ssd, pd-standard, hyperdisk-balanced got %s\n", vco.Type)
749751
}
750752

751753
var commandResponse []describeVolumeCommandResponse
@@ -1140,11 +1142,16 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
11401142
flags.IntVar(&o.SSDCount, ProviderName+"-local-ssd-count", 1,
11411143
"Number of local SSDs to create, only used if local-ssd=true")
11421144
flags.StringVar(&o.PDVolumeType, ProviderName+"-pd-volume-type", "pd-ssd",
1143-
"Type of the persistent disk volume, only used if local-ssd=false")
1145+
"Type of the persistent disk volume, only used if local-ssd=false "+
1146+
"(pd-ssd, pd-balanced, pd-extreme, pd-standard, hyperdisk-balanced)")
11441147
flags.IntVar(&o.PDVolumeSize, ProviderName+"-pd-volume-size", 500,
11451148
"Size in GB of persistent disk volume, only used if local-ssd=false")
11461149
flags.IntVar(&o.PDVolumeCount, ProviderName+"-pd-volume-count", 1,
11471150
"Number of persistent disk volumes, only used if local-ssd=false")
1151+
flags.IntVar(&o.PDVolumeProvisionedIOPS, ProviderName+"-pd-volume-provisioned-iops", 0,
1152+
"Provisioned IOPS for the disk volume (required for hyperdisk-balanced, optional for pd-extreme)")
1153+
flags.IntVar(&o.PDVolumeProvisionedThroughput, ProviderName+"-pd-volume-provisioned-throughput", 0,
1154+
"Provisioned throughput in MiB/s for the disk volume (required for hyperdisk-balanced)")
11481155
flags.BoolVar(&o.UseMultipleDisks, ProviderName+"-enable-multiple-stores",
11491156
false, "Enable the use of multiple stores by creating one store directory per disk. "+
11501157
"Default is to raid0 stripe all disks.")
@@ -1503,9 +1510,14 @@ func (p *Provider) computeInstanceArgs(
15031510
fmt.Sprintf("size=%dGB", providerOpts.PDVolumeSize),
15041511
"auto-delete=yes",
15051512
}
1506-
// TODO(pavelkalinnikov): support disk types with "provisioned-throughput"
1507-
// option, such as Hyperdisk Throughput:
1508-
// https://cloud.google.com/compute/docs/disks/add-hyperdisk#hyperdisk-throughput.
1513+
// Add provisioned IOPS if specified (required for hyperdisk-balanced, optional for pd-extreme).
1514+
if providerOpts.PDVolumeProvisionedIOPS > 0 {
1515+
pdProps = append(pdProps, fmt.Sprintf("provisioned-iops=%d", providerOpts.PDVolumeProvisionedIOPS))
1516+
}
1517+
// Add provisioned throughput if specified (required for hyperdisk-balanced).
1518+
if providerOpts.PDVolumeProvisionedThroughput > 0 {
1519+
pdProps = append(pdProps, fmt.Sprintf("provisioned-throughput=%d", providerOpts.PDVolumeProvisionedThroughput))
1520+
}
15091521
args = append(args, "--create-disk", strings.Join(pdProps, ","))
15101522
}
15111523
// Enable DISCARD commands for persistent disks, as is advised in:

0 commit comments

Comments
 (0)