|
13 | 13 |
|
14 | 14 | from __future__ import annotations |
15 | 15 |
|
| 16 | +import os |
16 | 17 | import re |
17 | 18 | import time |
18 | 19 | from textwrap import dedent |
|
30 | 31 | setup_polaris_for_iceberg, |
31 | 32 | ) |
32 | 33 | from materialize.mzcompose.services.sql_server import SqlServer |
| 34 | +from materialize.workload_replay.config import cluster_replica_sizes |
33 | 35 | from materialize.workload_replay.util import ( |
34 | 36 | get_kafka_topic, |
35 | 37 | get_mysql_reference_db_table, |
|
41 | 43 |
|
42 | 44 | def run_create_objects_part_1( |
43 | 45 | c: Composition, services: set[str], workload: dict[str, Any], verbose: bool |
44 | | -) -> None: |
45 | | - """Create clusters, databases, schemas, types, connections, and prepare sources.""" |
| 46 | +) -> dict[str, str]: |
| 47 | + """Create clusters, databases, schemas, types, connections, and prepare sources. |
| 48 | +
|
| 49 | + Returns a mapping of cluster name to original SIZE, so the caller can |
| 50 | + scale clusters back down after hydration completes. |
| 51 | + """ |
46 | 52 | c.sql( |
47 | 53 | "DROP CLUSTER IF EXISTS quickstart CASCADE", |
48 | 54 | user="mz_system", |
@@ -117,12 +123,40 @@ def run_create_objects_part_1( |
117 | 123 | ) |
118 | 124 |
|
119 | 125 | print("Creating clusters") |
| 126 | + # Create clusters at a large size for faster hydration. The original |
| 127 | + # sizes are returned so the caller can scale back down afterwards. |
| 128 | + # Pick the largest valid scale=1 size that fits on this machine. |
| 129 | + num_cpus = os.cpu_count() or 1 |
| 130 | + best_workers = max( |
| 131 | + ( |
| 132 | + cfg["workers"] |
| 133 | + for cfg in cluster_replica_sizes.values() |
| 134 | + if cfg.get("scale") == 1 |
| 135 | + and isinstance(cfg.get("workers"), int) |
| 136 | + and cfg["workers"] <= num_cpus |
| 137 | + ), |
| 138 | + default=1, |
| 139 | + ) |
| 140 | + hydration_size = f"scale=1,workers={best_workers}" |
| 141 | + original_cluster_sizes: dict[str, str] = {} |
120 | 142 | for name, cluster in workload["clusters"].items(): |
121 | 143 | if cluster["managed"]: |
122 | 144 | # Need at least one replica for everything to hydrate |
123 | 145 | create_sql = cluster["create_sql"].replace( |
124 | 146 | "REPLICATION FACTOR = 0", "REPLICATION FACTOR = 1" |
125 | 147 | ) |
| 148 | + # Swap in the hydration size, remembering the original. |
| 149 | + size_match = re.search(r"SIZE\s*=\s*'([^']+)'", create_sql, re.IGNORECASE) |
| 150 | + if size_match: |
| 151 | + original_cluster_sizes[name] = size_match.group(1) |
| 152 | + create_sql = ( |
| 153 | + create_sql[: size_match.start()] |
| 154 | + + f"SIZE = '{hydration_size}'" |
| 155 | + + create_sql[size_match.end() :] |
| 156 | + ) |
| 157 | + print( |
| 158 | + f" {name}: creating at {hydration_size} (original: {size_match.group(1)})" |
| 159 | + ) |
126 | 160 | c.sql(create_sql, user="mz_system", port=6877, print_statement=verbose) |
127 | 161 | else: |
128 | 162 | raise ValueError("Handle unmanaged clusters") |
@@ -541,6 +575,8 @@ def run_create_objects_part_1( |
541 | 575 | flags=re.DOTALL | re.IGNORECASE, |
542 | 576 | ) |
543 | 577 |
|
| 578 | + return original_cluster_sizes |
| 579 | + |
544 | 580 |
|
545 | 581 | def run_create_objects_part_2( |
546 | 582 | c: Composition, services: set[str], workload: dict[str, Any], verbose: bool |
|
0 commit comments