2222from pg8000 .native import literal
2323
2424from materialize .workload_replay .util import (
25- long_tail_choice ,
2625 long_tail_float ,
2726 long_tail_int ,
2827 long_tail_text ,
@@ -41,8 +40,9 @@ def __init__(
4140 self .default = default
4241 self .chars = string .ascii_letters + string .digits
4342 self .data_shape = data_shape
44- if data_shape :
45- assert typ in ("text" , "bytea" ), f"Can't create text shape for type { typ } "
43+
44+ self ._years = list (range (2019 , 2026 ))
45+ self ._seq_counter = 0
4646
4747 self ._hot_strings = [
4848 f"{ name } _a" ,
@@ -56,6 +56,34 @@ def __init__(
5656 "NULL" ,
5757 ]
5858
59+ def _shaped_text (self , rng : random .Random ) -> str | None :
60+ """Generate text according to data_shape, or None if not applicable."""
61+ if self .data_shape == "datetime" :
62+ return self ._random_datetime (rng )
63+ elif self .data_shape == "random" :
64+ length = rng .randrange (5 , 40 )
65+ return "" .join (rng .choice (self .chars ) for _ in range (length ))
66+ elif self .data_shape == "uuid" :
67+ return str (uuid .UUID (int = rng .getrandbits (128 ), version = 4 ))
68+ elif self .data_shape == "sequential" :
69+ self ._seq_counter += 1
70+ return f"{ self .name } _{ self ._seq_counter } "
71+ elif self .data_shape is not None :
72+ raise ValueError (f"Unhandled data_shape { self .data_shape !r} " )
73+ return None
74+
def _random_date(self, rng: random.Random) -> str:
    """Generate a random ``YYYY-MM-DD`` date string.

    The year is chosen uniformly from ``self._years``, the month from
    1-12 and the day from 1-28. Days 29-31 are never produced, so the
    result is a valid calendar date for every month, but the distribution
    over real calendar days is not exactly uniform.

    Args:
        rng: Random source; every random draw goes through it.

    Returns:
        A zero-padded ISO-style date string such as ``"2021-03-07"``.
    """
    # Draw year, month, day in this order to keep the RNG call sequence
    # stable for a given seed.
    year = rng.choice(self._years)
    month = rng.randrange(1, 13)
    day = rng.randrange(1, 29)
    return f"{year}-{month:02}-{day:02}"
79+
def _random_datetime(self, rng: random.Random) -> str:
    """Generate a random ``YYYY-MM-DDTHH:MM:SSZ`` datetime string.

    The date part comes from ``self._random_date(rng)``; hour, minute and
    second are then drawn uniformly from 0-23, 0-59 and 0-59 respectively.

    Args:
        rng: Random source; every random draw goes through it.

    Returns:
        A zero-padded datetime string with a literal ``Z`` suffix, e.g.
        ``"2021-03-07T04:05:06Z"``.
    """
    # Date first, then hour, minute, second: keeps the RNG call sequence
    # stable for a given seed.
    date_part = self._random_date(rng)
    hour = rng.randrange(0, 24)
    minute = rng.randrange(0, 60)
    second = rng.randrange(0, 60)
    return f"{date_part}T{hour:02}:{minute:02}:{second:02}Z"
86+
5987 def avro_type (self ) -> str | list [str ]:
6088 """Return the Avro type for this column."""
6189 result = self .typ
@@ -99,18 +127,15 @@ def kafka_value(self, rng: random.Random) -> Any:
99127 return long_tail_float (- 1_000_000_000.0 , 1_000_000_000.0 , rng = rng )
100128
101129 elif self .typ in ("text" , "bytea" ):
102- if self .data_shape == "datetime" :
103- year = long_tail_choice (
104- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
105- )
106- return literal (
107- f"{ year } -{ rng .randrange (1 , 13 ):02} -{ rng .randrange (1 , 29 ):02} T{ rng .randrange (0 , 23 ):02} :{ rng .randrange (0 , 59 ):02} :{ rng .randrange (0 , 59 ):02} Z"
108- )
109- elif self .data_shape :
110- raise ValueError (f"Unhandled text shape { self .data_shape } " )
130+ shaped = self ._shaped_text (rng )
131+ if shaped is not None :
132+ return literal (shaped )
111133 return literal (long_tail_text (self .chars , 100 , self ._hot_strings , rng = rng ))
112134
113135 elif self .typ in ("character" , "character varying" ):
136+ shaped = self ._shaped_text (rng )
137+ if shaped is not None :
138+ return literal (shaped )
114139 return literal (long_tail_text (self .chars , 10 , self ._hot_strings , rng = rng ))
115140
116141 elif self .typ == "uuid" :
@@ -123,23 +148,15 @@ def kafka_value(self, rng: random.Random) -> Any:
123148 return json .dumps (result )
124149
125150 elif self .typ in ("timestamp with time zone" , "timestamp without time zone" ):
126- now = 1700000000000 # doesn't need to be exact
127- if rng .random () < 0.9 :
128- return now + long_tail_int (- 86_400_000 , 86_400_000 , rng = rng )
129- else :
130- return rng .randrange (0 , 9223372036854775807 )
151+ # Epoch millis spread uniformly across 2019–2025
152+ # 2019-01-01 = 1546300800000, 2026-01-01 = 1767225600000
153+ return rng .randrange (1546300800000 , 1767225600000 )
131154
132155 elif self .typ == "mz_timestamp" :
133- year = long_tail_choice (
134- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
135- )
136- return literal (f"{ year } -{ rng .randrange (1 , 13 )} -{ rng .randrange (1 , 29 )} " )
156+ return literal (self ._random_date (rng ))
137157
138158 elif self .typ == "date" :
139- year = long_tail_choice (
140- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
141- )
142- return literal (f"{ year } -{ rng .randrange (1 , 13 )} -{ rng .randrange (1 , 29 )} " )
159+ return literal (self ._random_date (rng ))
143160
144161 elif self .typ == "time" :
145162 if rng .random () < 0.8 :
@@ -150,19 +167,22 @@ def kafka_value(self, rng: random.Random) -> Any:
150167 )
151168
152169 elif self .typ == "int2range" :
153- a = str (long_tail_int (- 32768 , 32767 , rng = rng ))
154- b = str (long_tail_int (- 32768 , 32767 , rng = rng ))
155- return literal (f"[{ a } ,{ b } )" )
170+ a = long_tail_int (- 32768 , 32767 , rng = rng )
171+ b = long_tail_int (- 32768 , 32767 , rng = rng )
172+ lo , hi = min (a , b ), max (a , b )
173+ return literal (f"[{ lo } ,{ hi } )" )
156174
157175 elif self .typ == "int4range" :
158- a = str (long_tail_int (- 2147483648 , 2147483647 , rng = rng ))
159- b = str (long_tail_int (- 2147483648 , 2147483647 , rng = rng ))
160- return literal (f"[{ a } ,{ b } )" )
176+ a = long_tail_int (- 2147483648 , 2147483647 , rng = rng )
177+ b = long_tail_int (- 2147483648 , 2147483647 , rng = rng )
178+ lo , hi = min (a , b ), max (a , b )
179+ return literal (f"[{ lo } ,{ hi } )" )
161180
162181 elif self .typ == "int8range" :
163- a = str (long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng ))
164- b = str (long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng ))
165- return literal (f"[{ a } ,{ b } )" )
182+ a = long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng )
183+ b = long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng )
184+ lo , hi = min (a , b ), max (a , b )
185+ return literal (f"[{ lo } ,{ hi } )" )
166186
167187 elif self .typ == "map" :
168188 return {
@@ -220,23 +240,16 @@ def value(self, rng: random.Random, in_query: bool = True) -> Any:
220240 return str (val ) if in_query else val
221241
222242 elif self .typ in ("text" , "bytea" ):
223- if self .data_shape == "datetime" :
224- year = long_tail_choice (
225- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
226- )
227- s = (
228- f"{ year } -{ rng .randrange (1 , 13 ):02} -{ rng .randrange (1 , 29 ):02} "
229- f"T{ rng .randrange (0 , 23 ):02} :{ rng .randrange (0 , 59 ):02} :{ rng .randrange (0 , 59 ):02} Z"
230- )
231- return literal (s ) if in_query else s
232-
233- elif self .data_shape :
234- raise ValueError (f"Unhandled text shape { self .data_shape } " )
235-
243+ shaped = self ._shaped_text (rng )
244+ if shaped is not None :
245+ return literal (shaped ) if in_query else shaped
236246 s = long_tail_text (self .chars , 100 , self ._hot_strings , rng = rng )
237247 return literal (s ) if in_query else s
238248
239249 elif self .typ in ("character" , "character varying" ):
250+ shaped = self ._shaped_text (rng )
251+ if shaped is not None :
252+ return literal (shaped ) if in_query else shaped
240253 s = long_tail_text (self .chars , 10 , self ._hot_strings , rng = rng )
241254 return literal (s ) if in_query else s
242255
@@ -254,24 +267,15 @@ def value(self, rng: random.Random, in_query: bool = True) -> Any:
254267 return json .dumps (obj )
255268
256269 elif self .typ in ("timestamp with time zone" , "timestamp without time zone" ):
257- year = long_tail_choice (
258- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
259- )
260- s = f"{ year } -{ rng .randrange (1 , 13 )} -{ rng .randrange (1 , 29 )} "
270+ s = self ._random_date (rng )
261271 return literal (s ) if in_query else s
262272
263273 elif self .typ == "mz_timestamp" :
264- year = long_tail_choice (
265- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
266- )
267- s = f"{ year } -{ rng .randrange (1 , 13 )} -{ rng .randrange (1 , 29 )} "
274+ s = self ._random_date (rng )
268275 return literal (s ) if in_query else s
269276
270277 elif self .typ == "date" :
271- year = long_tail_choice (
272- [2023 , 2024 , 2025 , 2022 , 2021 , 2020 , 2019 ], hot_prob = 0.9 , rng = rng
273- )
274- s = f"{ year } -{ rng .randrange (1 , 13 )} -{ rng .randrange (1 , 29 )} "
278+ s = self ._random_date (rng )
275279 return literal (s ) if in_query else s
276280
277281 elif self .typ == "time" :
@@ -288,21 +292,24 @@ def value(self, rng: random.Random, in_query: bool = True) -> Any:
288292 return literal (s ) if in_query else s
289293
290294 elif self .typ == "int2range" :
291- a = str (long_tail_int (- 32768 , 32767 , rng = rng ))
292- b = str (long_tail_int (- 32768 , 32767 , rng = rng ))
293- s = f"[{ a } ,{ b } )"
295+ a = long_tail_int (- 32768 , 32767 , rng = rng )
296+ b = long_tail_int (- 32768 , 32767 , rng = rng )
297+ lo , hi = min (a , b ), max (a , b )
298+ s = f"[{ lo } ,{ hi } )"
294299 return literal (s ) if in_query else s
295300
296301 elif self .typ == "int4range" :
297- a = str (long_tail_int (- 2147483648 , 2147483647 , rng = rng ))
298- b = str (long_tail_int (- 2147483648 , 2147483647 , rng = rng ))
299- s = f"[{ a } ,{ b } )"
302+ a = long_tail_int (- 2147483648 , 2147483647 , rng = rng )
303+ b = long_tail_int (- 2147483648 , 2147483647 , rng = rng )
304+ lo , hi = min (a , b ), max (a , b )
305+ s = f"[{ lo } ,{ hi } )"
300306 return literal (s ) if in_query else s
301307
302308 elif self .typ == "int8range" :
303- a = str (long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng ))
304- b = str (long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng ))
305- s = f"[{ a } ,{ b } )"
309+ a = long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng )
310+ b = long_tail_int (- 9223372036854775808 , 9223372036854775807 , rng = rng )
311+ lo , hi = min (a , b ), max (a , b )
312+ s = f"[{ lo } ,{ hi } )"
306313 return literal (s ) if in_query else s
307314
308315 elif self .typ == "map" :
0 commit comments