diff --git a/benchmarks/ahrefs-devkit/dune b/benchmarks/ahrefs-devkit/dune
new file mode 100644
index 000000000..1002056ad
--- /dev/null
+++ b/benchmarks/ahrefs-devkit/dune
@@ -0,0 +1,25 @@
+(executable
+ (name htmlStream_bench)
+ (modules htmlStream_bench)
+ (libraries devkit))
+
+(executable
+ (name stre_bench)
+ (modules stre_bench)
+ (libraries devkit))
+
+(executable
+ (name network_bench)
+ (modules network_bench)
+ (libraries devkit))
+
+(executable
+ (name gzip_bench)
+ (modules gzip_bench)
+ (libraries devkit))
+
+; Add benchmarks to buildbench alias for sandmark
+
+(alias
+ (name buildbench)
+ (deps htmlStream_bench.exe stre_bench.exe network_bench.exe gzip_bench.exe))
diff --git a/benchmarks/ahrefs-devkit/gzip_bench.ml b/benchmarks/ahrefs-devkit/gzip_bench.ml
new file mode 100644
index 000000000..9013df7c9
--- /dev/null
+++ b/benchmarks/ahrefs-devkit/gzip_bench.ml
@@ -0,0 +1,355 @@
+(** Gzip GC Benchmark Suite
+
+ This suite stresses the OCaml garbage collector through intensive
+ compression and decompression operations. The benchmarks test:
+ - Buffer-based I/O with continuous allocations
+ - Zlib compression/decompression creating temporary buffers
+ - Streaming patterns with state management
+ - CRC calculations and header processing
+ - Variable-sized buffer operations *)
+
+open Devkit
+open ExtLib
+
+(* Helper functions for string compression/decompression *)
+let compress_string ?level str =
+ let _ = level in
+ let oc = Gzip_io.output (IO.output_string ()) in
+ IO.nwrite_string oc str;
+ IO.close_out oc
+
+let uncompress_string str =
+ let ic = Gzip_io.input (IO.input_string str) in
+ let result = IO.read_all ic in
+ IO.close_in ic;
+ result
+
+(* Helper function to generate test data *)
+let generate_test_data size pattern =
+ let data = Bytes.create size in
+ for i = 0 to size - 1 do
+ Bytes.set data i (char_of_int (i * pattern mod 256))
+ done;
+ Bytes.to_string data
+
+(* Benchmark 1: Small Buffer Compression Storm *)
+let bench_small_buffer_storm () =
+ let compressed_data = ref [] in
+
+ for i = 1 to 5000 do
+ let size = 100 + (i mod 900) in
+ let data = generate_test_data size (i mod 256) in
+
+ let compressed = compress_string data in
+ let decompressed = uncompress_string compressed in
+
+ let compressed_fast = compress_string ~level:1 data in
+ let compressed_best = compress_string ~level:9 data in
+
+ if i mod 100 = 0 then
+ compressed_data :=
+ compressed :: compressed_fast :: compressed_best :: !compressed_data;
+
+ let _ = String.length compressed in
+ let _ = String.length decompressed in
+
+ if List.length !compressed_data > 300 then
+ compressed_data := ExtList.List.take 150 !compressed_data
+ done
+
+(* Benchmark 2: Large Block Compression *)
+let bench_large_block_compression () =
+ let retained_blocks = ref [] in
+
+ for i = 1 to 100 do
+ let size = 10000 * (1 + (i mod 10)) in
+ let data = generate_test_data size i in
+
+ let compressed_default = compress_string data in
+ let compressed_filtered = compress_string ~level:5 data in
+
+ let decompressed1 = uncompress_string compressed_default in
+ let decompressed2 = uncompress_string compressed_filtered in
+
+ assert (decompressed1 = data);
+ assert (decompressed2 = data);
+
+ let chunk_size = 1024 in
+ let chunks = ref [] in
+ let pos = ref 0 in
+ while !pos < String.length data do
+ let len = min chunk_size (String.length data - !pos) in
+ let chunk = String.sub data !pos len in
+ let compressed_chunk = compress_string chunk in
+ chunks := compressed_chunk :: !chunks;
+ pos := !pos + len
+ done;
+
+ if i mod 10 = 0 then
+ retained_blocks :=
+ compressed_default :: compressed_filtered :: !retained_blocks;
+
+ if List.length !retained_blocks > 20 then
+ retained_blocks := ExtList.List.take 10 !retained_blocks
+ done
+
+(* Benchmark 3: Streaming Compression/Decompression *)
+let bench_streaming_operations () =
+ let stream_buffers = ref [] in
+
+ for i = 1 to 500 do
+ let data_size = 5000 + (i * 100) in
+ let source_data = generate_test_data data_size i in
+
+ let out_channel = Gzip_io.output (IO.output_string ()) in
+
+ let chunk_size = 256 in
+ let pos = ref 0 in
+ while !pos < String.length source_data do
+ let len = min chunk_size (String.length source_data - !pos) in
+ let chunk = String.sub source_data !pos len in
+ IO.nwrite_string out_channel chunk;
+ pos := !pos + len
+ done;
+
+ let compressed = IO.close_out out_channel in
+
+ let in_channel = Gzip_io.input (IO.input_string compressed) in
+ let decompressed_buf = Buffer.create data_size in
+
+ let read_buf = Bytes.create 128 in
+ let rec read_loop () =
+ try
+ let n = IO.input in_channel read_buf 0 128 in
+ if n > 0 then (
+ Buffer.add_subbytes decompressed_buf read_buf 0 n;
+ read_loop ())
+ with IO.No_more_input -> ()
+ in
+ read_loop ();
+ IO.close_in in_channel;
+
+ let decompressed = Buffer.contents decompressed_buf in
+
+ assert (decompressed = source_data);
+
+ if i mod 50 = 0 then
+ stream_buffers := compressed :: decompressed :: !stream_buffers;
+
+ if List.length !stream_buffers > 100 then
+ stream_buffers := ExtList.List.take 50 !stream_buffers
+ done
+
+(* Benchmark 4: Mixed Size Compression Patterns *)
+let bench_mixed_size_patterns () =
+ let mixed_cache = Hashtbl.create 1000 in
+
+ for i = 1 to 1000 do
+ let sizes = [| 64; 128; 256; 512; 1024; 2048; 4096; 8192; 128; 64 |] in
+ let size = sizes.(i mod Array.length sizes) in
+ let data = generate_test_data size (i * 7) in
+
+ let levels = [ 1; 3; 5; 7; 9 ] in
+ let compressed_versions =
+ List.map (fun level -> compress_string ~level data) levels
+ in
+
+ let decompressed_versions =
+ List.map uncompress_string compressed_versions
+ in
+
+ List.iter (fun d -> assert (d = data)) decompressed_versions;
+
+ let mixed = String.concat "" compressed_versions in
+ let mixed_compressed = compress_string mixed in
+
+ if i mod 13 = 0 || i mod 17 = 0 then
+ Hashtbl.replace mixed_cache i mixed_compressed;
+
+ let chain =
+ List.fold_left
+ (fun acc comp -> compress_string (acc ^ comp))
+ ""
+ (ExtList.List.take 3 compressed_versions)
+ in
+
+ if i mod 23 = 0 then Hashtbl.replace mixed_cache (i + 10000) chain;
+
+ if i mod 100 = 0 then
+ Hashtbl.iter
+ (fun k _ -> if k < i - 200 then Hashtbl.remove mixed_cache k)
+ mixed_cache
+ done
+
+(* Benchmark 5: Concurrent-style Compression *)
+let bench_concurrent_style () =
+ let active_streams = Array.init 10 (fun _ -> ref []) in
+ let completed = ref [] in
+
+ for i = 1 to 2000 do
+ let stream_id = i mod Array.length active_streams in
+ let stream = active_streams.(stream_id) in
+
+ let data = generate_test_data (500 + (stream_id * 100)) i in
+
+ let compressed = compress_string data in
+ stream := compressed :: !stream;
+
+ if i mod 50 = 0 then
+ Array.iteri
+ (fun _idx s ->
+ if List.length !s > 5 then (
+ let combined = String.concat "" (List.rev !s) in
+ let recompressed = compress_string combined in
+
+ let _ = uncompress_string recompressed in
+
+ completed := recompressed :: !completed;
+ s := []))
+ active_streams;
+
+ if List.length !completed > 100 then
+ completed := ExtList.List.take 50 !completed
+ done
+
+(* Benchmark 6: Compression with Headers and Metadata *)
+let bench_headers_metadata () =
+ let metadata_cache = ref [] in
+
+ for i = 1 to 2000 do
+ let data =
+ Printf.sprintf "File_%d_Content_%s" i
+ (String.make (100 + (i mod 400)) (char_of_int (65 + (i mod 26))))
+ in
+
+ let compressed = compress_string data in
+
+ let header_size = min 10 (String.length compressed) in
+ let header = String.sub compressed 0 header_size in
+
+ let checksum = ref 0 in
+ String.iter
+ (fun c -> checksum := ((!checksum * 31) + Char.code c) mod 65536)
+ data;
+
+ let metadata =
+ (header, !checksum, String.length data, String.length compressed)
+ in
+
+ let decompressed = uncompress_string compressed in
+ assert (decompressed = data);
+
+ let recompressed = compress_string ~level:(1 + (i mod 9)) decompressed in
+
+ let ratio =
+ float_of_int (String.length compressed)
+ /. float_of_int (String.length data)
+ in
+
+ if i mod 50 = 0 then
+ metadata_cache := (metadata, ratio, recompressed) :: !metadata_cache;
+
+ if List.length !metadata_cache > 100 then
+ metadata_cache := ExtList.List.take 50 !metadata_cache
+ done
+
+(* Benchmark 7: Buffer Reuse and Recycling *)
+let bench_buffer_recycling () =
+ let buffer_pool = Array.init 20 (fun _ -> Buffer.create 1024) in
+ let compressed_pool = ref [] in
+ let generation_counter = ref 0 in
+
+ for i = 1 to 1500 do
+ let buffer_idx = i mod Array.length buffer_pool in
+ let buffer = buffer_pool.(buffer_idx) in
+
+ Buffer.clear buffer;
+
+ for j = 1 to 100 + (i mod 200) do
+ Buffer.add_string buffer (Printf.sprintf "Line_%d_%d " i j)
+ done;
+
+ let data = Buffer.contents buffer in
+
+ let compressed = compress_string data in
+
+ let decode_buffer =
+ buffer_pool.((buffer_idx + 1) mod Array.length buffer_pool)
+ in
+ Buffer.clear decode_buffer;
+ let decompressed = uncompress_string compressed in
+ Buffer.add_string decode_buffer decompressed;
+
+ incr generation_counter;
+
+ if !generation_counter mod 30 = 0 then (
+ compressed_pool := compressed :: !compressed_pool;
+
+ if List.length !compressed_pool > 10 then
+ let old_data = List.hd !compressed_pool in
+ let mixed = old_data ^ compressed in
+ let mixed_compressed = compress_string mixed in
+ compressed_pool := mixed_compressed :: List.tl !compressed_pool);
+
+ if !generation_counter mod 100 = 0 && List.length !compressed_pool > 50 then
+ compressed_pool := ExtList.List.take 25 !compressed_pool
+ done
+
+(* Benchmark 8: Complex Compression Pipelines *)
+let bench_compression_pipelines () =
+ let pipeline_stages = Hashtbl.create 500 in
+ let final_results = ref [] in
+
+ for i = 1 to 1000 do
+ let base_data = generate_test_data (1000 + (i * 10)) i in
+ let stage1 = compress_string base_data in
+
+ let decompressed = uncompress_string stage1 in
+ let modified = decompressed ^ Printf.sprintf "_modified_%d" i in
+ let stage2 = compress_string ~level:5 modified in
+
+ let stage3 =
+ match Hashtbl.find_opt pipeline_stages (i - 10) with
+ | Some (prev1, prev2, _) ->
+ let combined = stage1 ^ prev1 ^ stage2 ^ prev2 in
+ compress_string ~level:3 combined
+ | None -> compress_string (stage1 ^ stage2)
+ in
+
+ Hashtbl.replace pipeline_stages i (stage1, stage2, stage3);
+
+ let multi_compressed =
+ let temp1 = compress_string ~level:1 base_data in
+ let temp2 = compress_string ~level:5 temp1 in
+ compress_string ~level:9 temp2
+ in
+
+ let multi_decompressed =
+ let temp1 = uncompress_string multi_compressed in
+ let temp2 = uncompress_string temp1 in
+ uncompress_string temp2
+ in
+
+ assert (multi_decompressed = base_data);
+
+ if i mod 50 = 0 then
+ final_results := (stage3, multi_compressed) :: !final_results;
+
+ if i mod 100 = 0 then (
+ Hashtbl.iter
+ (fun k _ -> if k < i - 50 then Hashtbl.remove pipeline_stages k)
+ pipeline_stages;
+ if List.length !final_results > 40 then
+ final_results := ExtList.List.take 20 !final_results)
+ done
+
+(* Main benchmark suite runner *)
+let () =
+ bench_small_buffer_storm ();
+ bench_large_block_compression ();
+ bench_streaming_operations ();
+ bench_mixed_size_patterns ();
+ bench_concurrent_style ();
+ bench_headers_metadata ();
+ bench_buffer_recycling ();
+ bench_compression_pipelines ()
diff --git a/benchmarks/ahrefs-devkit/htmlStream_bench.ml b/benchmarks/ahrefs-devkit/htmlStream_bench.ml
new file mode 100644
index 000000000..c60c321de
--- /dev/null
+++ b/benchmarks/ahrefs-devkit/htmlStream_bench.ml
@@ -0,0 +1,315 @@
+(** HtmlStream GC Benchmark Suite
+
+ This suite is designed to stress the OCaml garbage collector with various
+ allocation patterns and heap shape changes over time. Each benchmark targets
+ different GC behaviors:
+ - Minor collection pressure (ephemeral allocations)
+ - Major collection pressure (long-lived data)
+ - Heap fragmentation
+ - Generational hypothesis violations
+ - Large object handling *)
+
+open Devkit
+
+(* Benchmark 1: Small String Pressure (Minor GC stress) *)
+let bench_small_strings () =
+ let collected_texts = ref [] in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024) in
+ for i = 1 to 10000 do
+ Buffer.add_string buf "
";
+ Buffer.add_string buf (string_of_int i);
+ Buffer.add_string buf " small text ";
+ Buffer.add_string buf (String.make 10 (char_of_int (65 + (i mod 26))));
+ Buffer.add_string buf "
"
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (function
+ | HtmlStream.Text t ->
+ if Random.int 100 < 10 then
+ collected_texts := HtmlStream.Raw.project t :: !collected_texts
+ | _ -> ())
+ html
+ done
+
+(* Benchmark 2: Attribute List Pressure *)
+let bench_attribute_lists () =
+ let total_attrs = ref 0 in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 2) in
+ for i = 1 to 5000 do
+ Buffer.add_string buf "";
+ Buffer.add_string buf (string_of_int i);
+ Buffer.add_string buf "
"
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (function
+ | HtmlStream.Tag (_, attrs) ->
+ total_attrs := !total_attrs + List.length attrs
+ | _ -> ())
+ html
+ done
+
+(* Benchmark 3: Large Block Allocations *)
+let bench_large_blocks () =
+ let retained_blocks = ref [] in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 5) in
+ for i = 1 to 1000 do
+ let size = 1024 * (1 + (i mod 100)) in
+ Buffer.add_string buf "";
+
+ if i mod 2 = 0 then (
+ Buffer.add_string buf "")
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (function
+ | HtmlStream.Script (_, s) | HtmlStream.Style (_, s) ->
+ if Random.int 100 < 20 then retained_blocks := s :: !retained_blocks
+ | _ -> ())
+ html
+ done
+
+(* Benchmark 4: Heap Shape Morphing *)
+let bench_morphing_heap () =
+ let phase_data = ref [] in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 3) in
+ for phase = 1 to 100 do
+ if phase mod 3 = 0 then
+ for _ = 1 to 100 do
+ Buffer.add_string buf "small"
+ done;
+
+ if phase mod 3 = 1 then (
+ for depth = 1 to 20 do
+ Printf.bprintf buf "" depth
+ (phase * depth)
+ done;
+ Buffer.add_string buf "nested content";
+ for _ = 1 to 20 do
+ Buffer.add_string buf "
"
+ done);
+
+ if phase mod 3 = 2 then (
+ Buffer.add_string buf "")
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (fun elem ->
+ if Random.int 100 < 15 then phase_data := elem :: !phase_data;
+ if List.length !phase_data > 1000 then phase_data := List.tl !phase_data)
+ html
+ done
+
+(* Benchmark 5: Fragmentation Stress *)
+let bench_fragmentation () =
+ let retained = Hashtbl.create 1000 in
+ let counter = ref 0 in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 4) in
+ let sizes = [| 10; 100; 1000; 10000; 100; 10; 5000; 50; 500 |] in
+ for i = 1 to 2000 do
+ let size = sizes.(i mod Array.length sizes) in
+
+ match i mod 4 with
+ | 0 -> Printf.bprintf buf "%s
" (String.make size 'a')
+ | 1 ->
+ Buffer.add_string buf "content
"
+ | 2 -> Printf.bprintf buf "" (String.make size 'c')
+ | _ ->
+ for _ = 1 to size / 100 do
+ Buffer.add_string buf "x"
+ done
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (fun elem ->
+ incr counter;
+ if !counter mod 7 = 0 || !counter mod 11 = 0 || !counter mod 13 = 0 then
+ Hashtbl.replace retained !counter elem;
+ if !counter mod 100 = 0 then
+ Hashtbl.iter
+ (fun k _ -> if k < !counter - 500 then Hashtbl.remove retained k)
+ retained)
+ html
+ done
+
+(* Benchmark 6: Generational Hypothesis Violation *)
+let bench_generational_violation () =
+ let old_generation = ref [] in
+ let middle_generation = ref [] in
+ let young_generation = ref [] in
+ let counter = ref 0 in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 2) in
+ for batch = 1 to 100 do
+ for i = 1 to 100 do
+ Printf.bprintf buf "" batch i;
+ Printf.bprintf buf "Generation %d Item %d" batch i;
+ for j = 1 to batch do
+ Printf.bprintf buf "%d" j (batch * i * j)
+ done;
+ Buffer.add_string buf "
"
+ done
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (fun elem ->
+ incr counter;
+ if !counter mod 100 = 0 then (
+ old_generation := !middle_generation;
+ middle_generation := !young_generation;
+ young_generation := []);
+ young_generation := elem :: !young_generation;
+
+ if !counter mod 50 = 0 then
+ let mixed = !old_generation @ !young_generation in
+ young_generation :=
+ let rec take n = function
+ | [] -> []
+ | _ when n <= 0 -> []
+ | h :: t -> h :: take (n - 1) t
+ in
+ List.rev (take 10 (List.rev mixed)))
+ html
+ done
+
+(* Benchmark 7: Allocation Rate Variation *)
+let bench_variable_rate () =
+ let allocation_history = Array.make 1000 [] in
+ let index = ref 0 in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 3) in
+ for phase = 1 to 50 do
+ let intensity =
+ int_of_float (50.0 +. (45.0 *. sin (float_of_int phase *. 0.3)))
+ in
+
+ if intensity < 30 then
+ for _ = 1 to intensity do
+ Buffer.add_string buf "low intensity
"
+ done
+ else if intensity < 70 then
+ for i = 1 to intensity * 10 do
+ Printf.bprintf buf "content %d
" i i
+ done
+ else
+ for i = 1 to intensity * 20 do
+ Buffer.add_string buf "%d" i
+ done
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (fun elem ->
+ let i = !index mod Array.length allocation_history in
+ allocation_history.(i) <- elem :: allocation_history.(i);
+ incr index;
+ if !index mod 100 = 0 then
+ let clear_index =
+ (!index - 500) mod Array.length allocation_history
+ in
+ if clear_index >= 0 then allocation_history.(clear_index) <- [])
+ html
+ done
+
+(* Benchmark 8: Reference Graph Complexity *)
+let bench_complex_references () =
+ let graph = Hashtbl.create 1000 in
+ let edges = ref [] in
+ let node_counter = ref 0 in
+
+ for _ = 1 to 10 do
+ let buf = Buffer.create (1024 * 1024 * 2) in
+ for layer = 1 to 20 do
+ for node = 1 to 50 do
+ Printf.bprintf buf "" layer node;
+ for ref_layer = 1 to 5 do
+ for ref_node = 1 to 10 do
+ Printf.bprintf buf "
ref"
+ (((layer + ref_layer) mod 20) + 1)
+ (((node + ref_node) mod 50) + 1)
+ done
+ done;
+ Buffer.add_string buf "
"
+ done
+ done;
+ let html = Buffer.contents buf in
+
+ let ctx = HtmlStream.init () in
+ HtmlStream.parse ~ctx
+ (fun elem ->
+ incr node_counter;
+ let node_id = !node_counter in
+ Hashtbl.add graph node_id elem;
+
+ if node_id > 10 then
+ for _ = 1 to Random.int 5 + 1 do
+ let target = Random.int (node_id - 1) + 1 in
+ edges := (node_id, target) :: !edges
+ done;
+
+ if node_id mod 200 = 0 then (
+ edges :=
+ List.filter
+ (fun (s, t) -> s > node_id - 1000 && t > node_id - 1000)
+ !edges;
+ Hashtbl.iter
+ (fun k _ -> if k < node_id - 1000 then Hashtbl.remove graph k)
+ graph))
+ html
+ done
+
+(* Main benchmark suite runner *)
+let () =
+ bench_small_strings ();
+ bench_attribute_lists ();
+ bench_large_blocks ();
+ bench_morphing_heap ();
+ bench_fragmentation ();
+ bench_generational_violation ();
+ bench_variable_rate ();
+ bench_complex_references ()
diff --git a/benchmarks/ahrefs-devkit/network_bench.ml b/benchmarks/ahrefs-devkit/network_bench.ml
new file mode 100644
index 000000000..570fb2c6a
--- /dev/null
+++ b/benchmarks/ahrefs-devkit/network_bench.ml
@@ -0,0 +1,485 @@
+(** Network GC Benchmark Suite
+
+ This suite stresses the OCaml garbage collector through intensive network
+ parsing and manipulation operations. The benchmarks test:
+ - IPv4 address parsing (ragel-based parser creating intermediate values)
+ - CIDR subnet calculations with bitwise operations
+ - Int32 boxing/unboxing patterns
+ - String-to-structured data conversions
+ - Network address comparisons and transformations *)
+
+open Devkit
+
+(* Benchmark 1: IPv4 Address Parsing Storm *)
+let bench_ipv4_parsing_storm () =
+ let parsed_ips = ref [] in
+
+ for i = 1 to 10000 do
+ let ip_strings =
+ [
+ Printf.sprintf "%d.%d.%d.%d" (i mod 256)
+ (i * 7 mod 256)
+ (i * 13 mod 256)
+ (i * 17 mod 256);
+ Printf.sprintf "192.168.%d.%d" (i mod 256) (i * 3 mod 256);
+ Printf.sprintf "10.%d.%d.%d" (i * 5 mod 256) (i * 11 mod 256) (i mod 256);
+ Printf.sprintf "172.%d.%d.%d"
+ (16 + (i mod 16))
+ (i * 2 mod 256)
+ (i * 19 mod 256);
+ ]
+ in
+
+ List.iter
+ (fun ip_str ->
+ try
+ let ip = Network.ipv4_of_string_exn ip_str in
+ let str_back = Network.string_of_ipv4 ip in
+ let cidr_str = ip_str ^ "/" ^ string_of_int (8 + (i mod 25)) in
+ let cidr = Network.cidr_of_string_exn cidr_str in
+ let cidr_back = Network.string_of_cidr cidr in
+
+ if i mod 100 = 0 then
+ parsed_ips := (ip, cidr, str_back, cidr_back) :: !parsed_ips;
+
+ let _ = Network.ipv4_matches ip cidr in
+ ()
+ with _ -> ())
+ ip_strings;
+
+ if List.length !parsed_ips > 100 then
+ parsed_ips := ExtList.List.take 50 !parsed_ips
+ done
+
+(* Benchmark 2: CIDR Subnet Calculations *)
+let bench_cidr_calculations () =
+ let subnet_cache = Hashtbl.create 1000 in
+
+ for i = 1 to 5000 do
+ let base_ip =
+ Network.ipv4_of_int32 (Int32.of_int (0x0A000000 + (i * 256)))
+ in
+ let masks = [ 8; 16; 24; 28; 30; 32 ] in
+
+ List.iter
+ (fun mask ->
+ let cidr_str =
+ Printf.sprintf "%s/%d" (Network.string_of_ipv4 base_ip) mask
+ in
+ let cidr = Network.cidr_of_string_exn cidr_str in
+ let net_ip = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in
+ let net_mask = mask in
+
+ let test_ips =
+ List.init 50 (fun j ->
+ let offset = Int32.of_int j in
+ let test_ip = Int32.add net_ip offset in
+ Network.ipv4_of_int32 test_ip)
+ in
+
+ let members =
+ List.filter (fun ip -> Network.ipv4_matches ip cidr) test_ips
+ in
+
+ let member_strings = List.map Network.string_of_ipv4 members in
+
+ if i mod 20 = 0 then
+ Hashtbl.replace subnet_cache (i, mask) member_strings;
+
+ let subnet_size = Int32.shift_left 1l (32 - net_mask) in
+ let broadcast = Int32.sub (Int32.add net_ip subnet_size) 1l in
+ let broadcast_ip = Network.ipv4_of_int32 broadcast in
+ let _ = Network.string_of_ipv4 broadcast_ip in
+ ())
+ masks;
+
+ if i mod 100 = 0 then
+ Hashtbl.iter
+ (fun (idx, _) _ ->
+ if idx < i - 200 then Hashtbl.remove subnet_cache (idx, 0))
+ subnet_cache
+ done
+
+(* Benchmark 3: Network Range Operations *)
+let bench_range_operations () =
+ let range_results = ref [] in
+
+ for i = 1 to 2000 do
+ let start_ip =
+ Network.ipv4_of_int32 (Int32.of_int (0xC0A80000 + (i * 10)))
+ in
+ let end_ip =
+ Network.ipv4_of_int32 (Int32.of_int (0xC0A80000 + (i * 10) + 255))
+ in
+
+ let ips_in_range = ref [] in
+ let current = ref (Network.int32_of_ipv4 start_ip) in
+ let end_val = Network.int32_of_ipv4 end_ip in
+
+ while Int32.compare !current end_val <= 0 do
+ let ip = Network.ipv4_of_int32 !current in
+ let ip_str = Network.string_of_ipv4 ip in
+ ips_in_range := ip_str :: !ips_in_range;
+ current := Int32.succ !current
+ done;
+
+ let sorted =
+ List.sort
+ (fun a b ->
+ let ip_a = Network.ipv4_of_string_exn a in
+ let ip_b = Network.ipv4_of_string_exn b in
+ Int32.compare
+ (Network.int32_of_ipv4 ip_a)
+ (Network.int32_of_ipv4 ip_b))
+ !ips_in_range
+ in
+
+ let private_ips =
+ List.filter
+ (fun ip_str ->
+ try
+ let ip = Network.ipv4_of_string_exn ip_str in
+ let ip_val = Network.int32_of_ipv4 ip in
+ Int32.logand ip_val 0xFF000000l = 0x0A000000l
+ || Int32.logand ip_val 0xFFF00000l = 0xAC100000l
+ || Int32.logand ip_val 0xFFFF0000l = 0xC0A80000l
+ with _ -> false)
+ sorted
+ in
+
+ if i mod 50 = 0 then range_results := private_ips @ !range_results;
+
+ if List.length !range_results > 500 then
+ range_results := ExtList.List.take 250 !range_results
+ done
+
+(* Benchmark 4: Mixed Network Format Parsing *)
+let bench_mixed_format_parsing () =
+ let parsed_data = ref [] in
+
+ for i = 1 to 3000 do
+ let formats =
+ [
+ Printf.sprintf "%d.%d.%d.%d"
+ (i * 7 mod 256)
+ (i * 11 mod 256)
+ (i * 13 mod 256)
+ (i * 17 mod 256);
+ Printf.sprintf "%d.%d.%d.%d/%d"
+ (i * 3 mod 256)
+ (i * 5 mod 256)
+ (i * 7 mod 256)
+ (i * 9 mod 256)
+ (16 + (i mod 17));
+ Printf.sprintf "%03d.%03d.%03d.%03d" (i mod 256)
+ (i * 2 mod 256)
+ (i * 3 mod 256)
+ (i * 4 mod 256);
+ "0.0.0.0";
+ "255.255.255.255";
+ "127.0.0.1";
+ ]
+ in
+
+ List.iter
+ (fun format ->
+ (try
+ let ip = Network.ipv4_of_string_exn format in
+ let back = Network.string_of_ipv4 ip in
+ let int_val = Network.int32_of_ipv4 ip in
+ let from_int = Network.ipv4_of_int32 int_val in
+ let final = Network.string_of_ipv4 from_int in
+
+ if i mod 100 = 0 then parsed_data := (back, final) :: !parsed_data
+ with _ -> ());
+
+ try
+ let cidr = Network.cidr_of_string_exn format in
+ let back = Network.string_of_cidr cidr in
+ let ip = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in
+ let mask =
+ try
+ let slash_pos = String.index format '/' in
+ int_of_string
+ (String.sub format (slash_pos + 1)
+ (String.length format - slash_pos - 1))
+ with _ -> 32
+ in
+
+ let network_addr =
+ Int32.logand ip
+ (Int32.lognot (Int32.sub (Int32.shift_left 1l (32 - mask)) 1l))
+ in
+ let network_ip = Network.ipv4_of_int32 network_addr in
+ let network_str = Network.string_of_ipv4 network_ip in
+
+ if i mod 100 = 0 then
+ parsed_data := (back, network_str) :: !parsed_data
+ with _ -> ())
+ formats;
+
+ if List.length !parsed_data > 200 then
+ parsed_data := ExtList.List.take 100 !parsed_data
+ done
+
+(* Benchmark 5: Network Address Translation Tables *)
+let bench_nat_tables () =
+ let nat_table = Hashtbl.create 10000 in
+ let reverse_table = Hashtbl.create 10000 in
+
+ for i = 1 to 5000 do
+ let internal_ip =
+ Network.ipv4_of_string_exn
+ (Printf.sprintf "192.168.%d.%d" (i * 3 mod 256) (i mod 256))
+ in
+ let external_ip =
+ Network.ipv4_of_string_exn (Printf.sprintf "203.0.113.%d" (i mod 256))
+ in
+
+ for port = 0 to 9 do
+ let internal_port = 1024 + (i * 10) + port in
+ let external_port = 30000 + (i * 10) + port in
+
+ let internal_addr = (internal_ip, internal_port) in
+ let external_addr = (external_ip, external_port) in
+
+ Hashtbl.replace nat_table internal_addr external_addr;
+ Hashtbl.replace reverse_table external_addr internal_addr;
+
+ if i mod 10 = 0 then (
+ (match Hashtbl.find_opt nat_table internal_addr with
+ | Some (ext_ip, ext_port) ->
+ let _ = Network.string_of_ipv4 ext_ip in
+ let _ = string_of_int ext_port in
+ ()
+ | None -> ());
+
+ match Hashtbl.find_opt reverse_table external_addr with
+ | Some (int_ip, int_port) ->
+ let _ = Network.string_of_ipv4 int_ip in
+ let _ = string_of_int int_port in
+ ()
+ | None -> ())
+ done;
+
+ if i mod 100 = 0 then (
+ let to_remove = ref [] in
+ Hashtbl.iter
+ (fun k _v ->
+ let _ip, port = k in
+ if port < 1024 + ((i - 200) * 10) then to_remove := k :: !to_remove)
+ nat_table;
+ List.iter (Hashtbl.remove nat_table) !to_remove;
+ List.iter
+ (fun k ->
+ match Hashtbl.find_opt nat_table k with
+ | Some v -> Hashtbl.remove reverse_table v
+ | None -> ())
+ !to_remove)
+ done
+
+(* Benchmark 6: IP Address Sorting and Comparison *)
+let bench_ip_sorting () =
+ let sorted_lists = ref [] in
+
+ for i = 1 to 1000 do
+ let ip_list =
+ List.init 200 (fun j ->
+ let idx = (i * 200) + j in
+ Printf.sprintf "%d.%d.%d.%d"
+ (idx * 7 mod 256)
+ (idx * 11 mod 256)
+ (idx * 13 mod 256)
+ (idx * 17 mod 256))
+ in
+
+ let parsed =
+ List.map
+ (fun s -> try Some (Network.ipv4_of_string_exn s, s) with _ -> None)
+ ip_list
+ |> List.filter_map (fun x -> x)
+ in
+
+ let sorted =
+ List.sort
+ (fun (ip1, _) (ip2, _) ->
+ Int32.compare (Network.int32_of_ipv4 ip1) (Network.int32_of_ipv4 ip2))
+ parsed
+ in
+
+ let sorted_strings =
+ List.map (fun (ip, _) -> Network.string_of_ipv4 ip) sorted
+ in
+
+ let grouped = Hashtbl.create 256 in
+ List.iter
+ (fun (ip, orig) ->
+ let subnet = Int32.shift_right (Network.int32_of_ipv4 ip) 8 in
+ let existing =
+ match Hashtbl.find_opt grouped subnet with Some l -> l | None -> []
+ in
+ Hashtbl.replace grouped subnet ((ip, orig) :: existing))
+ sorted;
+
+ let _max_subnet =
+ Hashtbl.fold
+ (fun subnet ips (max_sub, max_count) ->
+ let count = List.length ips in
+ if count > max_count then (subnet, count) else (max_sub, max_count))
+ grouped (0l, 0)
+ in
+
+ if i mod 50 = 0 then sorted_lists := sorted_strings :: !sorted_lists;
+
+ if List.length !sorted_lists > 20 then
+ sorted_lists := ExtList.List.take 10 !sorted_lists
+ done
+
+(* Benchmark 7: Broadcast and Network Address Calculations *)
+let bench_broadcast_calculations () =
+ let boundary_cache = ref [] in
+
+ for i = 1 to 3000 do
+ let subnet_sizes = [ 30; 29; 28; 27; 26; 25; 24; 23; 22; 21; 20; 16; 8 ] in
+
+ List.iter
+ (fun mask ->
+ let base = Int32.of_int (0x0A000000 + (i * 0x10000)) in
+ let cidr_str =
+ Printf.sprintf "%s/%d"
+ (Network.string_of_ipv4 (Network.ipv4_of_int32 base))
+ mask
+ in
+
+ try
+ let cidr = Network.cidr_of_string_exn cidr_str in
+ let net_ip = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in
+ let net_mask = mask in
+
+ let mask_val =
+ Int32.lognot (Int32.sub (Int32.shift_left 1l (32 - net_mask)) 1l)
+ in
+ let network = Int32.logand net_ip mask_val in
+ let network_ip = Network.ipv4_of_int32 network in
+
+ let host_bits = 32 - net_mask in
+ let broadcast =
+ Int32.logor network (Int32.sub (Int32.shift_left 1l host_bits) 1l)
+ in
+ let broadcast_ip = Network.ipv4_of_int32 broadcast in
+
+ let first_usable =
+ if host_bits > 1 then Network.ipv4_of_int32 (Int32.succ network)
+ else network_ip
+ in
+ let last_usable =
+ if host_bits > 1 then Network.ipv4_of_int32 (Int32.pred broadcast)
+ else broadcast_ip
+ in
+
+ let total_hosts =
+ if host_bits >= 2 then Int32.sub (Int32.shift_left 1l host_bits) 2l
+ else 0l
+ in
+
+ let boundaries =
+ ( Network.string_of_ipv4 network_ip,
+ Network.string_of_ipv4 broadcast_ip,
+ Network.string_of_ipv4 first_usable,
+ Network.string_of_ipv4 last_usable,
+ Int32.to_string total_hosts )
+ in
+
+ if i mod 100 = 0 then boundary_cache := boundaries :: !boundary_cache
+ with _ -> ())
+ subnet_sizes;
+
+ if List.length !boundary_cache > 100 then
+ boundary_cache := ExtList.List.take 50 !boundary_cache
+ done
+
+(* Benchmark 8: Complex Network Operations *)
+let bench_complex_network_ops () =
+ let operation_cache = Hashtbl.create 1000 in
+ let results = ref [] in
+
+ for i = 1 to 2000 do
+ let source_net =
+ Network.cidr_of_string_exn (Printf.sprintf "10.%d.0.0/16" (i mod 256))
+ in
+
+ let base_ip = Network.int32_of_ipv4 (Network.prefix_of_cidr source_net) in
+ let subnets =
+ List.init 16 (fun j ->
+ let subnet_base = Int32.add base_ip (Int32.of_int (j * 256)) in
+ Printf.sprintf "%s/24"
+ (Network.string_of_ipv4 (Network.ipv4_of_int32 subnet_base)))
+ in
+
+ let valid_subnets =
+ List.filter_map
+ (fun s ->
+ try
+ let cidr = Network.cidr_of_string_exn s in
+ let sub_ip_obj = Network.prefix_of_cidr cidr in
+ if Network.ipv4_matches sub_ip_obj source_net then Some cidr
+ else None
+ with _ -> None)
+ subnets
+ in
+
+ let all_ips =
+ List.concat_map
+ (fun cidr ->
+ let net = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in
+ List.init 10 (fun k ->
+ let ip = Int32.add net (Int32.of_int k) in
+ Network.ipv4_of_int32 ip))
+ valid_subnets
+ in
+
+ let ip_strings = List.map Network.string_of_ipv4 all_ips in
+ let reparsed =
+ List.filter_map
+ (fun s -> try Some (Network.ipv4_of_string_exn s) with _ -> None)
+ ip_strings
+ in
+
+ List.iteri
+ (fun j ip ->
+ let key = (i, j) in
+ let value =
+ (ip, List.nth_opt valid_subnets (j mod List.length valid_subnets))
+ in
+ Hashtbl.replace operation_cache key value)
+ reparsed;
+
+ if i mod 10 = 0 then
+ for j = 0 to 50 do
+ match Hashtbl.find_opt operation_cache (i - 5, j) with
+ | Some (ip, Some cidr) ->
+ let ip_str = Network.string_of_ipv4 ip in
+ let cidr_str = Network.string_of_cidr cidr in
+ results := (ip_str, cidr_str) :: !results
+ | _ -> ()
+ done;
+
+ if i mod 100 = 0 then (
+ Hashtbl.iter
+ (fun (idx, _) _ ->
+ if idx < i - 200 then Hashtbl.remove operation_cache (idx, 0))
+ operation_cache;
+ if List.length !results > 200 then
+ results := ExtList.List.take 100 !results)
+ done
+
+(* Main benchmark suite runner *)
+let () =
+ bench_ipv4_parsing_storm ();
+ bench_cidr_calculations ();
+ bench_range_operations ();
+ bench_mixed_format_parsing ();
+ bench_nat_tables ();
+ bench_ip_sorting ();
+ bench_broadcast_calculations ();
+ bench_complex_network_ops ()
diff --git a/benchmarks/ahrefs-devkit/stre_bench.ml b/benchmarks/ahrefs-devkit/stre_bench.ml
new file mode 100644
index 000000000..b0b609842
--- /dev/null
+++ b/benchmarks/ahrefs-devkit/stre_bench.ml
@@ -0,0 +1,367 @@
+(** Stre GC Benchmark Suite
+
+ This suite stresses the OCaml garbage collector through intensive string
+ manipulation operations using the Stre module. Each benchmark targets
+ different GC behaviors through string allocation patterns:
+ - Substring allocation pressure
+ - String splitting and concatenation
+ - Pattern-based operations with regular expressions
+ - Temporary string creation and disposal *)
+
+open Devkit
+
+(* Benchmark 1: String Split Storm (Minor GC stress) *)
+let bench_split_storm () =
+ let retained = ref [] in
+
+ let base_string =
+ String.concat ","
+ (List.init 1000 (fun i -> Printf.sprintf "item_%d_value_%d" i (i * 7)))
+ in
+
+ for i = 1 to 1000 do
+ let parts1 = Stre.nsplitc base_string ',' in
+ let _parts2 = Stre.nsplitc_rev base_string ',' in
+
+ let nested = String.concat "|" parts1 in
+ let parts3 = Stre.nsplitc nested '|' in
+
+ if i mod 50 = 0 then retained := parts3 @ !retained;
+
+ let count = ref 0 in
+ let _ =
+ Stre.nsplitc_fold base_string ','
+ (fun acc s ->
+ incr count;
+ if !count mod 10 = 0 then retained := s :: !retained;
+ acc)
+ ()
+ in
+ ();
+
+ if List.length !retained > 1000 then
+ retained := ExtList.List.take 500 !retained
+ done
+
+(* Benchmark 2: Substring Slicing Pressure *)
+let bench_substring_slicing () =
+ let retained_slices = ref [] in
+
+ let source = String.init 100000 (fun i -> char_of_int (65 + (i mod 26))) in
+
+ for i = 1 to 500 do
+ let slice_size = 10 + (i mod 1000) in
+ let offset = i mod (String.length source - slice_size) in
+
+ let slice1 = Stre.from_to source offset (offset + slice_size) in
+ let slice2 = Stre.unsafe_from_to source offset (offset + slice_size) in
+ let slice3 = Stre.slice ~first:offset ~last:(offset + slice_size) source in
+
+ for j = 0 to 9 do
+ let overlap_start = offset + (j * (slice_size / 10)) in
+ if overlap_start + slice_size < String.length source then
+ let overlap =
+ Stre.slice ~first:overlap_start
+ ~last:(overlap_start + slice_size)
+ source
+ in
+ if j mod 3 = 0 then retained_slices := overlap :: !retained_slices
+ done;
+
+ if i mod 20 = 0 then
+ retained_slices := slice1 :: slice2 :: slice3 :: !retained_slices;
+
+ if List.length !retained_slices > 500 then
+ retained_slices := ExtList.List.take 250 !retained_slices
+ done
+
+(* Benchmark 3: Pattern-based String Operations *)
+let bench_pattern_operations () =
+ let retained_matches = ref [] in
+
+ let text =
+ String.concat "\n"
+ (List.init 500 (fun i ->
+ Printf.sprintf
+ "Line %d: email_%d@example.com, phone: 555-%04d, code: ABC%03d" i i
+ (i * 13 mod 9999)
+ (i mod 1000)))
+ in
+
+ for i = 1 to 100 do
+ let lines = Stre.nsplitc text '\n' in
+
+ let processed =
+ List.map
+ (fun line ->
+ let parts1 = Stre.nsplitc line ':' in
+ let parts2 = List.concat_map (fun p -> Stre.nsplitc p ',') parts1 in
+
+ let extracted =
+ List.filter_map
+ (fun p ->
+ if String.length p > 5 then Some (Stre.slice ~first:0 ~last:5 p)
+ else None)
+ parts2
+ in
+
+ String.concat "|" extracted)
+ lines
+ in
+
+ if i mod 10 = 0 then retained_matches := processed @ !retained_matches;
+
+ let _ =
+ List.map
+ (fun s ->
+ let upper = String.uppercase_ascii s in
+ let lower = String.lowercase_ascii s in
+ let reversed =
+ String.init (String.length s) (fun j ->
+ String.get s (String.length s - 1 - j))
+ in
+ if i mod 20 = 0 then
+ retained_matches := upper :: lower :: reversed :: !retained_matches)
+ (ExtList.List.take 10 processed)
+ in
+
+ if List.length !retained_matches > 1000 then
+ retained_matches := ExtList.List.take 500 !retained_matches
+ done
+
+(* Benchmark 4: String Concatenation Chains *)
+let bench_concatenation_chains () =
+ let retained_chains = ref [] in
+
+ for i = 1 to 200 do
+ let chain1 = ref "" in
+ let chain2 = ref "" in
+
+ for j = 1 to 100 do
+ chain1 := !chain1 ^ string_of_int j ^ ",";
+ if j mod 10 = 0 then chain2 := !chain2 ^ !chain1
+ done;
+
+ let parts = Stre.nsplitc !chain1 ',' in
+ let rejoined1 = String.concat "|" parts in
+ let rejoined2 = String.concat ";" parts in
+ let rejoined3 = String.concat "::" parts in
+
+ let nested =
+ List.fold_left
+ (fun acc p -> acc ^ "[" ^ p ^ "]")
+ ""
+ (ExtList.List.take 50 parts)
+ in
+
+ if i mod 10 = 0 then
+ retained_chains :=
+ !chain1 :: !chain2 :: rejoined1 :: rejoined2 :: rejoined3 :: nested
+ :: !retained_chains;
+
+ if List.length !retained_chains > 200 then
+ retained_chains := ExtList.List.take 100 !retained_chains
+ done
+
+(* Benchmark 5: Enumeration-based String Processing *)
+let bench_enum_string_ops () =
+ let retained_enums = ref [] in
+
+ for i = 1 to 300 do
+ let text =
+ String.init 10000 (fun j ->
+ if j mod 100 = 0 then '\n' else char_of_int (65 + ((i + j) mod 26)))
+ in
+
+ let enum1 = Stre.nsplitc_enum text '\n' in
+
+ let processed =
+ Enum.map
+ (fun line ->
+ let words = Stre.nsplitc line ' ' in
+ String.concat "_" (List.map String.uppercase_ascii words))
+ enum1
+ in
+
+ let partial = Enum.take 50 processed |> ExtList.List.of_enum in
+
+ let enum2 = Stre.nsplitc_enum text '\n' in
+ let filtered = Enum.filter (fun s -> String.length s > 50) enum2 in
+ let filtered_list = Enum.take 20 filtered |> ExtList.List.of_enum in
+
+ if i mod 15 = 0 then
+ retained_enums := partial @ filtered_list @ !retained_enums;
+
+ let enum3 = Stre.nsplitc_enum text '\n' in
+ let chain =
+ Enum.map
+ (fun s -> Stre.slice ~first:0 ~last:(min 10 (String.length s)) s)
+ enum3
+ in
+ let chain_list = Enum.take 30 chain |> ExtList.List.of_enum in
+
+ if i mod 20 = 0 then retained_enums := chain_list @ !retained_enums;
+
+ if List.length !retained_enums > 500 then
+ retained_enums := ExtList.List.take 250 !retained_enums
+ done
+
+(* Benchmark 6: Mixed-size String Allocations *)
+let bench_mixed_size_allocations () =
+ let retained_mixed = Hashtbl.create 1000 in
+ let counter = ref 0 in
+
+ for i = 1 to 500 do
+ let sizes = [| 10; 100; 1000; 50; 500; 5000; 20; 200; 2000 |] in
+
+ Array.iter
+ (fun size ->
+ incr counter;
+
+ let s = String.init size (fun j -> char_of_int (65 + (i * j mod 26))) in
+
+ let chunk_size = max 1 (size / (10 + (i mod 10))) in
+ let chunks = ref [] in
+ let pos = ref 0 in
+ while !pos < String.length s do
+ let len = min chunk_size (String.length s - !pos) in
+ chunks := Stre.slice ~first:!pos ~last:(!pos + len) s :: !chunks;
+ pos := !pos + len
+ done;
+
+ let processed =
+ List.map
+ (fun chunk ->
+ let upper = String.uppercase_ascii chunk in
+ let doubled = chunk ^ chunk in
+ if !counter mod 7 = 0 then doubled else upper)
+ !chunks
+ in
+
+ if !counter mod 13 = 0 || !counter mod 17 = 0 then
+ Hashtbl.replace retained_mixed !counter processed;
+
+ if !counter mod 100 = 0 then
+ Hashtbl.iter
+ (fun k _ ->
+ if k < !counter - 500 then Hashtbl.remove retained_mixed k)
+ retained_mixed)
+ sizes
+ done
+
+(* Benchmark 7: String Building with Buffers *)
+let bench_string_building () =
+ let retained_built = ref [] in
+
+ for i = 1 to 200 do
+ let direct = ref "" in
+ for j = 1 to 100 do
+ direct := !direct ^ Printf.sprintf "item_%d_%d " i j
+ done;
+
+ let parts = List.init 100 (fun j -> Printf.sprintf "item_%d_%d" i j) in
+ let from_list = String.concat " " parts in
+
+ let base = String.init 5000 (fun _ -> 'x') in
+ let substrings =
+ List.init 50 (fun j ->
+ let start = j * 100 in
+ let len = 50 + (j mod 50) in
+ Stre.slice ~first:start ~last:(start + len) base)
+ in
+ let from_subs = String.concat "-" substrings in
+
+ let split1 = Stre.nsplitc from_list ' ' in
+ let rebuilt1 = String.concat "," split1 in
+
+ let split2 = Stre.nsplitc from_subs '-' in
+ let rebuilt2 = String.concat ";" split2 in
+
+ if i mod 10 = 0 then
+ retained_built :=
+ !direct :: from_list :: from_subs :: rebuilt1 :: rebuilt2
+ :: !retained_built;
+
+ if List.length !retained_built > 300 then
+ retained_built := ExtList.List.take 150 !retained_built
+ done
+
+(* Benchmark 8: Deep String Transformation Chains *)
+let bench_transformation_chains () =
+ let transformation_cache = Hashtbl.create 500 in
+ let stage_results = ref [] in
+
+ for i = 1 to 150 do
+ let base =
+ String.concat ","
+ (List.init 200 (fun j -> Printf.sprintf "data_%d_%d" i j))
+ in
+
+ let stage1 = Stre.nsplitc base ',' in
+ let stage1_transformed =
+ List.map
+ (fun s ->
+ let len = String.length s in
+ if len > 5 then Stre.slice ~first:2 ~last:(len - 1) s else s ^ s)
+ stage1
+ in
+
+ let stage2 = String.concat "|" stage1_transformed in
+ let stage2_split = Stre.nsplitc stage2 '|' in
+
+ let stage3 =
+ List.filter_map
+ (fun s ->
+ if String.length s mod 2 = 0 then Some (String.uppercase_ascii s)
+ else if String.length s > 3 then
+ Some (Stre.slice ~first:1 ~last:(String.length s - 1) s)
+ else None)
+ stage2_split
+ in
+
+ let stage4 =
+ if List.length stage3 < 100 then
+ List.concat_map
+ (fun s1 ->
+ List.map
+ (fun s2 ->
+ if String.length s1 + String.length s2 < 50 then s1 ^ "_" ^ s2
+ else
+ Stre.slice ~first:0 ~last:10 s1
+ ^ "_"
+ ^ Stre.slice ~first:0 ~last:10 s2)
+ (ExtList.List.take 5 stage3))
+ (ExtList.List.take 10 stage3)
+ else stage3
+ in
+
+ Hashtbl.replace transformation_cache i stage4;
+
+ (if i > 10 then
+ match Hashtbl.find_opt transformation_cache (i - 5) with
+ | Some old_stage ->
+ let combined =
+ ExtList.List.take 10 stage4 @ ExtList.List.take 10 old_stage
+ in
+ stage_results := combined :: !stage_results
+ | None -> ());
+
+ if i mod 50 = 0 then
+ Hashtbl.iter
+ (fun k _ -> if k < i - 20 then Hashtbl.remove transformation_cache k)
+ transformation_cache;
+
+ if List.length !stage_results > 50 then
+ stage_results := ExtList.List.take 25 !stage_results
+ done
+
+(* Main benchmark suite runner *)
+let () =
+ bench_split_storm ();
+ bench_substring_slicing ();
+ bench_pattern_operations ();
+ bench_concatenation_chains ();
+ bench_enum_string_ops ();
+ bench_mixed_size_allocations ();
+ bench_string_building ();
+ bench_transformation_chains ()
diff --git a/dependencies/template/dev.opam b/dependencies/template/dev.opam
index 326f15564..eeefc40b4 100644
--- a/dependencies/template/dev.opam
+++ b/dependencies/template/dev.opam
@@ -22,6 +22,7 @@ depends: [
"conf-pkg-config" {= "1.2"}
"cppo" {= "1.6.7"}
"decompress" {= "1.1.0"}
+ "devkit"
"digestif" {= "1.0.0"}
"fmt" {= "0.9.0"}
"irmin" {= "3.3.2"}
diff --git a/run_config.json b/run_config.json
index 78c48e815..5f0eb3cce 100644
--- a/run_config.json
+++ b/run_config.json
@@ -1559,6 +1559,58 @@
"params": ""
}
]
+ },
+ {
+ "executable": "benchmarks/ahrefs-devkit/htmlStream_bench.exe",
+ "name": "devkit_htmlstream_bench",
+ "tags": [
+ "10s_100s",
+ "macro_bench"
+ ],
+ "runs": [
+ {
+ "params": ""
+ }
+ ]
+ },
+ {
+ "executable": "benchmarks/ahrefs-devkit/stre_bench.exe",
+ "name": "devkit_stre_bench",
+ "tags": [
+ "10s_100s",
+ "macro_bench"
+ ],
+ "runs": [
+ {
+ "params": ""
+ }
+ ]
+ },
+ {
+ "executable": "benchmarks/ahrefs-devkit/network_bench.exe",
+ "name": "devkit_network_bench",
+ "tags": [
+ "10s_100s",
+ "macro_bench"
+ ],
+ "runs": [
+ {
+ "params": ""
+ }
+ ]
+ },
+ {
+ "executable": "benchmarks/ahrefs-devkit/gzip_bench.exe",
+ "name": "devkit_gzip_bench",
+ "tags": [
+ "10s_100s",
+ "macro_bench"
+ ],
+ "runs": [
+ {
+ "params": ""
+ }
+ ]
}
]
}