diff --git a/benchmarks/ahrefs-devkit/dune b/benchmarks/ahrefs-devkit/dune new file mode 100644 index 000000000..1002056ad --- /dev/null +++ b/benchmarks/ahrefs-devkit/dune @@ -0,0 +1,25 @@ +(executable + (name htmlStream_bench) + (modules htmlStream_bench) + (libraries devkit)) + +(executable + (name stre_bench) + (modules stre_bench) + (libraries devkit)) + +(executable + (name network_bench) + (modules network_bench) + (libraries devkit)) + +(executable + (name gzip_bench) + (modules gzip_bench) + (libraries devkit)) + +; Add benchmarks to buildbench alias for sandmark + +(alias + (name buildbench) + (deps htmlStream_bench.exe stre_bench.exe network_bench.exe gzip_bench.exe)) diff --git a/benchmarks/ahrefs-devkit/gzip_bench.ml b/benchmarks/ahrefs-devkit/gzip_bench.ml new file mode 100644 index 000000000..9013df7c9 --- /dev/null +++ b/benchmarks/ahrefs-devkit/gzip_bench.ml @@ -0,0 +1,355 @@ +(** Gzip GC Benchmark Suite + + This suite stresses the OCaml garbage collector through intensive + compression and decompression operations. The benchmarks test: + - Buffer-based I/O with continuous allocations + - Zlib compression/decompression creating temporary buffers + - Streaming patterns with state management + - CRC calculations and header processing + - Variable-sized buffer operations *) + +open Devkit +open ExtLib + +(* Helper functions for string compression/decompression *) +let compress_string ?level str = + let _ = level in + let oc = Gzip_io.output (IO.output_string ()) in + IO.nwrite_string oc str; + IO.close_out oc + +let uncompress_string str = + let ic = Gzip_io.input (IO.input_string str) in + let result = IO.read_all ic in + IO.close_in ic; + result + +(* Helper function to generate test data *) +let generate_test_data size pattern = + let data = Bytes.create size in + for i = 0 to size - 1 do + Bytes.set data i (char_of_int (i * pattern mod 256)) + done; + Bytes.to_string data + +(* Benchmark 1: Small Buffer Compression Storm *) +let bench_small_buffer_storm () = + let compressed_data = ref [] in + + for i = 1 to 5000 do + let size = 100 + (i mod 900) in + let data = generate_test_data size (i mod 256) in + + let compressed = compress_string data in + let decompressed = uncompress_string compressed in + + let compressed_fast = compress_string ~level:1 data in + let compressed_best = compress_string ~level:9 data in + + if i mod 100 = 0 then + compressed_data := + compressed :: compressed_fast :: compressed_best :: !compressed_data; + + let _ = String.length compressed in + let _ = String.length decompressed in + + if List.length !compressed_data > 300 then + compressed_data := ExtList.List.take 150 !compressed_data + done + +(* Benchmark 2: Large Block Compression *) +let bench_large_block_compression () = + let retained_blocks = ref [] in + + for i = 1 to 100 do + let size = 10000 * (1 + (i mod 10)) in + let data = generate_test_data size i in + + let compressed_default = compress_string data in + let compressed_filtered = compress_string ~level:5 data in + + let decompressed1 = uncompress_string compressed_default in + let decompressed2 = uncompress_string compressed_filtered in + + assert (decompressed1 = data); + assert (decompressed2 = data); + + let chunk_size = 1024 in + let chunks = ref [] in + let pos = ref 0 in + while !pos < String.length data do + let len = min chunk_size (String.length data - !pos) in + let chunk = String.sub data !pos len in + let compressed_chunk = compress_string chunk in + chunks := compressed_chunk :: !chunks; + pos := !pos + len + done; + + if i mod 10 = 0 then + retained_blocks := + compressed_default :: compressed_filtered :: !retained_blocks; + + if List.length !retained_blocks > 20 then + retained_blocks := ExtList.List.take 10 !retained_blocks + done + +(* Benchmark 3: Streaming Compression/Decompression *) +let bench_streaming_operations () = + let stream_buffers = ref [] in + + for i = 1 to 500 do + let data_size = 5000 + (i * 100) in + let source_data = generate_test_data data_size i in + + let out_channel = Gzip_io.output (IO.output_string ()) in + + let chunk_size = 256 in + let pos = ref 0 in + while !pos < String.length source_data do + let len = min chunk_size (String.length source_data - !pos) in + let chunk = String.sub source_data !pos len in + IO.nwrite_string out_channel chunk; + pos := !pos + len + done; + + let compressed = IO.close_out out_channel in + + let in_channel = Gzip_io.input (IO.input_string compressed) in + let decompressed_buf = Buffer.create data_size in + + let read_buf = Bytes.create 128 in + let rec read_loop () = + try + let n = IO.input in_channel read_buf 0 128 in + if n > 0 then ( + Buffer.add_subbytes decompressed_buf read_buf 0 n; + read_loop ()) + with IO.No_more_input -> () + in + read_loop (); + IO.close_in in_channel; + + let decompressed = Buffer.contents decompressed_buf in + + assert (decompressed = source_data); + + if i mod 50 = 0 then + stream_buffers := compressed :: decompressed :: !stream_buffers; + + if List.length !stream_buffers > 100 then + stream_buffers := ExtList.List.take 50 !stream_buffers + done + +(* Benchmark 4: Mixed Size Compression Patterns *) +let bench_mixed_size_patterns () = + let mixed_cache = Hashtbl.create 1000 in + + for i = 1 to 1000 do + let sizes = [| 64; 128; 256; 512; 1024; 2048; 4096; 8192; 128; 64 |] in + let size = sizes.(i mod Array.length sizes) in + let data = generate_test_data size (i * 7) in + + let levels = [ 1; 3; 5; 7; 9 ] in + let compressed_versions = + List.map (fun level -> compress_string ~level data) levels + in + + let decompressed_versions = + List.map uncompress_string compressed_versions + in + + List.iter (fun d -> assert (d = data)) decompressed_versions; + + let mixed = String.concat "" compressed_versions in + let mixed_compressed = compress_string mixed in + + if i mod 13 = 0 || i mod 17 = 0 then + Hashtbl.replace mixed_cache i mixed_compressed; + + let chain = + List.fold_left + (fun acc comp -> compress_string (acc ^ comp)) + "" + (ExtList.List.take 3 compressed_versions) + in + + if i mod 23 = 0 then Hashtbl.replace mixed_cache (i + 10000) chain; + + if i mod 100 = 0 then + Hashtbl.iter + (fun k _ -> if k < i - 200 then Hashtbl.remove mixed_cache k) + mixed_cache + done + +(* Benchmark 5: Concurrent-style Compression *) +let bench_concurrent_style () = + let active_streams = Array.init 10 (fun _ -> ref []) in + let completed = ref [] in + + for i = 1 to 2000 do + let stream_id = i mod Array.length active_streams in + let stream = active_streams.(stream_id) in + + let data = generate_test_data (500 + (stream_id * 100)) i in + + let compressed = compress_string data in + stream := compressed :: !stream; + + if i mod 50 = 0 then + Array.iteri + (fun _idx s -> + if List.length !s > 5 then ( + let combined = String.concat "" (List.rev !s) in + let recompressed = compress_string combined in + + let _ = uncompress_string recompressed in + + completed := recompressed :: !completed; + s := [])) + active_streams; + + if List.length !completed > 100 then + completed := ExtList.List.take 50 !completed + done + +(* Benchmark 6: Compression with Headers and Metadata *) +let bench_headers_metadata () = + let metadata_cache = ref [] in + + for i = 1 to 2000 do + let data = + Printf.sprintf "File_%d_Content_%s" i + (String.make (100 + (i mod 400)) (char_of_int (65 + (i mod 26)))) + in + + let compressed = compress_string data in + + let header_size = min 10 (String.length compressed) in + let header = String.sub compressed 0 header_size in + + let checksum = ref 0 in + String.iter + (fun c -> checksum := ((!checksum * 31) + Char.code c) mod 65536) + data; + + let metadata = + (header, !checksum, String.length data, String.length compressed) + in + + let decompressed = uncompress_string compressed in + assert (decompressed = data); + + let recompressed = compress_string ~level:(1 + (i mod 9)) decompressed in + + let ratio = + float_of_int (String.length compressed) + /. float_of_int (String.length data) + in + + if i mod 50 = 0 then + metadata_cache := (metadata, ratio, recompressed) :: !metadata_cache; + + if List.length !metadata_cache > 100 then + metadata_cache := ExtList.List.take 50 !metadata_cache + done + +(* Benchmark 7: Buffer Reuse and Recycling *) +let bench_buffer_recycling () = + let buffer_pool = Array.init 20 (fun _ -> Buffer.create 1024) in + let compressed_pool = ref [] in + let generation_counter = ref 0 in + + for i = 1 to 1500 do + let buffer_idx = i mod Array.length buffer_pool in + let buffer = buffer_pool.(buffer_idx) in + + Buffer.clear buffer; + + for j = 1 to 100 + (i mod 200) do + Buffer.add_string buffer (Printf.sprintf "Line_%d_%d " i j) + done; + + let data = Buffer.contents buffer in + + let compressed = compress_string data in + + let decode_buffer = + buffer_pool.((buffer_idx + 1) mod Array.length buffer_pool) + in + Buffer.clear decode_buffer; + let decompressed = uncompress_string compressed in + Buffer.add_string decode_buffer decompressed; + + incr generation_counter; + + if !generation_counter mod 30 = 0 then ( + compressed_pool := compressed :: !compressed_pool; + + if List.length !compressed_pool > 10 then + let old_data = List.hd !compressed_pool in + let mixed = old_data ^ compressed in + let mixed_compressed = compress_string mixed in + compressed_pool := mixed_compressed :: List.tl !compressed_pool); + + if !generation_counter mod 100 = 0 && List.length !compressed_pool > 50 then + compressed_pool := ExtList.List.take 25 !compressed_pool + done + +(* Benchmark 8: Complex Compression Pipelines *) +let bench_compression_pipelines () = + let pipeline_stages = Hashtbl.create 500 in + let final_results = ref [] in + + for i = 1 to 1000 do + let base_data = generate_test_data (1000 + (i * 10)) i in + let stage1 = compress_string base_data in + + let decompressed = uncompress_string stage1 in + let modified = decompressed ^ Printf.sprintf "_modified_%d" i in + let stage2 = compress_string ~level:5 modified in + + let stage3 = + match Hashtbl.find_opt pipeline_stages (i - 10) with + | Some (prev1, prev2, _) -> + let combined = stage1 ^ prev1 ^ stage2 ^ prev2 in + compress_string ~level:3 combined + | None -> compress_string (stage1 ^ stage2) + in + + Hashtbl.replace pipeline_stages i (stage1, stage2, stage3); + + let multi_compressed = + let temp1 = compress_string ~level:1 base_data in + let temp2 = compress_string ~level:5 temp1 in + compress_string ~level:9 temp2 + in + + let multi_decompressed = + let temp1 = uncompress_string multi_compressed in + let temp2 = uncompress_string temp1 in + uncompress_string temp2 + in + + assert (multi_decompressed = base_data); + + if i mod 50 = 0 then + final_results := (stage3, multi_compressed) :: !final_results; + + if i mod 100 = 0 then ( + Hashtbl.iter + (fun k _ -> if k < i - 50 then Hashtbl.remove pipeline_stages k) + pipeline_stages; + if List.length !final_results > 40 then + final_results := ExtList.List.take 20 !final_results) + done + +(* Main benchmark suite runner *) +let () = + bench_small_buffer_storm (); + bench_large_block_compression (); + bench_streaming_operations (); + bench_mixed_size_patterns (); + bench_concurrent_style (); + bench_headers_metadata (); + bench_buffer_recycling (); + bench_compression_pipelines () diff --git a/benchmarks/ahrefs-devkit/htmlStream_bench.ml b/benchmarks/ahrefs-devkit/htmlStream_bench.ml new file mode 100644 index 000000000..c60c321de --- /dev/null +++ b/benchmarks/ahrefs-devkit/htmlStream_bench.ml @@ -0,0 +1,315 @@ +(** HtmlStream GC Benchmark Suite + + This suite is designed to stress the OCaml garbage collector with various + allocation patterns and heap shape changes over time. Each benchmark targets + different GC behaviors: + - Minor collection pressure (ephemeral allocations) + - Major collection pressure (long-lived data) + - Heap fragmentation + - Generational hypothesis violations + - Large object handling *) + +open Devkit + +(* Benchmark 1: Small String Pressure (Minor GC stress) *) +let bench_small_strings () = + let collected_texts = ref [] in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024) in + for i = 1 to 10000 do + Buffer.add_string buf "

"; + Buffer.add_string buf (string_of_int i); + Buffer.add_string buf " small text "; + Buffer.add_string buf (String.make 10 (char_of_int (65 + (i mod 26)))); + Buffer.add_string buf "

" + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (function + | HtmlStream.Text t -> + if Random.int 100 < 10 then + collected_texts := HtmlStream.Raw.project t :: !collected_texts + | _ -> ()) + html + done + +(* Benchmark 2: Attribute List Pressure *) +let bench_attribute_lists () = + let total_attrs = ref 0 in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 2) in + for i = 1 to 5000 do + Buffer.add_string buf ""; + Buffer.add_string buf (string_of_int i); + Buffer.add_string buf "" + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (function + | HtmlStream.Tag (_, attrs) -> + total_attrs := !total_attrs + List.length attrs + | _ -> ()) + html + done + +(* Benchmark 3: Large Block Allocations *) +let bench_large_blocks () = + let retained_blocks = ref [] in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 5) in + for i = 1 to 1000 do + let size = 1024 * (1 + (i mod 100)) in + Buffer.add_string buf ""; + + if i mod 2 = 0 then ( + Buffer.add_string buf "") + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (function + | HtmlStream.Script (_, s) | HtmlStream.Style (_, s) -> + if Random.int 100 < 20 then retained_blocks := s :: !retained_blocks + | _ -> ()) + html + done + +(* Benchmark 4: Heap Shape Morphing *) +let bench_morphing_heap () = + let phase_data = ref [] in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 3) in + for phase = 1 to 100 do + if phase mod 3 = 0 then + for _ = 1 to 100 do + Buffer.add_string buf "small" + done; + + if phase mod 3 = 1 then ( + for depth = 1 to 20 do + Printf.bprintf buf "
" depth + (phase * depth) + done; + Buffer.add_string buf "nested content"; + for _ = 1 to 20 do + Buffer.add_string buf "
" + done); + + if phase mod 3 = 2 then ( + Buffer.add_string buf "") + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (fun elem -> + if Random.int 100 < 15 then phase_data := elem :: !phase_data; + if List.length !phase_data > 1000 then phase_data := List.tl !phase_data) + html + done + +(* Benchmark 5: Fragmentation Stress *) +let bench_fragmentation () = + let retained = Hashtbl.create 1000 in + let counter = ref 0 in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 4) in + let sizes = [| 10; 100; 1000; 10000; 100; 10; 5000; 50; 500 |] in + for i = 1 to 2000 do + let size = sizes.(i mod Array.length sizes) in + + match i mod 4 with + | 0 -> Printf.bprintf buf "

%s

" (String.make size 'a') + | 1 -> + Buffer.add_string buf "content" + | 2 -> Printf.bprintf buf "" (String.make size 'c') + | _ -> + for _ = 1 to size / 100 do + Buffer.add_string buf "x" + done + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (fun elem -> + incr counter; + if !counter mod 7 = 0 || !counter mod 11 = 0 || !counter mod 13 = 0 then + Hashtbl.replace retained !counter elem; + if !counter mod 100 = 0 then + Hashtbl.iter + (fun k _ -> if k < !counter - 500 then Hashtbl.remove retained k) + retained) + html + done + +(* Benchmark 6: Generational Hypothesis Violation *) +let bench_generational_violation () = + let old_generation = ref [] in + let middle_generation = ref [] in + let young_generation = ref [] in + let counter = ref 0 in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 2) in + for batch = 1 to 100 do + for i = 1 to 100 do + Printf.bprintf buf "
" batch i; + Printf.bprintf buf "Generation %d Item %d" batch i; + for j = 1 to batch do + Printf.bprintf buf "%d" j (batch * i * j) + done; + Buffer.add_string buf "
" + done + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (fun elem -> + incr counter; + if !counter mod 100 = 0 then ( + old_generation := !middle_generation; + middle_generation := !young_generation; + young_generation := []); + young_generation := elem :: !young_generation; + + if !counter mod 50 = 0 then + let mixed = !old_generation @ !young_generation in + young_generation := + let rec take n = function + | [] -> [] + | _ when n <= 0 -> [] + | h :: t -> h :: take (n - 1) t + in + List.rev (take 10 (List.rev mixed))) + html + done + +(* Benchmark 7: Allocation Rate Variation *) +let bench_variable_rate () = + let allocation_history = Array.make 1000 [] in + let index = ref 0 in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 3) in + for phase = 1 to 50 do + let intensity = + int_of_float (50.0 +. (45.0 *. sin (float_of_int phase *. 0.3))) + in + + if intensity < 30 then + for _ = 1 to intensity do + Buffer.add_string buf "

low intensity

" + done + else if intensity < 70 then + for i = 1 to intensity * 10 do + Printf.bprintf buf "
content %d
" i i + done + else + for i = 1 to intensity * 20 do + Buffer.add_string buf "%d" i + done + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (fun elem -> + let i = !index mod Array.length allocation_history in + allocation_history.(i) <- elem :: allocation_history.(i); + incr index; + if !index mod 100 = 0 then + let clear_index = + (!index - 500) mod Array.length allocation_history + in + if clear_index >= 0 then allocation_history.(clear_index) <- []) + html + done + +(* Benchmark 8: Reference Graph Complexity *) +let bench_complex_references () = + let graph = Hashtbl.create 1000 in + let edges = ref [] in + let node_counter = ref 0 in + + for _ = 1 to 10 do + let buf = Buffer.create (1024 * 1024 * 2) in + for layer = 1 to 20 do + for node = 1 to 50 do + Printf.bprintf buf "
" layer node; + for ref_layer = 1 to 5 do + for ref_node = 1 to 10 do + Printf.bprintf buf "ref" + (((layer + ref_layer) mod 20) + 1) + (((node + ref_node) mod 50) + 1) + done + done; + Buffer.add_string buf "
" + done + done; + let html = Buffer.contents buf in + + let ctx = HtmlStream.init () in + HtmlStream.parse ~ctx + (fun elem -> + incr node_counter; + let node_id = !node_counter in + Hashtbl.add graph node_id elem; + + if node_id > 10 then + for _ = 1 to Random.int 5 + 1 do + let target = Random.int (node_id - 1) + 1 in + edges := (node_id, target) :: !edges + done; + + if node_id mod 200 = 0 then ( + edges := + List.filter + (fun (s, t) -> s > node_id - 1000 && t > node_id - 1000) + !edges; + Hashtbl.iter + (fun k _ -> if k < node_id - 1000 then Hashtbl.remove graph k) + graph)) + html + done + +(* Main benchmark suite runner *) +let () = + bench_small_strings (); + bench_attribute_lists (); + bench_large_blocks (); + bench_morphing_heap (); + bench_fragmentation (); + bench_generational_violation (); + bench_variable_rate (); + bench_complex_references () diff --git a/benchmarks/ahrefs-devkit/network_bench.ml b/benchmarks/ahrefs-devkit/network_bench.ml new file mode 100644 index 000000000..570fb2c6a --- /dev/null +++ b/benchmarks/ahrefs-devkit/network_bench.ml @@ -0,0 +1,485 @@ +(** Network GC Benchmark Suite + + This suite stresses the OCaml garbage collector through intensive network + parsing and manipulation operations. The benchmarks test: + - IPv4 address parsing (ragel-based parser creating intermediate values) + - CIDR subnet calculations with bitwise operations + - Int32 boxing/unboxing patterns + - String-to-structured data conversions + - Network address comparisons and transformations *) + +open Devkit + +(* Benchmark 1: IPv4 Address Parsing Storm *) +let bench_ipv4_parsing_storm () = + let parsed_ips = ref [] in + + for i = 1 to 10000 do + let ip_strings = + [ + Printf.sprintf "%d.%d.%d.%d" (i mod 256) + (i * 7 mod 256) + (i * 13 mod 256) + (i * 17 mod 256); + Printf.sprintf "192.168.%d.%d" (i mod 256) (i * 3 mod 256); + Printf.sprintf "10.%d.%d.%d" (i * 5 mod 256) (i * 11 mod 256) (i mod 256); + Printf.sprintf "172.%d.%d.%d" + (16 + (i mod 16)) + (i * 2 mod 256) + (i * 19 mod 256); + ] + in + + List.iter + (fun ip_str -> + try + let ip = Network.ipv4_of_string_exn ip_str in + let str_back = Network.string_of_ipv4 ip in + let cidr_str = ip_str ^ "/" ^ string_of_int (8 + (i mod 25)) in + let cidr = Network.cidr_of_string_exn cidr_str in + let cidr_back = Network.string_of_cidr cidr in + + if i mod 100 = 0 then + parsed_ips := (ip, cidr, str_back, cidr_back) :: !parsed_ips; + + let _ = Network.ipv4_matches ip cidr in + () + with _ -> ()) + ip_strings; + + if List.length !parsed_ips > 100 then + parsed_ips := ExtList.List.take 50 !parsed_ips + done + +(* Benchmark 2: CIDR Subnet Calculations *) +let bench_cidr_calculations () = + let subnet_cache = Hashtbl.create 1000 in + + for i = 1 to 5000 do + let base_ip = + Network.ipv4_of_int32 (Int32.of_int (0x0A000000 + (i * 256))) + in + let masks = [ 8; 16; 24; 28; 30; 32 ] in + + List.iter + (fun mask -> + let cidr_str = + Printf.sprintf "%s/%d" (Network.string_of_ipv4 base_ip) mask + in + let cidr = Network.cidr_of_string_exn cidr_str in + let net_ip = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in + let net_mask = mask in + + let test_ips = + List.init 50 (fun j -> + let offset = Int32.of_int j in + let test_ip = Int32.add net_ip offset in + Network.ipv4_of_int32 test_ip) + in + + let members = + List.filter (fun ip -> Network.ipv4_matches ip cidr) test_ips + in + + let member_strings = List.map Network.string_of_ipv4 members in + + if i mod 20 = 0 then + Hashtbl.replace subnet_cache (i, mask) member_strings; + + let subnet_size = Int32.shift_left 1l (32 - net_mask) in + let broadcast = Int32.sub (Int32.add net_ip subnet_size) 1l in + let broadcast_ip = Network.ipv4_of_int32 broadcast in + let _ = Network.string_of_ipv4 broadcast_ip in + ()) + masks; + + if i mod 100 = 0 then + Hashtbl.iter + (fun (idx, _) _ -> + if idx < i - 200 then Hashtbl.remove subnet_cache (idx, 0)) + subnet_cache + done + +(* Benchmark 3: Network Range Operations *) +let bench_range_operations () = + let range_results = ref [] in + + for i = 1 to 2000 do + let start_ip = + Network.ipv4_of_int32 (Int32.of_int (0xC0A80000 + (i * 10))) + in + let end_ip = + Network.ipv4_of_int32 (Int32.of_int (0xC0A80000 + (i * 10) + 255)) + in + + let ips_in_range = ref [] in + let current = ref (Network.int32_of_ipv4 start_ip) in + let end_val = Network.int32_of_ipv4 end_ip in + + while Int32.compare !current end_val <= 0 do + let ip = Network.ipv4_of_int32 !current in + let ip_str = Network.string_of_ipv4 ip in + ips_in_range := ip_str :: !ips_in_range; + current := Int32.succ !current + done; + + let sorted = + List.sort + (fun a b -> + let ip_a = Network.ipv4_of_string_exn a in + let ip_b = Network.ipv4_of_string_exn b in + Int32.compare + (Network.int32_of_ipv4 ip_a) + (Network.int32_of_ipv4 ip_b)) + !ips_in_range + in + + let private_ips = + List.filter + (fun ip_str -> + try + let ip = Network.ipv4_of_string_exn ip_str in + let ip_val = Network.int32_of_ipv4 ip in + Int32.logand ip_val 0xFF000000l = 0x0A000000l + || Int32.logand ip_val 0xFFF00000l = 0xAC100000l + || Int32.logand ip_val 0xFFFF0000l = 0xC0A80000l + with _ -> false) + sorted + in + + if i mod 50 = 0 then range_results := private_ips @ !range_results; + + if List.length !range_results > 500 then + range_results := ExtList.List.take 250 !range_results + done + +(* Benchmark 4: Mixed Network Format Parsing *) +let bench_mixed_format_parsing () = + let parsed_data = ref [] in + + for i = 1 to 3000 do + let formats = + [ + Printf.sprintf "%d.%d.%d.%d" + (i * 7 mod 256) + (i * 11 mod 256) + (i * 13 mod 256) + (i * 17 mod 256); + Printf.sprintf "%d.%d.%d.%d/%d" + (i * 3 mod 256) + (i * 5 mod 256) + (i * 7 mod 256) + (i * 9 mod 256) + (16 + (i mod 17)); + Printf.sprintf "%03d.%03d.%03d.%03d" (i mod 256) + (i * 2 mod 256) + (i * 3 mod 256) + (i * 4 mod 256); + "0.0.0.0"; + "255.255.255.255"; + "127.0.0.1"; + ] + in + + List.iter + (fun format -> + (try + let ip = Network.ipv4_of_string_exn format in + let back = Network.string_of_ipv4 ip in + let int_val = Network.int32_of_ipv4 ip in + let from_int = Network.ipv4_of_int32 int_val in + let final = Network.string_of_ipv4 from_int in + + if i mod 100 = 0 then parsed_data := (back, final) :: !parsed_data + with _ -> ()); + + try + let cidr = Network.cidr_of_string_exn format in + let back = Network.string_of_cidr cidr in + let ip = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in + let mask = + try + let slash_pos = String.index format '/' in + int_of_string + (String.sub format (slash_pos + 1) + (String.length format - slash_pos - 1)) + with _ -> 32 + in + + let network_addr = + Int32.logand ip + (Int32.lognot (Int32.sub (Int32.shift_left 1l (32 - mask)) 1l)) + in + let network_ip = Network.ipv4_of_int32 network_addr in + let network_str = Network.string_of_ipv4 network_ip in + + if i mod 100 = 0 then + parsed_data := (back, network_str) :: !parsed_data + with _ -> ()) + formats; + + if List.length !parsed_data > 200 then + parsed_data := ExtList.List.take 100 !parsed_data + done + +(* Benchmark 5: Network Address Translation Tables *) +let bench_nat_tables () = + let nat_table = Hashtbl.create 10000 in + let reverse_table = Hashtbl.create 10000 in + + for i = 1 to 5000 do + let internal_ip = + Network.ipv4_of_string_exn + (Printf.sprintf "192.168.%d.%d" (i * 3 mod 256) (i mod 256)) + in + let external_ip = + Network.ipv4_of_string_exn (Printf.sprintf "203.0.113.%d" (i mod 256)) + in + + for port = 0 to 9 do + let internal_port = 1024 + (i * 10) + port in + let external_port = 30000 + (i * 10) + port in + + let internal_addr = (internal_ip, internal_port) in + let external_addr = (external_ip, external_port) in + + Hashtbl.replace nat_table internal_addr external_addr; + Hashtbl.replace reverse_table external_addr internal_addr; + + if i mod 10 = 0 then ( + (match Hashtbl.find_opt nat_table internal_addr with + | Some (ext_ip, ext_port) -> + let _ = Network.string_of_ipv4 ext_ip in + let _ = string_of_int ext_port in + () + | None -> ()); + + match Hashtbl.find_opt reverse_table external_addr with + | Some (int_ip, int_port) -> + let _ = Network.string_of_ipv4 int_ip in + let _ = string_of_int int_port in + () + | None -> ()) + done; + + if i mod 100 = 0 then ( + let to_remove = ref [] in + Hashtbl.iter + (fun k _v -> + let _ip, port = k in + if port < 1024 + ((i - 200) * 10) then to_remove := k :: !to_remove) + nat_table; + List.iter (Hashtbl.remove nat_table) !to_remove; + List.iter + (fun k -> + match Hashtbl.find_opt nat_table k with + | Some v -> Hashtbl.remove reverse_table v + | None -> ()) + !to_remove) + done + +(* Benchmark 6: IP Address Sorting and Comparison *) +let bench_ip_sorting () = + let sorted_lists = ref [] in + + for i = 1 to 1000 do + let ip_list = + List.init 200 (fun j -> + let idx = (i * 200) + j in + Printf.sprintf "%d.%d.%d.%d" + (idx * 7 mod 256) + (idx * 11 mod 256) + (idx * 13 mod 256) + (idx * 17 mod 256)) + in + + let parsed = + List.map + (fun s -> try Some (Network.ipv4_of_string_exn s, s) with _ -> None) + ip_list + |> List.filter_map (fun x -> x) + in + + let sorted = + List.sort + (fun (ip1, _) (ip2, _) -> + Int32.compare (Network.int32_of_ipv4 ip1) (Network.int32_of_ipv4 ip2)) + parsed + in + + let sorted_strings = + List.map (fun (ip, _) -> Network.string_of_ipv4 ip) sorted + in + + let grouped = Hashtbl.create 256 in + List.iter + (fun (ip, orig) -> + let subnet = Int32.shift_right (Network.int32_of_ipv4 ip) 8 in + let existing = + match Hashtbl.find_opt grouped subnet with Some l -> l | None -> [] + in + Hashtbl.replace grouped subnet ((ip, orig) :: existing)) + sorted; + + let _max_subnet = + Hashtbl.fold + (fun subnet ips (max_sub, max_count) -> + let count = List.length ips in + if count > max_count then (subnet, count) else (max_sub, max_count)) + grouped (0l, 0) + in + + if i mod 50 = 0 then sorted_lists := sorted_strings :: !sorted_lists; + + if List.length !sorted_lists > 20 then + sorted_lists := ExtList.List.take 10 !sorted_lists + done + +(* Benchmark 7: Broadcast and Network Address Calculations *) +let bench_broadcast_calculations () = + let boundary_cache = ref [] in + + for i = 1 to 3000 do + let subnet_sizes = [ 30; 29; 28; 27; 26; 25; 24; 23; 22; 21; 20; 16; 8 ] in + + List.iter + (fun mask -> + let base = Int32.of_int (0x0A000000 + (i * 0x10000)) in + let cidr_str = + Printf.sprintf "%s/%d" + (Network.string_of_ipv4 (Network.ipv4_of_int32 base)) + mask + in + + try + let cidr = Network.cidr_of_string_exn cidr_str in + let net_ip = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in + let net_mask = mask in + + let mask_val = + Int32.lognot (Int32.sub (Int32.shift_left 1l (32 - net_mask)) 1l) + in + let network = Int32.logand net_ip mask_val in + let network_ip = Network.ipv4_of_int32 network in + + let host_bits = 32 - net_mask in + let broadcast = + Int32.logor network (Int32.sub (Int32.shift_left 1l host_bits) 1l) + in + let broadcast_ip = Network.ipv4_of_int32 broadcast in + + let first_usable = + if host_bits > 1 then Network.ipv4_of_int32 (Int32.succ network) + else network_ip + in + let last_usable = + if host_bits > 1 then Network.ipv4_of_int32 (Int32.pred broadcast) + else broadcast_ip + in + + let total_hosts = + if host_bits >= 2 then Int32.sub (Int32.shift_left 1l host_bits) 2l + else 0l + in + + let boundaries = + ( Network.string_of_ipv4 network_ip, + Network.string_of_ipv4 broadcast_ip, + Network.string_of_ipv4 first_usable, + Network.string_of_ipv4 last_usable, + Int32.to_string total_hosts ) + in + + if i mod 100 = 0 then boundary_cache := boundaries :: !boundary_cache + with _ -> ()) + subnet_sizes; + + if List.length !boundary_cache > 100 then + boundary_cache := ExtList.List.take 50 !boundary_cache + done + +(* Benchmark 8: Complex Network Operations *) +let bench_complex_network_ops () = + let operation_cache = Hashtbl.create 1000 in + let results = ref [] in + + for i = 1 to 2000 do + let source_net = + Network.cidr_of_string_exn (Printf.sprintf "10.%d.0.0/16" (i mod 256)) + in + + let base_ip = Network.int32_of_ipv4 (Network.prefix_of_cidr source_net) in + let subnets = + List.init 16 (fun j -> + let subnet_base = Int32.add base_ip (Int32.of_int (j * 256)) in + Printf.sprintf "%s/24" + (Network.string_of_ipv4 (Network.ipv4_of_int32 subnet_base))) + in + + let valid_subnets = + List.filter_map + (fun s -> + try + let cidr = Network.cidr_of_string_exn s in + let sub_ip_obj = Network.prefix_of_cidr cidr in + if Network.ipv4_matches sub_ip_obj source_net then Some cidr + else None + with _ -> None) + subnets + in + + let all_ips = + List.concat_map + (fun cidr -> + let net = Network.int32_of_ipv4 (Network.prefix_of_cidr cidr) in + List.init 10 (fun k -> + let ip = Int32.add net (Int32.of_int k) in + Network.ipv4_of_int32 ip)) + valid_subnets + in + + let ip_strings = List.map Network.string_of_ipv4 all_ips in + let reparsed = + List.filter_map + (fun s -> try Some (Network.ipv4_of_string_exn s) with _ -> None) + ip_strings + in + + List.iteri + (fun j ip -> + let key = (i, j) in + let value = + (ip, List.nth_opt valid_subnets (j mod List.length valid_subnets)) + in + Hashtbl.replace operation_cache key value) + reparsed; + + if i mod 10 = 0 then + for j = 0 to 50 do + match Hashtbl.find_opt operation_cache (i - 5, j) with + | Some (ip, Some cidr) -> + let ip_str = Network.string_of_ipv4 ip in + let cidr_str = Network.string_of_cidr cidr in + results := (ip_str, cidr_str) :: !results + | _ -> () + done; + + if i mod 100 = 0 then ( + Hashtbl.iter + (fun (idx, _) _ -> + if idx < i - 200 then Hashtbl.remove operation_cache (idx, 0)) + operation_cache; + if List.length !results > 200 then + results := ExtList.List.take 100 !results) + done + +(* Main benchmark suite runner *) +let () = + bench_ipv4_parsing_storm (); + bench_cidr_calculations (); + bench_range_operations (); + bench_mixed_format_parsing (); + bench_nat_tables (); + bench_ip_sorting (); + bench_broadcast_calculations (); + bench_complex_network_ops () diff --git a/benchmarks/ahrefs-devkit/stre_bench.ml b/benchmarks/ahrefs-devkit/stre_bench.ml new file mode 100644 index 000000000..b0b609842 --- /dev/null +++ b/benchmarks/ahrefs-devkit/stre_bench.ml @@ -0,0 +1,367 @@ +(** Stre GC Benchmark Suite + + This suite stresses the OCaml garbage collector through intensive string + manipulation operations using the Stre module. Each benchmark targets + different GC behaviors through string allocation patterns: + - Substring allocation pressure + - String splitting and concatenation + - Pattern-based operations with regular expressions + - Temporary string creation and disposal *) + +open Devkit + +(* Benchmark 1: String Split Storm (Minor GC stress) *) +let bench_split_storm () = + let retained = ref [] in + + let base_string = + String.concat "," + (List.init 1000 (fun i -> Printf.sprintf "item_%d_value_%d" i (i * 7))) + in + + for i = 1 to 1000 do + let parts1 = Stre.nsplitc base_string ',' in + let _parts2 = Stre.nsplitc_rev base_string ',' in + + let nested = String.concat "|" parts1 in + let parts3 = Stre.nsplitc nested '|' in + + if i mod 50 = 0 then retained := parts3 @ !retained; + + let count = ref 0 in + let _ = + Stre.nsplitc_fold base_string ',' + (fun acc s -> + incr count; + if !count mod 10 = 0 then retained := s :: !retained; + acc) + () + in + (); + + if List.length !retained > 1000 then + retained := ExtList.List.take 500 !retained + done + +(* Benchmark 2: Substring Slicing Pressure *) +let bench_substring_slicing () = + let retained_slices = ref [] in + + let source = String.init 100000 (fun i -> char_of_int (65 + (i mod 26))) in + + for i = 1 to 500 do + let slice_size = 10 + (i mod 1000) in + let offset = i mod (String.length source - slice_size) in + + let slice1 = Stre.from_to source offset (offset + slice_size) in + let slice2 = Stre.unsafe_from_to source offset (offset + slice_size) in + let slice3 = Stre.slice ~first:offset ~last:(offset + slice_size) source in + + for j = 0 to 9 do + let overlap_start = offset + (j * (slice_size / 10)) in + if overlap_start + slice_size < String.length source then + let overlap = + Stre.slice ~first:overlap_start + ~last:(overlap_start + slice_size) + source + in + if j mod 3 = 0 then retained_slices := overlap :: !retained_slices + done; + + if i mod 20 = 0 then + retained_slices := slice1 :: slice2 :: slice3 :: !retained_slices; + + if List.length !retained_slices > 500 then + retained_slices := ExtList.List.take 250 !retained_slices + done + +(* Benchmark 3: Pattern-based String Operations *) +let bench_pattern_operations () = + let retained_matches = ref [] in + + let text = + String.concat "\n" + (List.init 500 (fun i -> + Printf.sprintf + "Line %d: email_%d@example.com, phone: 555-%04d, code: ABC%03d" i i + (i * 13 mod 9999) + (i mod 1000))) + in + + for i = 1 to 100 do + let lines = Stre.nsplitc text '\n' in + + let processed = + List.map + (fun line -> + let parts1 = Stre.nsplitc line ':' in + let parts2 = List.concat_map (fun p -> Stre.nsplitc p ',') parts1 in + + let extracted = + List.filter_map + (fun p -> + if String.length p > 5 then Some (Stre.slice ~first:0 ~last:5 p) + else None) + parts2 + in + + String.concat "|" extracted) + lines + in + + if i mod 10 = 0 then retained_matches := processed @ !retained_matches; + + let _ = + List.map + (fun s -> + let upper = String.uppercase_ascii s in + let lower = String.lowercase_ascii s in + let reversed = + String.init (String.length s) (fun j -> + String.get s (String.length s - 1 - j)) + in + if i mod 20 = 0 then + retained_matches := upper :: lower :: reversed :: !retained_matches) + (ExtList.List.take 10 processed) + in + + if List.length !retained_matches > 1000 then + retained_matches := ExtList.List.take 500 !retained_matches + done + +(* Benchmark 4: String Concatenation Chains *) +let bench_concatenation_chains () = + let retained_chains = ref [] in + + for i = 1 to 200 do + let chain1 = ref "" in + let chain2 = ref "" in + + for j = 1 to 100 do + chain1 := !chain1 ^ string_of_int j ^ ","; + if j mod 10 = 0 then chain2 := !chain2 ^ !chain1 + done; + + let parts = Stre.nsplitc !chain1 ',' in + let rejoined1 = String.concat "|" parts in + let rejoined2 = String.concat ";" parts in + let rejoined3 = String.concat "::" parts in + + let nested = + List.fold_left + (fun acc p -> acc ^ "[" ^ p ^ "]") + "" + (ExtList.List.take 50 parts) + in + + if i mod 10 = 0 then + retained_chains := + !chain1 :: !chain2 :: rejoined1 :: rejoined2 :: rejoined3 :: nested + :: !retained_chains; + + if List.length !retained_chains > 200 then + retained_chains := ExtList.List.take 100 !retained_chains + done + +(* Benchmark 5: Enumeration-based String Processing *) +let bench_enum_string_ops () = + let retained_enums = ref [] in + + for i = 1 to 300 do + let text = + String.init 10000 (fun j -> + if j mod 100 = 0 then '\n' else char_of_int (65 + ((i + j) mod 26))) + in + + let enum1 = Stre.nsplitc_enum text '\n' in + + let processed = + Enum.map + (fun line -> + let words = Stre.nsplitc line ' ' in + String.concat "_" (List.map String.uppercase_ascii words)) + enum1 + in + + let partial = Enum.take 50 processed |> ExtList.List.of_enum in + + let enum2 = Stre.nsplitc_enum text '\n' in + let filtered = Enum.filter (fun s -> String.length s > 50) enum2 in + let filtered_list = Enum.take 20 filtered |> ExtList.List.of_enum in + + if i mod 15 = 0 then + retained_enums := partial @ filtered_list @ !retained_enums; + + let enum3 = Stre.nsplitc_enum text '\n' in + let chain = + Enum.map + (fun s -> Stre.slice ~first:0 ~last:(min 10 (String.length s)) s) + enum3 + in + let chain_list = Enum.take 30 chain |> ExtList.List.of_enum in + + if i mod 20 = 0 then retained_enums := chain_list @ !retained_enums; + + if List.length !retained_enums > 500 then + retained_enums := ExtList.List.take 250 !retained_enums + done + +(* Benchmark 6: Mixed-size String Allocations *) +let bench_mixed_size_allocations () = + let retained_mixed = Hashtbl.create 1000 in + let counter = ref 0 in + + for i = 1 to 500 do + let sizes = [| 10; 100; 1000; 50; 500; 5000; 20; 200; 2000 |] in + + Array.iter + (fun size -> + incr counter; + + let s = String.init size (fun j -> char_of_int (65 + (i * j mod 26))) in + + let chunk_size = max 1 (size / (10 + (i mod 10))) in + let chunks = ref [] in + let pos = ref 0 in + while !pos < String.length s do + let len = min chunk_size (String.length s - !pos) in + chunks := Stre.slice ~first:!pos ~last:(!pos + len) s :: !chunks; + pos := !pos + len + done; + + let processed = + List.map + (fun chunk -> + let upper = String.uppercase_ascii chunk in + let doubled = chunk ^ chunk in + if !counter mod 7 = 0 then doubled else upper) + !chunks + in + + if !counter mod 13 = 0 || !counter mod 17 = 0 then + Hashtbl.replace retained_mixed !counter processed; + + if !counter mod 100 = 0 then + Hashtbl.iter + (fun k _ -> + if k < !counter - 500 then Hashtbl.remove retained_mixed k) + retained_mixed) + sizes + done + +(* Benchmark 7: String Building with Buffers *) +let bench_string_building () = + let retained_built = ref [] in + + for i = 1 to 200 do + let direct = ref "" in + for j = 1 to 100 do + direct := !direct ^ Printf.sprintf "item_%d_%d " i j + done; + + let parts = List.init 100 (fun j -> Printf.sprintf "item_%d_%d" i j) in + let from_list = String.concat " " parts in + + let base = String.init 5000 (fun _ -> 'x') in + let substrings = + List.init 50 (fun j -> + let start = j * 100 in + let len = 50 + (j mod 50) in + Stre.slice ~first:start ~last:(start + len) base) + in + let from_subs = String.concat "-" substrings in + + let split1 = Stre.nsplitc from_list ' ' in + let rebuilt1 = String.concat "," split1 in + + let split2 = Stre.nsplitc from_subs '-' in + let rebuilt2 = String.concat ";" split2 in + + if i mod 10 = 0 then + retained_built := + !direct :: from_list :: from_subs :: rebuilt1 :: rebuilt2 + :: !retained_built; + + if List.length !retained_built > 300 then + retained_built := ExtList.List.take 150 !retained_built + done + +(* Benchmark 8: Deep String Transformation Chains *) +let bench_transformation_chains () = + let transformation_cache = Hashtbl.create 500 in + let stage_results = ref [] in + + for i = 1 to 150 do + let base = + String.concat "," + (List.init 200 (fun j -> Printf.sprintf "data_%d_%d" i j)) + in + + let stage1 = Stre.nsplitc base ',' in + let stage1_transformed = + List.map + (fun s -> + let len = String.length s in + if len > 5 then Stre.slice ~first:2 ~last:(len - 1) s else s ^ s) + stage1 + in + + let stage2 = String.concat "|" stage1_transformed in + let stage2_split = Stre.nsplitc stage2 '|' in + + let stage3 = + List.filter_map + (fun s -> + if String.length s mod 2 = 0 then Some (String.uppercase_ascii s) + else if String.length s > 3 then + Some (Stre.slice ~first:1 ~last:(String.length s - 1) s) + else None) + stage2_split + in + + let stage4 = + if List.length stage3 < 100 then + List.concat_map + (fun s1 -> + List.map + (fun s2 -> + if String.length s1 + String.length s2 < 50 then s1 ^ "_" ^ s2 + else + Stre.slice ~first:0 ~last:10 s1 + ^ "_" + ^ Stre.slice ~first:0 ~last:10 s2) + (ExtList.List.take 5 stage3)) + (ExtList.List.take 10 stage3) + else stage3 + in + + Hashtbl.replace transformation_cache i stage4; + + (if i > 10 then + match Hashtbl.find_opt transformation_cache (i - 5) with + | Some old_stage -> + let combined = + ExtList.List.take 10 stage4 @ ExtList.List.take 10 old_stage + in + stage_results := combined :: !stage_results + | None -> ()); + + if i mod 50 = 0 then + Hashtbl.iter + (fun k _ -> if k < i - 20 then Hashtbl.remove transformation_cache k) + transformation_cache; + + if List.length !stage_results > 50 then + stage_results := ExtList.List.take 25 !stage_results + done + +(* Main benchmark suite runner *) +let () = + bench_split_storm (); + bench_substring_slicing (); + bench_pattern_operations (); + bench_concatenation_chains (); + bench_enum_string_ops (); + bench_mixed_size_allocations (); + bench_string_building (); + bench_transformation_chains () diff --git a/dependencies/template/dev.opam b/dependencies/template/dev.opam index 326f15564..eeefc40b4 100644 --- a/dependencies/template/dev.opam +++ b/dependencies/template/dev.opam @@ -22,6 +22,7 @@ depends: [ "conf-pkg-config" {= "1.2"} "cppo" {= "1.6.7"} "decompress" {= "1.1.0"} + "devkit" "digestif" {= "1.0.0"} "fmt" {= "0.9.0"} "irmin" {= "3.3.2"} diff --git a/run_config.json b/run_config.json index 78c48e815..5f0eb3cce 100644 --- a/run_config.json +++ b/run_config.json @@ -1559,6 +1559,58 @@ "params": "" } ] + }, + { + "executable": "benchmarks/ahrefs-devkit/htmlStream_bench.exe", + "name": "devkit_htmlstream_bench", + "tags": [ + "10s_100s", + "macro_bench" + ], + "runs": [ + { + "params": "" + } + ] + }, + { + "executable": "benchmarks/ahrefs-devkit/stre_bench.exe", + "name": "devkit_stre_bench", + "tags": [ + "10s_100s", + "macro_bench" + ], + "runs": [ + { + "params": "" + } + ] + }, + { + "executable": "benchmarks/ahrefs-devkit/network_bench.exe", + "name": "devkit_network_bench", + "tags": [ + "10s_100s", + "macro_bench" + ], + "runs": [ + { + "params": "" + } + ] + }, + { + "executable": "benchmarks/ahrefs-devkit/gzip_bench.exe", + "name": "devkit_gzip_bench", + "tags": [ + "10s_100s", + "macro_bench" + ], + "runs": [ + { + "params": "" + } + ] } ] }