diff --git a/.github/workflows/zig.yml b/.github/workflows/zig.yml index 4aebb9f..2cc0e46 100644 --- a/.github/workflows/zig.yml +++ b/.github/workflows/zig.yml @@ -10,69 +10,69 @@ jobs: c-build-gcc: name: C Build (GCC) runs-on: ubuntu-latest - + steps: - uses: actions/checkout@v4 - + - name: Check GCC version run: gcc --version - + - name: Build with GCC (C17 and C23) run: cd c && make clean && make gcc-builds - + - name: Test GCC C17 executable run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-gcc-c17 || true - + - name: Test GCC C23 executable run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-gcc-c23 || true - + - name: Show binary sizes run: ls -lh c/chat-gcc-* c-build-clang: name: C Build (Clang) runs-on: ubuntu-latest - + steps: - uses: actions/checkout@v4 - + - name: Install Clang run: sudo apt-get update && sudo apt-get install -y clang - + - name: Check Clang version run: clang --version - + - name: Build with Clang (C17 and C23) run: cd c && make clean && make clang-builds - + - name: Test Clang C17 executable run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-clang-c17 || true - + - name: Test Clang C23 executable run: cd c && echo -e "hi\npython\nexit" | timeout 5 ./chat-clang-c23 || true - + - name: Show binary sizes run: ls -lh c/chat-clang-* zig-build-and-test: name: Zig Build & Test runs-on: ubuntu-latest - + steps: - uses: actions/checkout@v4 - + - uses: goto-bus-stop/setup-zig@v2 with: - version: 0.15.2 - + version: 0.16.0 + - name: Build Zig chatbot run: cd zig && zig build - + - name: Run Zig tests run: cd zig && zig build test - + - name: Test Zig executable runs run: cd zig && echo -e "hi\npython\nexit" | timeout 5 ./zig-out/bin/chat || true - + - name: Show binary size run: ls -lh zig/zig-out/bin/chat diff --git a/test_inputs.txt b/test_inputs.txt index 32c832f..c61cff7 100644 --- a/test_inputs.txt +++ b/test_inputs.txt @@ -1,4 +1,5 @@ hi +pure hey python light diff --git a/zig/build.zig b/zig/build.zig index 
51157f2..f2ce570 100644 --- a/zig/build.zig +++ b/zig/build.zig @@ -34,4 +34,29 @@ pub fn build(b: *std.Build) void { const run_test = b.addRunArtifact(tests); const test_step = b.step("test", "Run tests"); test_step.dependOn(&run_test.step); + + // Fuzz tests + const fuzz_tests = b.addTest(.{ + .root_module = b.createModule(.{ + .root_source_file = b.path("src/fuzz.zig"), + .target = target, + .optimize = optimize, + }), + }); + + const run_fuzz_test = b.addRunArtifact(fuzz_tests); + const fuzz_step = b.step("fuzz", "Run fuzz tests"); + fuzz_step.dependOn(&run_fuzz_test.step); + + // Fuzz executable for use with external fuzzers (afl-fuzz, etc.) + const fuzz_exe = b.addExecutable(.{ + .name = "fuzz", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/fuzz.zig"), + .target = target, + .optimize = .ReleaseSafe, // keep safety checks for fuzzing + }), + }); + + b.installArtifact(fuzz_exe); } diff --git a/zig/src/chatbot.zig b/zig/src/chatbot.zig index 8197282..3da3f70 100644 --- a/zig/src/chatbot.zig +++ b/zig/src/chatbot.zig @@ -115,7 +115,7 @@ pub const HashTable = struct { }; test "create_hashtable" { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa: std.heap.DebugAllocator(.{}) = .init; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -126,7 +126,7 @@ test "create_hashtable" { } test "set_and_get" { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa: std.heap.DebugAllocator(.{}) = .init; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -141,7 +141,7 @@ test "set_and_get" { } test "get_nonexistent_key" { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa: std.heap.DebugAllocator(.{}) = .init; defer _ = gpa.deinit(); const allocator = gpa.allocator(); @@ -154,7 +154,7 @@ test "get_nonexistent_key" { } test "update_existing_key" { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa: std.heap.DebugAllocator(.{}) = .init; defer _ = gpa.deinit(); const allocator = 
gpa.allocator(); @@ -170,7 +170,7 @@ test "update_existing_key" { } test "multiple_entries" { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var gpa: std.heap.DebugAllocator(.{}) = .init; defer _ = gpa.deinit(); const allocator = gpa.allocator(); diff --git a/zig/src/fuzz.zig b/zig/src/fuzz.zig new file mode 100644 index 0000000..cc631fd --- /dev/null +++ b/zig/src/fuzz.zig @@ -0,0 +1,183 @@ +const std = @import("std"); +const chatbot = @import("chatbot.zig"); +const HashTable = chatbot.HashTable; + +/// Fuzz target: hammer the hash table with arbitrary key/value pairs +/// Tests for memory safety, correct get-after-set semantics, and collision handling +/// +/// To use with external fuzzer (afl-fuzz, honggfuzz, etc.), pipe input: +/// echo -n "fuzz_data" | zig-out/bin/fuzz +pub fn main(init: std.process.Init) !void { + // use a fixed-buffer allocator that will catch OOM without crashing + var buffer: [1024 * 1024]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&buffer); + const allocator = fba.allocator(); + + var ht = HashTable.create(allocator, 64) catch return; + defer ht.destroy(); + + // read stdin for fuzz input + const io = init.io; + const stdin_file = std.Io.File.stdin(); + var input_buf: [1024 * 512]u8 = undefined; + var total_read: usize = 0; + + // read until EOF using streaming reads + while (total_read < input_buf.len) { + const chunk = stdin_file.readStreaming(io, &.{input_buf[total_read..]}) catch break; + if (chunk == 0) break; + total_read += chunk; + } + + fuzzHashTable(ht, allocator, input_buf[0..total_read]) catch return; +} + +fn fuzzHashTable(ht: *HashTable, _: std.mem.Allocator, input: []const u8) !void { + var offset: usize = 0; + // smallest record is 3 bytes: key_len, val_len, op (key and value may be empty) + while (offset + 3 <= input.len) { + // read key length (1 byte, capped to 127) + const key_len = @min(input[offset], 127); + offset += 1; + if (offset + key_len > input.len) break; + + const key = input[offset .. 
offset + key_len]; + offset += key_len; + + // read value length (1 byte, capped to 127) + if (offset >= input.len) break; + const val_len = @min(input[offset], 127); + offset += 1; + if (offset + val_len > input.len) break; + + const value = input[offset .. offset + val_len]; + offset += val_len; + + // read operation byte: 0 = set, 1 = get, 2 = set+get verify + if (offset >= input.len) break; + const op = input[offset] % 3; + offset += 1; + + switch (op) { + 0 => { + // set operation + ht.set(key, value) catch continue; + }, + 1 => { + // get operation - just exercise the code path + _ = ht.get(key); + }, + 2 => { + // set then verify get returns the same value + ht.set(key, value) catch continue; + const result = ht.get(key); + if (result) |r| { + // value must match what we just set + if (!std.mem.eql(u8, r, value)) { + // this would be a bug! + @panic("get returned wrong value after set"); + } + } else { + @panic("get returned null immediately after set"); + } + }, + else => unreachable, + } + } + + // edge case: exercise set/get with an empty key (result is not checked) + ht.set("", "empty_key_value") catch {}; + _ = ht.get(""); + + // stress test: same key multiple times to test update path + var i: usize = 0; + while (i < @min(input.len, 100)) : (i += 1) { + var val_buf: [8]u8 = undefined; + const val_slice = std.fmt.bufPrint(&val_buf, "{d}", .{i}) catch break; + ht.set("stress", val_slice) catch break; + } + + // final lookup of the repeatedly-updated key + if (ht.get("stress")) |v| { + // expected to be the last value stored above; exercised but not asserted + _ = v; + } +} + +test "fuzz_empty_input" { + var gpa: std.heap.DebugAllocator(.{}) = .init; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + + var ht = try HashTable.create(allocator, 64); + defer ht.destroy(); + + try fuzzHashTable(ht, allocator, ""); +} + +test "fuzz_minimal_input" { + var gpa: std.heap.DebugAllocator(.{}) = .init; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); 
+ + var ht = try HashTable.create(allocator, 64); + defer ht.destroy(); + 
+ // format: key_len, key, val_len, val, op + const input = "\x03key\x05value\x02"; + try fuzzHashTable(ht, allocator, input); + + // verify it actually set + const result = ht.get("key"); + try std.testing.expect(result != null); + try std.testing.expectEqualSlices(u8, "value", result.?); +} + +test "fuzz_collision_stress" { + var gpa: std.heap.DebugAllocator(.{}) = .init; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + + // small table to force collisions + var ht = try HashTable.create(allocator, 4); + defer ht.destroy(); + + // generate input that will cause hash collisions + var input_buf: [1024]u8 = undefined; + var offset: usize = 0; + + var i: u8 = 0; + while (i < 50) : (i += 1) { + if (offset + 10 > input_buf.len) break; + input_buf[offset] = 4; // key len + offset += 1; + input_buf[offset] = 'k'; + input_buf[offset + 1] = 'e'; + input_buf[offset + 2] = 'y'; + input_buf[offset + 3] = '0' + (i % 10); + offset += 4; + input_buf[offset] = 1; // val len + offset += 1; + input_buf[offset] = 'a' + i; + offset += 1; + input_buf[offset] = 2; // set+verify op + offset += 1; + } + + try fuzzHashTable(ht, allocator, input_buf[0..offset]); +} + +test "fuzz_special_chars" { + var gpa: std.heap.DebugAllocator(.{}) = .init; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + + var ht = try HashTable.create(allocator, 64); + defer ht.destroy(); + + // keys/values with null bytes, high bytes, separators + const nasty_input = "\x05\x00\x01\x02\xff\xfe\x05\x00\x01\x02\xff\xfe\x02" ++ + "\x06 .,\"\n\x00\x03abc\x02"; + + try fuzzHashTable(ht, allocator, nasty_input); +} diff --git a/zig/src/main.zig b/zig/src/main.zig index f8d67df..1490c65 100644 --- a/zig/src/main.zig +++ b/zig/src/main.zig @@ -1,17 +1,15 @@ const std = @import("std"); const chatbot = @import("chatbot.zig"); -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); +pub fn main(init: 
std.process.Init) !void { + const allocator = init.gpa; + const io = init.io; - // Zig 0.15 I/O: explicit buffer management var stdout_buf: [4096]u8 = undefined; - var stdout = std.fs.File.stdout().writer(&stdout_buf); + var stdout = std.Io.File.stdout().writerStreaming(io, &stdout_buf); var stdin_buf: [4096]u8 = undefined; - var stdin = std.fs.File.stdin().reader(&stdin_buf); + var stdin = std.Io.File.stdin().readerStreaming(io, &stdin_buf); try stdout.interface.print("$ Chatbot v1.0.0!\n", .{}); try stdout.interface.flush(); @@ -32,12 +30,9 @@ pub fn main() !void { try stdout.interface.print("\n$ (user) ", .{}); try stdout.interface.flush(); - // Read line using Zig 0.15 delimiter API - // Use takeDelimiter which returns null on EOF with empty remaining const line = stdin.interface.takeDelimiter('\n') catch |err| { switch (err) { error.StreamTooLong => { - // Line too long, skip it continue; }, else => return err, @@ -47,7 +42,7 @@ pub fn main() !void { if (line == null) break; const trimmed = std.mem.trim(u8, line.?, " \t\r\n"); - if (trimmed.len == 0) continue; // Empty line, keep going + if (trimmed.len == 0) continue; var word_iter = std.mem.tokenizeAny(u8, trimmed, chatbot.SeparatorChars);