diff --git a/.github/workflows/ci-tap.yml b/.github/workflows/ci-tap.yml index 0d7727a..5a6ac03 100644 --- a/.github/workflows/ci-tap.yml +++ b/.github/workflows/ci-tap.yml @@ -73,28 +73,19 @@ jobs: cmake --build build --parallel cmake --install build - - name: Start ClickHouse + - name: Start ClickHouse and OTel collector run: | - docker rm -f clickhouse-test 2>/dev/null || true - docker run -d --name clickhouse-test \ - --network host \ - clickhouse/clickhouse-server:26.1 - # Wait for ClickHouse to be ready - for i in {1..30}; do - if curl -sf 'http://localhost:8123/' --data 'SELECT 1' >/dev/null 2>&1; then - echo "ClickHouse ready" - exit 0 + docker compose -f docker/docker-compose.test.yml up -d --wait + docker compose -f docker/docker-compose.otel.yml up -d + # Poll OTel health endpoint (distroless image has no shell for healthcheck) + for i in $(seq 1 30); do + if curl -sf http://localhost:13133/ >/dev/null 2>&1; then + echo "OTel collector ready" + break fi - echo "Waiting for ClickHouse... ($i/30)" + echo "Waiting for OTel collector... ($i/30)" sleep 1 done - echo "ClickHouse not ready after 30s" - docker logs clickhouse-test - exit 1 - - - name: Initialize ClickHouse schema - run: | - docker exec clickhouse-test clickhouse-client --multiquery < docker/init/00-schema.sql - name: Run TAP tests run: | @@ -110,6 +101,8 @@ jobs: name: tap-test-logs path: t/tmp_check/ - - name: Cleanup ClickHouse + - name: Cleanup if: always() - run: docker rm -f clickhouse-test 2>/dev/null || true + run: | + docker compose -f docker/docker-compose.test.yml down --volumes 2>/dev/null || true + docker compose -f docker/docker-compose.otel.yml down --volumes 2>/dev/null || true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d590f0b..13d908b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -131,7 +131,7 @@ jobs: -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build - run: cmake --build build_unit --target hostname_test --parallel + run: cmake --build build_unit --parallel - name: Run unit tests run: ctest --test-dir build_unit --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 259bd92..6372ece 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,7 @@ target_include_directories(pg_stat_ch SYSTEM PRIVATE ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/api/include ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/sdk/include ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/exporters/otlp/include + ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/third_party/nlohmann-json/single_include ) target_link_libraries(pg_stat_ch PRIVATE PostgreSQLServer::PostgreSQLServer @@ -149,6 +150,19 @@ if(PSCH_BUILD_UNIT_TESTS) ) pg_stat_ch_set_warnings(hostname_test) + add_executable(sqlcommenter_parse_test + test/unit/sqlcommenter_parse_test.cc + src/export/sqlcommenter_parse.cc + ) + target_include_directories(sqlcommenter_parse_test PRIVATE src include) + target_include_directories(sqlcommenter_parse_test SYSTEM PRIVATE + ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/third_party/nlohmann-json/single_include + ) + target_compile_features(sqlcommenter_parse_test PRIVATE cxx_std_17) + target_link_libraries(sqlcommenter_parse_test PRIVATE GTest::gtest_main) + pg_stat_ch_set_warnings(sqlcommenter_parse_test) + include(GoogleTest) gtest_discover_tests(hostname_test) + gtest_discover_tests(sqlcommenter_parse_test) endif() diff --git a/docker/docker-compose.otel.yml b/docker/docker-compose.otel.yml index 4cf52c3..7c5dcd4 100644 --- a/docker/docker-compose.otel.yml +++ b/docker/docker-compose.otel.yml @@ -9,8 +9,5 @@ services: - "4317:4317" # gRPC OTLP receiver (matches psch_otel_endpoint default) - "9091:9090" # Prometheus metrics exporter (host:9091 → container:9090) - "13133:13133" # Health check HTTP endpoint - healthcheck: - test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:13133/"] - interval: 5s - timeout: 5s - retries: 10 + # Note: no healthcheck — the contrib image is distroless (no shell/curl/wget). + # Use host-side polling (curl localhost:13133) to verify readiness. diff --git a/docker/init/00-schema.sql b/docker/init/00-schema.sql index 4b15ef2..9aca1f6 100644 --- a/docker/init/00-schema.sql +++ b/docker/init/00-schema.sql @@ -59,6 +59,8 @@ CREATE TABLE pg_stat_ch.events_raw query String COMMENT 'Full SQL query text (may be truncated). Used for debugging and query analysis.', + labels String DEFAULT '{}' COMMENT 'Query labels from sqlcommenter comments (key=value pairs in /* */ blocks). Access subpaths directly: labels.controller, labels.action. Empty {} when no labels present. See: https://google.github.io/sqlcommenter/', + -- ======================================================================== -- Shared buffer metrics (main buffer cache) -- ======================================================================== diff --git a/docker/migrations/001_add_labels_column.sql b/docker/migrations/001_add_labels_column.sql new file mode 100644 index 0000000..ef4c626 --- /dev/null +++ b/docker/migrations/001_add_labels_column.sql @@ -0,0 +1,15 @@ +-- Migration 001: Add labels column for sqlcommenter support +-- +-- This migration adds the `labels` column to `events_raw` for existing +-- installations. New installations already include this column via +-- docker/init/00-schema.sql. +-- +-- Run with: +-- clickhouse-client < docker/migrations/001_add_labels_column.sql +-- +-- Safe to re-run: ALTER TABLE ADD COLUMN IF NOT EXISTS is idempotent. + +ALTER TABLE pg_stat_ch.events_raw + ADD COLUMN IF NOT EXISTS labels String DEFAULT '{}' + COMMENT 'Query labels from sqlcommenter comments (key=value pairs in /* */ blocks). Access subpaths directly: labels.controller, labels.action. Empty {} when no labels present. See: https://google.github.io/sqlcommenter/' + AFTER query; diff --git a/docs/guides/clickhouse.md b/docs/guides/clickhouse.md index 99f2c60..341fc66 100644 --- a/docs/guides/clickhouse.md +++ b/docs/guides/clickhouse.md @@ -80,6 +80,32 @@ Four materialized views provide pre-aggregated analytics: For view schemas, query patterns, and the `-State`/`-Merge` aggregation pattern, see [materialized views](/reference/materialized-views). +## Schema migrations + +When upgrading pg_stat_ch, new columns or schema changes may be required. Migration scripts are provided in [`docker/migrations/`](https://github.com/ClickHouse/pg_stat_ch/tree/main/docker/migrations) and are safe to re-run (idempotent). + +Apply all pending migrations: + +```bash +for f in docker/migrations/*.sql; do + clickhouse-client < "$f" +done +``` + +Or apply a specific migration: + +```bash +clickhouse-client < docker/migrations/001_add_labels_column.sql +``` + +| Migration | Version | Description | +|---|---|---| +| `001_add_labels_column.sql` | 0.2+ | Adds `labels JSON` column for [sqlcommenter](https://google.github.io/sqlcommenter/) query label support | + + +New installations using `docker/init/00-schema.sql` already include all schema changes. Migrations are only needed for existing ClickHouse instances. + + ## Data retention The `events_raw` table has no TTL by default. To limit storage, add a TTL: diff --git a/docs/reference/events-schema.mdx b/docs/reference/events-schema.mdx index c163154..41149ad 100644 --- a/docs/reference/events-schema.mdx +++ b/docs/reference/events-schema.mdx @@ -30,6 +30,16 @@ The table is partitioned by date (`toDate(ts_start)`) and ordered by `ts_start` Query normalization replaces literals with placeholders (`$N`). This means `SELECT * FROM users WHERE id = 42` becomes `SELECT * FROM users WHERE id = $1`. No passwords, tokens, or PII are exported in query text. +## Query labels + +| Column | Type | Description | +|---|---|---| +| `labels` | `String DEFAULT '{}'` | Key-value labels extracted from [sqlcommenter](https://google.github.io/sqlcommenter/) comments appended to the query. For example, `/* controller='users',action='show' */` produces `{"controller":"users","action":"show"}`. Access subpaths directly in ClickHouse: `labels.controller`, `labels.action`. Empty `{}` when no labels are present. | + + +Labels are parsed from the **last** `/* */` comment block in the query text. The parser supports URL-encoded values and escaped single quotes per the sqlcommenter specification. Controlled by the [`pg_stat_ch.track_labels`](/reference/configuration) GUC (default: `true`). + + ## Shared buffer usage These columns show how queries interact with PostgreSQL's `shared_buffers` cache. The cache hit ratio for a query is `shared_blks_hit / (shared_blks_hit + shared_blks_read)`. Target above 99% for OLTP workloads. diff --git a/include/config/guc.h b/include/config/guc.h index 70d9273..2d6b27c 100644 --- a/include/config/guc.h +++ b/include/config/guc.h @@ -28,6 +28,7 @@ extern int psch_otel_log_batch_size; extern int psch_otel_log_max_bytes; extern int psch_otel_log_delay_ms; extern int psch_otel_metric_interval_ms; +extern bool psch_track_labels; extern bool psch_debug_force_locked_overflow; // Initialize GUC variables diff --git a/src/config/guc.cc b/src/config/guc.cc index 9535025..7c1b088 100644 --- a/src/config/guc.cc +++ b/src/config/guc.cc @@ -32,6 +32,7 @@ int psch_otel_log_batch_size = 8192; int psch_otel_log_max_bytes = 3 * 1024 * 1024; // 3 MiB: gRPC default max is 4 MiB int psch_otel_log_delay_ms = 100; int psch_otel_metric_interval_ms = 5000; +bool psch_track_labels = true; bool psch_debug_force_locked_overflow = false; // Log level options (matches PostgreSQL's server_message_level_options pattern) @@ -307,6 +308,17 @@ void PschInitGuc(void) { PGC_SUSET, 0, nullptr, nullptr, nullptr); + DefineCustomBoolVariable( + "pg_stat_ch.track_labels", + "Extract sqlcommenter labels from query comments.", + "When enabled, the background worker parses /* key='value' */ comments " + "and exports structured labels to ClickHouse (JSON) or OTel (attributes).", + &psch_track_labels, + true, + PGC_SIGHUP, + 0, + nullptr, nullptr, nullptr); + DefineCustomBoolVariable( "pg_stat_ch.debug_force_locked_overflow", "Force HandleOverflow in locked path (debug/test only).", diff --git a/src/export/clickhouse_exporter.cc b/src/export/clickhouse_exporter.cc index 743c537..4455a46 100644 --- a/src/export/clickhouse_exporter.cc +++ b/src/export/clickhouse_exporter.cc @@ -75,10 +75,15 @@ class ClickHouseExporter : public StatsExporter { shared_ptr> DbOperationColumn() final { return TagString("cmd_type"); } shared_ptr> DbQueryTextColumn() final { return RecordString("query"); } + void AppendLabels(const ParseResult& labels) final { + labels_col_->Append(SerializeLabelsJson(labels)); + } + void BeginBatch() final { block = std::make_unique(); columns.clear(); exported_count = 0; + labels_col_ = Wrap("labels"); } void BeginRow() final { ++exported_count; } bool CommitBatch() final; @@ -116,6 +121,7 @@ class ClickHouseExporter : public StatsExporter { std::unique_ptr client; std::unique_ptr block; std::vector> columns; + shared_ptr> labels_col_; int consecutive_failures = 0; int exported_count = 0; }; diff --git a/src/export/exporter_interface.h b/src/export/exporter_interface.h index 183ecb7..d87ea9e 100644 --- a/src/export/exporter_interface.h +++ b/src/export/exporter_interface.h @@ -6,6 +6,8 @@ #include #include +#include "export/sqlcommenter_parse.h" + class StatsExporter { protected: using string = std::string; @@ -65,6 +67,10 @@ class StatsExporter { virtual shared_ptr> DbOperationColumn() = 0; // Query text. CH: RecordString "query"; OTel semconv: "db.query.text". virtual shared_ptr> DbQueryTextColumn() = 0; + // Query labels from sqlcommenter comments. Called inside the event loop. + // CH: serializes to JSON, appends to a String "labels" column; + // OTel: sets per-label attributes prefixed with "db.query.label.". + virtual void AppendLabels(const ParseResult& labels) = 0; virtual void BeginBatch() = 0; virtual void BeginRow() = 0; diff --git a/src/export/otel_exporter.cc b/src/export/otel_exporter.cc index cef7290..3ddfef7 100644 --- a/src/export/otel_exporter.cc +++ b/src/export/otel_exporter.cc @@ -129,6 +129,15 @@ class OTelExporter : public StatsExporter { shared_ptr> DbQueryTextColumn() final { return Wrap>("db.query.text"); } + void AppendLabels(const ParseResult& labels) final { + for (int i = 0; i < labels.count; ++i) { + string attr_name = "db.query.label."; + attr_name.append(labels.labels[i].key.data(), labels.labels[i].key.size()); + string val(labels.labels[i].value); + current_log_record->SetAttribute(attr_name, val); + current_row_tags[attr_name] = std::move(val); + } + } bool EstablishNewConnection() final; bool IsConnected() const final { return metrics_provider && log_provider; } diff --git a/src/export/sqlcommenter_parse.cc b/src/export/sqlcommenter_parse.cc new file mode 100644 index 0000000..e5de3c9 --- /dev/null +++ b/src/export/sqlcommenter_parse.cc @@ -0,0 +1,180 @@ +#include "export/sqlcommenter_parse.h" + +#include + +#include + +namespace { + +int DecodeHexByte(char hi, char lo) { + char buf[2] = {hi, lo}; + unsigned value = 0; + auto [ptr, ec] = std::from_chars(buf, buf + 2, value, 16); + if (ec != std::errc{} || ptr != buf + 2) + return -1; + return static_cast(value); +} + +bool IsWhitespace(char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +bool IsKeyTerminator(char c) { + return c == '=' || c == ',' || IsWhitespace(c); +} + +bool NeedsDecode(std::string_view sv) { + return sv.find('%') != std::string_view::npos || sv.find('\\') != std::string_view::npos; +} + +// Meta-unescape (\' -> ') and URL-decode (%XX) in a single pass. +// Per sqlcommenter spec, meta unescaping happens before URL decoding, +// but the two transforms don't overlap so a combined pass is equivalent. +size_t MetaUnescapeAndUrlDecode(std::string_view src, char* dst, size_t max_len) { + size_t written = 0; + size_t i = 0; + while (i < src.size() && written < max_len) { + if (src[i] == '\\' && i + 1 < src.size() && src[i + 1] == '\'') { + dst[written++] = '\''; + i += 2; + } else if (src[i] == '%' && i + 2 < src.size()) { + int byte = DecodeHexByte(src[i + 1], src[i + 2]); + if (byte >= 0) { + dst[written++] = static_cast(byte); + i += 3; + continue; + } + dst[written++] = src[i++]; + } else { + dst[written++] = src[i++]; + } + } + return written; +} + +// Decode a raw field (meta-unescape + URL-decode) into buf, or truncate if no decoding needed. +std::string_view DecodeField(std::string_view raw, char* buf, size_t max_len) { + if (NeedsDecode(raw)) { + size_t len = MetaUnescapeAndUrlDecode(raw, buf, max_len); + return {buf, len}; + } + return raw.substr(0, max_len); +} + +// Lightweight scanner for walking through a sqlcommenter comment. +struct Scanner { + std::string_view text; + size_t pos = 0; + + bool AtEnd() const { return pos >= text.size(); } + + void SkipWhitespace() { + while (!AtEnd() && IsWhitespace(text[pos])) + ++pos; + } + + // Consume a specific character; returns false without advancing if not matched. + bool Consume(char expected) { + if (AtEnd() || text[pos] != expected) + return false; + ++pos; + return true; + } + + // Scan a key: sequence of non-terminator characters. + std::string_view ScanKey() { + size_t start = pos; + while (!AtEnd() && !IsKeyTerminator(text[pos])) + ++pos; + return text.substr(start, pos - start); + } + + // Scan a single-quoted value, handling \' escapes per sqlcommenter spec. + // Assumes the opening quote was already consumed. + // Returns the raw content (before decoding). Sets *ok = false on unterminated quote. + std::string_view ScanQuotedValue(bool* ok) { + size_t start = pos; + while (!AtEnd()) { + if (text[pos] == '\\' && pos + 1 < text.size() && text[pos + 1] == '\'') { + pos += 2; + } else if (text[pos] == '\'') { + auto val = text.substr(start, pos - start); + ++pos; + *ok = true; + return val; + } else { + ++pos; + } + } + *ok = false; + return {}; + } +}; + +} // namespace + +std::string_view ExtractLastComment(std::string_view query) { + auto end_pos = query.rfind("*/"); + if (end_pos == std::string_view::npos) + return {}; + + auto start_pos = query.rfind("/*", end_pos); + if (start_pos == std::string_view::npos) + return {}; + + size_t content_start = start_pos + 2; + if (content_start >= end_pos) + return {}; + return query.substr(content_start, end_pos - content_start); +} + +ParseResult ParseSqlcommenter(std::string_view comment) { + ParseResult result; + Scanner scan{comment}; + + while (!scan.AtEnd() && result.count < kMaxLabels) { + scan.SkipWhitespace(); + + auto raw_key = scan.ScanKey(); + if (raw_key.empty()) { + if (!scan.AtEnd()) + ++scan.pos; + continue; + } + + scan.SkipWhitespace(); + if (!scan.Consume('=')) + continue; + scan.SkipWhitespace(); + if (!scan.Consume('\'')) + continue; + + bool ok = false; + auto raw_value = scan.ScanQuotedValue(&ok); + if (!ok) + break; + + Label& label = result.labels[result.count++]; + label.key = DecodeField(raw_key, label.decoded_key, kMaxKeyLen); + label.value = DecodeField(raw_value, label.decoded_value, kMaxValueLen); + + scan.SkipWhitespace(); + scan.Consume(','); + } + + if (result.count == kMaxLabels) { + scan.SkipWhitespace(); + if (!scan.AtEnd()) + result.truncated = true; + } + + return result; +} + +std::string SerializeLabelsJson(const ParseResult& result) { + nlohmann::ordered_json obj = nlohmann::ordered_json::object(); + for (int i = 0; i < result.count; ++i) { + obj[std::string(result.labels[i].key)] = std::string(result.labels[i].value); + } + return obj.dump(-1, ' ', false, nlohmann::ordered_json::error_handler_t::replace); +} diff --git a/src/export/sqlcommenter_parse.h b/src/export/sqlcommenter_parse.h new file mode 100644 index 0000000..f120a4b --- /dev/null +++ b/src/export/sqlcommenter_parse.h @@ -0,0 +1,38 @@ +#ifndef PG_STAT_CH_SRC_EXPORT_SQLCOMMENTER_PARSE_H_ +#define PG_STAT_CH_SRC_EXPORT_SQLCOMMENTER_PARSE_H_ + +#include +#include +#include + +constexpr int kMaxLabels = 16; +constexpr int kMaxKeyLen = 32; +constexpr int kMaxValueLen = 128; + +struct Label { + std::string_view key; // Points into query buffer or decoded_key + std::string_view value; // Points into query buffer or decoded_value + char decoded_key[kMaxKeyLen]; + char decoded_value[kMaxValueLen]; +}; + +struct ParseResult { + std::array labels; + int count = 0; + bool truncated = false; +}; + +// Find the last /* ... */ comment in query text. +// Returns the content between delimiters (exclusive), or empty if not found. +std::string_view ExtractLastComment(std::string_view query); + +// Parse sqlcommenter format: key1='value1',key2='value2' +// URL-decodes percent-encoded values per sqlcommenter spec. +// Keys truncated to 32 chars, values to 128 chars. +ParseResult ParseSqlcommenter(std::string_view comment); + +// Serialize parsed labels to JSON: {"key1":"val1","key2":"val2"} +// Returns "{}" if count == 0. +std::string SerializeLabelsJson(const ParseResult& result); + +#endif // PG_STAT_CH_SRC_EXPORT_SQLCOMMENTER_PARSE_H_ diff --git a/src/export/stats_exporter.cc b/src/export/stats_exporter.cc index e11af99..51cc60b 100644 --- a/src/export/stats_exporter.cc +++ b/src/export/stats_exporter.cc @@ -15,6 +15,7 @@ extern "C" { #include "export/clickhouse_exporter.h" #include "export/exporter_interface.h" #include "export/otel_exporter.h" +#include "export/sqlcommenter_parse.h" #include "export/stats_exporter.h" #include "queue/event.h" #include "queue/shmem.h" @@ -201,6 +202,18 @@ void ExportEventStats(const std::vector& events, StatsExporter* expor "client_addr_len"); col_app->Append(std::string(ev.application_name, alen)); col_client_addr->Append(std::string(ev.client_addr, clen)); + + if (psch_track_labels) { + std::string_view query_text(ev.query, qlen); + std::string_view comment = ExtractLastComment(query_text); + if (!comment.empty()) { + exporter->AppendLabels(ParseSqlcommenter(comment)); + } else { + exporter->AppendLabels(ParseResult{}); + } + } else { + exporter->AppendLabels(ParseResult{}); + } } elog(DEBUG1, "pg_stat_ch: finished processing %zu events", events.size()); } diff --git a/t/024_otel_export.pl b/t/024_otel_export.pl index e5e7e60..4c9cbf2 100755 --- a/t/024_otel_export.pl +++ b/t/024_otel_export.pl @@ -14,7 +14,7 @@ use psch; # Skip if Docker not available -if (!system("docker ps >/dev/null 2>&1") == 0) { +if (system("docker ps >/dev/null 2>&1") != 0) { plan skip_all => 'Docker not available, skipping OTel tests'; } @@ -26,8 +26,9 @@ # Initialize node with OTel export enabled my $node = psch_init_node_with_otel('otel_export', - flush_interval_ms => 100, - batch_max => 100, + flush_interval_ms => 100, + batch_max => 100, + otel_metric_interval_ms => 1000, ); # Test 1: Basic export - run queries and verify events are exported @@ -48,8 +49,11 @@ my $stats = psch_get_stats($node); is($stats->{send_failures}, 0, 'No send failures'); + # Wait for OTel periodic metric reader to export (1s interval + margin) + sleep(3); + # Verify metrics arrived at the collector (Prometheus endpoint) - my $count = psch_get_otel_histogram_total('pg_stat_ch_duration_us_unit'); + my $count = psch_get_otel_histogram_total('pg_stat_ch_db_client_operation_duration_seconds'); cmp_ok($count, '>=', 3, "duration_us metric has >= 3 observations (got $count)"); }; @@ -95,7 +99,8 @@ $node->safe_psql('postgres', 'DROP TABLE test_otel_labels'); $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); - sleep(2); + # Wait for OTel periodic metric reader to export (1s interval + margin) + sleep(3); # service.name resource attribute is mapped to the job label by the Prometheus exporter my $prometheus_output = `curl -s 'http://localhost:9091/metrics' 2>/dev/null`; @@ -103,7 +108,7 @@ 'metrics carry job="pg_stat_ch" label (from service.name resource attribute)'); # db tag should appear as a label on duration_us observations - ok(psch_otel_metric_has_label('pg_stat_ch_duration_us_unit', 'db', 'postgres'), + ok(psch_otel_metric_has_label('pg_stat_ch_db_client_operation_duration_seconds', 'db_name', 'postgres'), 'duration_us metric has db="postgres" label'); # rows metric captures how many rows were returned/affected @@ -114,7 +119,7 @@ my $sum = 0; for my $line (split /\n/, $prometheus_output) { next if $line =~ /^#/; - if ($line =~ /^pg_stat_ch_duration_us_unit_sum(?:\{[^}]*\})?\s+(\d+(?:\.\d+)?)/) { + if ($line =~ /^pg_stat_ch_db_client_operation_duration_seconds_sum(?:\{[^}]*\})?\s+(\S+)/) { $sum += $1; } } diff --git a/t/025_otel_reconnect.pl b/t/025_otel_reconnect.pl index 2ac042c..f0cad4b 100755 --- a/t/025_otel_reconnect.pl +++ b/t/025_otel_reconnect.pl @@ -14,7 +14,7 @@ use psch; # Skip if Docker not available -if (!system("docker ps >/dev/null 2>&1") == 0) { +if (system("docker ps >/dev/null 2>&1") != 0) { plan skip_all => 'Docker not available, skipping OTel tests'; } diff --git a/t/032_query_labels.pl b/t/032_query_labels.pl new file mode 100644 index 0000000..79ca39f --- /dev/null +++ b/t/032_query_labels.pl @@ -0,0 +1,179 @@ +#!/usr/bin/env perl +# Test: Query labels via sqlcommenter comments → ClickHouse JSON column +# Prerequisites: ClickHouse container must be running +# docker compose -f docker/docker-compose.test.yml up -d + +use strict; +use warnings; +use lib 't'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +use psch; + +# Skip if Docker/ClickHouse not available +if (!psch_clickhouse_available()) { + plan skip_all => 'Docker not available, skipping ClickHouse labels tests'; +} + +my $ch_check = `curl -s 'http://localhost:18123/' --data 'SELECT 1' 2>/dev/null`; +if ($ch_check !~ /^1/) { + plan skip_all => + 'ClickHouse container not running. Start with: docker compose -f docker/docker-compose.test.yml up -d'; +} + +psch_query_clickhouse("TRUNCATE TABLE IF EXISTS pg_stat_ch.events_raw"); + +my $node = psch_init_node_with_clickhouse('ch_labels', + flush_interval_ms => 100, + batch_max => 100, +); + +# Test 1: Query with sqlcommenter labels appears with JSON labels in CH +subtest 'basic labels export' => sub { + psch_query_clickhouse("TRUNCATE TABLE pg_stat_ch.events_raw"); + psch_reset_stats($node); + + $node->safe_psql('postgres', + "SELECT 1 /* controller='users',action='show' */"); + + $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); + + my $labels = psch_wait_for_clickhouse_query( + "SELECT labels FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%controller%' LIMIT 1", + sub { $_[0] ne '' && $_[0] ne '{}' }, + 10 + ); + + like($labels, qr/controller/, 'labels JSON contains controller key'); + like($labels, qr/users/, 'labels JSON contains users value'); + like($labels, qr/action/, 'labels JSON contains action key'); + like($labels, qr/show/, 'labels JSON contains show value'); +}; + +# Test 2: Query without comment gets empty labels +subtest 'no comment produces empty labels' => sub { + psch_query_clickhouse("TRUNCATE TABLE pg_stat_ch.events_raw"); + psch_reset_stats($node); + + $node->safe_psql('postgres', 'CREATE TABLE nocomment_test(id int)'); + $node->safe_psql('postgres', 'DROP TABLE nocomment_test'); + $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); + + my $labels = psch_wait_for_clickhouse_query( + "SELECT labels FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%nocomment_test%' LIMIT 1", + sub { defined $_[0] && $_[0] ne '' }, + 10 + ); + + like($labels, qr/^\{\}$/, 'no-comment query has empty {} labels'); +}; + +# Test 3: Multiple label keys +subtest 'multiple labels' => sub { + psch_query_clickhouse("TRUNCATE TABLE pg_stat_ch.events_raw"); + psch_reset_stats($node); + + $node->safe_psql('postgres', + "SELECT 1 /* controller='orders',action='index',framework='rails',db_driver='pg' */"); + + $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); + + # Query individual label values via JSONExtractString + my $controller = psch_wait_for_clickhouse_query( + "SELECT JSONExtractString(labels, 'controller') FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%orders%' LIMIT 1", + sub { $_[0] =~ /orders/ }, + 10 + ); + like($controller, qr/orders/, 'labels.controller = orders'); + + my $framework = psch_query_clickhouse( + "SELECT JSONExtractString(labels, 'framework') FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%orders%' LIMIT 1"); + like($framework, qr/rails/, 'labels.framework = rails'); +}; + +# Test 4: track_labels = off produces empty labels +subtest 'track_labels off' => sub { + psch_query_clickhouse("TRUNCATE TABLE pg_stat_ch.events_raw"); + psch_reset_stats($node); + + $node->safe_psql('postgres', + "ALTER SYSTEM SET pg_stat_ch.track_labels = off"); + $node->reload(); + sleep(2); # Give bgworker time to pick up SIGHUP + + $node->safe_psql('postgres', + "CREATE TABLE labels_off_test(id int)"); + $node->safe_psql('postgres', + "DROP TABLE labels_off_test"); + + $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); + + # Wait for rows to arrive in ClickHouse first + psch_wait_for_clickhouse_query( + "SELECT count() FROM pg_stat_ch.events_raw WHERE query LIKE '%labels_off_test%'", + sub { $_[0] >= 1 }, + 10 + ); + + my $labels = psch_query_clickhouse( + "SELECT labels FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%labels_off_test%' LIMIT 1"); + like($labels, qr/^\{\}$/, 'track_labels=off produces empty labels'); + + # Re-enable + $node->safe_psql('postgres', + "ALTER SYSTEM SET pg_stat_ch.track_labels = on"); + $node->reload(); + sleep(1); +}; + +# Test 5: URL-encoded values are decoded +subtest 'url encoded values' => sub { + psch_query_clickhouse("TRUNCATE TABLE pg_stat_ch.events_raw"); + psch_reset_stats($node); + + $node->safe_psql('postgres', + "SELECT 1 /* route='/api/users%3Fid%3D1' */"); + + $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); + + my $labels = psch_wait_for_clickhouse_query( + "SELECT labels FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%route%' LIMIT 1", + sub { $_[0] ne '' && $_[0] ne '{}' }, + 10 + ); + + like($labels, qr{/api/users\?id=1}, 'URL-encoded value decoded correctly'); +}; + +# Test 6: Only last comment is extracted +subtest 'multiple comments takes last' => sub { + psch_query_clickhouse("TRUNCATE TABLE pg_stat_ch.events_raw"); + psch_reset_stats($node); + + $node->safe_psql('postgres', + "SELECT /* first='ignored' */ 1 /* second='captured' */"); + + $node->safe_psql('postgres', 'SELECT pg_stat_ch_flush()'); + + my $labels = psch_wait_for_clickhouse_query( + "SELECT labels FROM pg_stat_ch.events_raw " + . "WHERE query LIKE '%second%' LIMIT 1", + sub { $_[0] ne '' && $_[0] ne '{}' }, + 10 + ); + + like($labels, qr/captured/, 'last comment labels exported'); + unlike($labels, qr/ignored/, 'first comment labels not exported'); +}; + +$node->stop(); +done_testing(); diff --git a/t/psch.pm b/t/psch.pm index 51dcb2b..26126dd 100644 --- a/t/psch.pm +++ b/t/psch.pm @@ -240,12 +240,13 @@ sub psch_stop_otelcol { sub psch_init_node_with_otel { my ($name, %opts) = @_; - my $queue_capacity = $opts{queue_capacity} // 65536; - my $flush_interval_ms = $opts{flush_interval_ms} // 100; # Fast flush for tests - my $batch_max = $opts{batch_max} // 1000; - my $enabled = $opts{enabled} // 'on'; - my $otel_endpoint = $opts{otel_endpoint} // 'localhost:4317'; - my $hostname = $opts{hostname} // 'test-host'; + my $queue_capacity = $opts{queue_capacity} // 65536; + my $flush_interval_ms = $opts{flush_interval_ms} // 100; # Fast flush for tests + my $batch_max = $opts{batch_max} // 1000; + my $enabled = $opts{enabled} // 'on'; + my $otel_endpoint = $opts{otel_endpoint} // 'localhost:4317'; + my $hostname = $opts{hostname} // 'test-host'; + my $otel_metric_interval_ms = $opts{otel_metric_interval_ms} // 1000; my $node = PostgreSQL::Test::Cluster->new($name); $node->init(); @@ -258,6 +259,7 @@ pg_stat_ch.batch_max = $batch_max pg_stat_ch.use_otel = on pg_stat_ch.otel_endpoint = '$otel_endpoint' pg_stat_ch.hostname = '$hostname' +pg_stat_ch.otel_metric_interval_ms = $otel_metric_interval_ms }); $node->start(); $node->safe_psql('postgres', 'CREATE EXTENSION pg_stat_ch'); diff --git a/test/regression/expected/guc.out b/test/regression/expected/guc.out index 833de5d..3d31689 100644 --- a/test/regression/expected/guc.out +++ b/test/regression/expected/guc.out @@ -24,8 +24,9 @@ SELECT name FROM pg_settings WHERE name LIKE 'pg_stat_ch.%' ORDER BY name COLLAT pg_stat_ch.otel_metric_interval_ms pg_stat_ch.queue_capacity pg_stat_ch.string_area_size + pg_stat_ch.track_labels pg_stat_ch.use_otel -(22 rows) +(23 rows) SHOW pg_stat_ch.enabled; pg_stat_ch.enabled @@ -67,6 +68,13 @@ SHOW pg_stat_ch.log_min_elevel; warning (1 row) +-- Test track_labels GUC (PGC_SIGHUP: can't SET, only ALTER SYSTEM + reload) +SHOW pg_stat_ch.track_labels; + pg_stat_ch.track_labels +------------------------- + on +(1 row) + -- Test that clickhouse_password is hidden from non-superusers (issue #3) CREATE ROLE psch_test_user LOGIN; SET ROLE psch_test_user; diff --git a/test/regression/sql/guc.sql b/test/regression/sql/guc.sql index 39c95ee..a543ca0 100644 --- a/test/regression/sql/guc.sql +++ b/test/regression/sql/guc.sql @@ -11,6 +11,8 @@ SET pg_stat_ch.log_min_elevel = 'notice'; SHOW pg_stat_ch.log_min_elevel; RESET pg_stat_ch.log_min_elevel; SHOW pg_stat_ch.log_min_elevel; +-- Test track_labels GUC (PGC_SIGHUP: can't SET, only ALTER SYSTEM + reload) +SHOW pg_stat_ch.track_labels; -- Test that clickhouse_password is hidden from non-superusers (issue #3) CREATE ROLE psch_test_user LOGIN; SET ROLE psch_test_user; diff --git a/test/unit/sqlcommenter_parse_test.cc b/test/unit/sqlcommenter_parse_test.cc new file mode 100644 index 0000000..9b706d3 --- /dev/null +++ b/test/unit/sqlcommenter_parse_test.cc @@ -0,0 +1,547 @@ +#include + +#include +#include + +#include "export/sqlcommenter_parse.h" + +// ============================================================================ +// ExtractLastComment tests +// ============================================================================ + +TEST(ExtractLastComment, BasicComment) { + auto result = ExtractLastComment("SELECT 1 /* hello */"); + EXPECT_EQ(result, " hello "); +} + +TEST(ExtractLastComment, NoComment) { + auto result = ExtractLastComment("SELECT 1"); + EXPECT_TRUE(result.empty()); +} + +TEST(ExtractLastComment, EmptyComment) { + auto result = ExtractLastComment("SELECT 1 /**/"); + EXPECT_TRUE(result.empty()); +} + +TEST(ExtractLastComment, MultipleComments) { + auto result = ExtractLastComment("SELECT /* first */ 1 /* second */"); + EXPECT_EQ(result, " second "); +} + +TEST(ExtractLastComment, CommentWithSqlcommenter) { + auto result = ExtractLastComment( + "SELECT * FROM users WHERE id = $1 /* controller='users',action='show' */"); + EXPECT_EQ(result, " controller='users',action='show' "); +} + +TEST(ExtractLastComment, NoClosingDelimiter) { + auto result = ExtractLastComment("SELECT 1 /* unclosed"); + EXPECT_TRUE(result.empty()); +} + +TEST(ExtractLastComment, NoOpeningDelimiter) { + auto result = ExtractLastComment("SELECT 1 */"); + EXPECT_TRUE(result.empty()); +} + +TEST(ExtractLastComment, CommentAtStart) { + auto result = ExtractLastComment("/* comment */ SELECT 1"); + EXPECT_EQ(result, " comment "); +} + +TEST(ExtractLastComment, WhitespaceOnlyComment) { + auto result = ExtractLastComment("SELECT 1 /* */"); + EXPECT_EQ(result, " "); +} + +TEST(ExtractLastComment, NormalizedQueryWithPlaceholders) { + auto result = ExtractLastComment( + "SELECT * FROM users WHERE id = $1 AND name = $2 /* controller='users' */"); + EXPECT_EQ(result, " controller='users' "); +} + +TEST(ExtractLastComment, EmptyQuery) { + auto result = ExtractLastComment(""); + EXPECT_TRUE(result.empty()); +} + +TEST(ExtractLastComment, AdjacentComments) { + auto result = ExtractLastComment("SELECT 1 /* a *//* b */"); + EXPECT_EQ(result, " b "); +} + +// ============================================================================ +// ParseSqlcommenter tests +// ============================================================================ + +TEST(ParseSqlcommenter, BasicPairs) { + auto result = ParseSqlcommenter("controller='users',action='show'"); + EXPECT_EQ(result.count, 2); + EXPECT_EQ(result.labels[0].key, "controller"); + EXPECT_EQ(result.labels[0].value, "users"); + EXPECT_EQ(result.labels[1].key, "action"); + EXPECT_EQ(result.labels[1].value, "show"); + EXPECT_FALSE(result.truncated); +} + +TEST(ParseSqlcommenter, SinglePair) { + auto result = ParseSqlcommenter("key='value'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key, "key"); + EXPECT_EQ(result.labels[0].value, "value"); +} + +TEST(ParseSqlcommenter, WithWhitespace) { + auto result = ParseSqlcommenter(" controller = 'users' , action = 'show' "); + EXPECT_EQ(result.count, 2); + EXPECT_EQ(result.labels[0].key, "controller"); + EXPECT_EQ(result.labels[0].value, "users"); + EXPECT_EQ(result.labels[1].key, "action"); + EXPECT_EQ(result.labels[1].value, "show"); +} + +TEST(ParseSqlcommenter, EmptyComment) { + auto result = ParseSqlcommenter(""); + EXPECT_EQ(result.count, 0); +} + +TEST(ParseSqlcommenter, WhitespaceOnly) { + auto result = ParseSqlcommenter(" \t "); + EXPECT_EQ(result.count, 0); +} + +TEST(ParseSqlcommenter, EmptyValue) { + auto result = ParseSqlcommenter("key=''"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key, "key"); + EXPECT_EQ(result.labels[0].value, ""); +} + +TEST(ParseSqlcommenter, UrlEncodedValue) { + auto result = ParseSqlcommenter("key='hello%20world'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "hello world"); +} + +TEST(ParseSqlcommenter, UrlEncodedComma) { + auto result = ParseSqlcommenter("key='a%2Cb'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "a,b"); +} + +TEST(ParseSqlcommenter, UrlEncodedSingleQuote) { + auto result = ParseSqlcommenter("key='it%27s'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "it's"); +} + +TEST(ParseSqlcommenter, UrlEncodedKey) { + auto result = ParseSqlcommenter("my%20key='value'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key, "my key"); +} + +TEST(ParseSqlcommenter, MalformedPercentEncoding) { + // %ZZ is not valid hex; should be passed through literally + auto result = ParseSqlcommenter("key='%ZZfoo'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "%ZZfoo"); +} + +TEST(ParseSqlcommenter, PercentAtEnd) { + // Trailing % without two hex digits + auto result = ParseSqlcommenter("key='foo%'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "foo%"); +} + +TEST(ParseSqlcommenter, PercentWithOneChar) { + auto result = ParseSqlcommenter("key='foo%2'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "foo%2"); +} + +TEST(ParseSqlcommenter, KeyTruncation) { + // Key longer than 32 chars should be truncated + std::string long_key(50, 'a'); + std::string comment = long_key + "='value'"; + auto result = ParseSqlcommenter(comment); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key.size(), static_cast(kMaxKeyLen)); +} + +TEST(ParseSqlcommenter, ValueTruncation) { + // Value longer than 128 chars should be truncated + std::string long_value(200, 'b'); + std::string comment = "key='" + long_value + "'"; + auto result = ParseSqlcommenter(comment); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value.size(), static_cast(kMaxValueLen)); +} + +TEST(ParseSqlcommenter, MaxLabels) { + std::string comment; + for (int i = 0; i < kMaxLabels; ++i) { + if (i > 0) comment += ','; + comment += "k" + std::to_string(i) + "='v" + std::to_string(i) + "'"; + } + auto result = ParseSqlcommenter(comment); + EXPECT_EQ(result.count, kMaxLabels); + EXPECT_FALSE(result.truncated); +} + +TEST(ParseSqlcommenter, MoreThanMaxLabels) { + std::string comment; + for (int i = 0; i < kMaxLabels + 3; ++i) { + if (i > 0) comment += ','; + comment += "k" + std::to_string(i) + "='v" + std::to_string(i) + "'"; + } + auto result = ParseSqlcommenter(comment); + EXPECT_EQ(result.count, kMaxLabels); + EXPECT_TRUE(result.truncated); +} + +TEST(ParseSqlcommenter, MissingEquals) { + auto result = ParseSqlcommenter("key'value'"); + EXPECT_EQ(result.count, 0); +} + +TEST(ParseSqlcommenter, MissingOpenQuote) { + auto result = ParseSqlcommenter("key=value'"); + EXPECT_EQ(result.count, 0); +} + +TEST(ParseSqlcommenter, MissingCloseQuote) { + auto result = ParseSqlcommenter("key='value"); + EXPECT_EQ(result.count, 0); +} + +TEST(ParseSqlcommenter, MixedValidAndInvalid) { + auto result = ParseSqlcommenter("good='yes',bad,also_good='yep'"); + EXPECT_EQ(result.count, 2); + EXPECT_EQ(result.labels[0].key, "good"); + EXPECT_EQ(result.labels[0].value, "yes"); + EXPECT_EQ(result.labels[1].key, "also_good"); + EXPECT_EQ(result.labels[1].value, "yep"); +} + +TEST(ParseSqlcommenter, TrailingComma) { + auto result = ParseSqlcommenter("key='value',"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key, "key"); +} + +TEST(ParseSqlcommenter, LeadingWhitespace) { + auto result = ParseSqlcommenter(" key='value'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key, "key"); +} + +TEST(ParseSqlcommenter, RealWorldSqlcommenter) { + auto result = ParseSqlcommenter( + "controller='users',action='show',framework='rails',db_driver='pg'"); + EXPECT_EQ(result.count, 4); + EXPECT_EQ(result.labels[0].key, "controller"); + EXPECT_EQ(result.labels[0].value, "users"); + EXPECT_EQ(result.labels[1].key, "action"); + EXPECT_EQ(result.labels[1].value, "show"); + EXPECT_EQ(result.labels[2].key, "framework"); + EXPECT_EQ(result.labels[2].value, "rails"); + EXPECT_EQ(result.labels[3].key, "db_driver"); + EXPECT_EQ(result.labels[3].value, "pg"); +} + +// ============================================================================ +// Spec compliance: meta character escaping (\' → ') +// Per sqlcommenter spec, single quotes in values are escaped as \' +// ============================================================================ + +TEST(ParseSqlcommenter, EscapedSingleQuoteInValue) { + // key='it\'s' → value is "it's" + auto result = ParseSqlcommenter("key='it\\'s'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].key, "key"); + EXPECT_EQ(result.labels[0].value, "it's"); +} + +TEST(ParseSqlcommenter, MultipleEscapedQuotesInValue) { + // key='can\'t won\'t' → value is "can't won't" + auto result = ParseSqlcommenter("key='can\\'t won\\'t'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "can't won't"); +} + +TEST(ParseSqlcommenter, EscapedQuoteAtValueStart) { + auto result = ParseSqlcommenter("key='\\'hello'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "'hello"); +} + +TEST(ParseSqlcommenter, EscapedQuoteAtValueEnd) { + auto result = ParseSqlcommenter("key='hello\\''"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "hello'"); +} + +TEST(ParseSqlcommenter, EscapedQuoteWithUrlEncoding) { + // Meta unescaping and URL decoding combined + auto result = ParseSqlcommenter("key='it\\'s%20great'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "it's great"); +} + +TEST(ParseSqlcommenter, BackslashNotFollowedByQuote) { + // Backslash followed by non-quote char is kept literally + // Input string: key='path\to' (one backslash before 't') + auto result = ParseSqlcommenter("key='path\\to'"); + EXPECT_EQ(result.count, 1); + EXPECT_EQ(result.labels[0].value, "path\\to"); +} + +// ============================================================================ +// Spec compliance: spec exhibit round-trip +// ============================================================================ + +TEST(ParseSqlcommenter, SpecExhibitParsing) { + // From the sqlcommenter spec exhibit: + // action='%2Fparam*d',controller='index',framework='spring', + // traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01', + // tracestate='congo%3Dt61rcWkgMzE%2Crojo%3D00f067aa0ba902b7' + auto result = ParseSqlcommenter( + "action='%2Fparam*d'," + "controller='index'," + "framework='spring'," + "traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01'," + "tracestate='congo%3Dt61rcWkgMzE%2Crojo%3D00f067aa0ba902b7'"); + EXPECT_EQ(result.count, 5); + EXPECT_EQ(result.labels[0].key, "action"); + EXPECT_EQ(result.labels[0].value, "/param*d"); + EXPECT_EQ(result.labels[1].key, "controller"); + EXPECT_EQ(result.labels[1].value, "index"); + EXPECT_EQ(result.labels[2].key, "framework"); + EXPECT_EQ(result.labels[2].value, "spring"); + EXPECT_EQ(result.labels[3].key, "traceparent"); + EXPECT_EQ(result.labels[3].value, + "00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01"); + EXPECT_EQ(result.labels[4].key, "tracestate"); + EXPECT_EQ(result.labels[4].value, "congo=t61rcWkgMzE,rojo=00f067aa0ba902b7"); +} + +TEST(ParseSqlcommenter, SpecExhibitFullPipeline) { + // Full pipeline: extract comment from the spec exhibit SQL, parse, serialize + std::string_view query = + "SELECT * FROM FOO " + "/*action='%2Fparam*d',controller='index',framework='spring'," + "traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01'," + "tracestate='congo%3Dt61rcWkgMzE%2Crojo%3D00f067aa0ba902b7'*/"; + auto comment = ExtractLastComment(query); + EXPECT_FALSE(comment.empty()); + auto parsed = ParseSqlcommenter(comment); + EXPECT_EQ(parsed.count, 5); + EXPECT_EQ(parsed.labels[0].value, "/param*d"); + EXPECT_EQ(parsed.labels[4].value, "congo=t61rcWkgMzE,rojo=00f067aa0ba902b7"); +} + +TEST(ParseSqlcommenter, UrlEncodedSpecialChars) { + // Slash, equals, ampersand — common in route and tracestate values + auto result = ParseSqlcommenter( + "route='%2Fpolls%201000',state='k1%3Dv1%26k2%3Dv2'"); + EXPECT_EQ(result.count, 2); + EXPECT_EQ(result.labels[0].key, "route"); + EXPECT_EQ(result.labels[0].value, "/polls 1000"); + EXPECT_EQ(result.labels[1].key, "state"); + EXPECT_EQ(result.labels[1].value, "k1=v1&k2=v2"); +} + +// ============================================================================ +// Spec compliance: real-world samples and edge cases +// ============================================================================ + +TEST(ParseSqlcommenter, RealWorldDjangoSample) { + // Exact sample from the sqlcommenter homepage (Django ORM output) + auto result = ParseSqlcommenter( + "controller='index',db_driver='django.db.backends.postgresql'," + "framework='django%3A2.2.1',route='%5Epolls/%24'," + "traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01'," + "tracestate='congo%3Dt61rcWkgMzE%2Crojo%3D00f067aa0ba902b7'"); + EXPECT_EQ(result.count, 6); + EXPECT_EQ(result.labels[0].key, "controller"); + EXPECT_EQ(result.labels[0].value, "index"); + EXPECT_EQ(result.labels[1].key, "db_driver"); + EXPECT_EQ(result.labels[1].value, "django.db.backends.postgresql"); + EXPECT_EQ(result.labels[2].key, "framework"); + EXPECT_EQ(result.labels[2].value, "django:2.2.1"); + EXPECT_EQ(result.labels[3].key, "route"); + EXPECT_EQ(result.labels[3].value, "^polls/$"); + EXPECT_EQ(result.labels[4].key, "traceparent"); + EXPECT_EQ(result.labels[4].value, + "00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01"); + EXPECT_EQ(result.labels[5].key, "tracestate"); + EXPECT_EQ(result.labels[5].value, "congo=t61rcWkgMzE,rojo=00f067aa0ba902b7"); +} + +TEST(ExtractLastComment, MultiLineComment) { + // sqlcommenter comments can span multiple lines in logs + auto result = ExtractLastComment( + "INSERT INTO t1 VALUES ($1, $2)\n" + "/*controller='index',\n" + "framework='django%3A2.2.1'*/"); + EXPECT_EQ(result, + "controller='index',\n" + "framework='django%3A2.2.1'"); +} + +TEST(ExtractLastComment, MultipleBlocksReturnsLast) { + // Non-sqlcommenter comment followed by sqlcommenter — only last is returned + auto result = ExtractLastComment( + "SELECT /*+ IndexScan(t1) */ * FROM t1 " + "/* controller='index',action='show' */"); + EXPECT_EQ(result, " controller='index',action='show' "); +} + +TEST(ExtractLastComment, DashDashCommentIgnored) { + // -- style comments don't affect /* */ extraction + auto result = ExtractLastComment( + "SELECT * FROM t1 -- this is a line comment\n" + "/* controller='index' */"); + EXPECT_EQ(result, " controller='index' "); +} + +TEST(EndToEnd, RealWorldDjangoFullPipeline) { + // Full pipeline with the real-world Django sample from the sqlcommenter docs + std::string_view query = + "INSERT INTO \"polls_question\" (\"question_text\", \"pub_date\") VALUES " + "($1, $2) RETURNING \"polls_question\".\"id\" " + "/*controller='index',db_driver='django.db.backends.postgresql'," + "framework='django%3A2.2.1',route='%5Epolls/%24'," + "traceparent='00-5bd66ef5095369c7b0d1f8f4bd33716a-c532cb4098ac3dd2-01'," + "tracestate='congo%3Dt61rcWkgMzE%2Crojo%3D00f067aa0ba902b7'*/"; + auto comment = ExtractLastComment(query); + EXPECT_FALSE(comment.empty()); + auto parsed = ParseSqlcommenter(comment); + EXPECT_EQ(parsed.count, 6); + EXPECT_EQ(parsed.labels[2].value, "django:2.2.1"); + EXPECT_EQ(parsed.labels[3].value, "^polls/$"); + std::string json = SerializeLabelsJson(parsed); + // Verify JSON contains all keys + EXPECT_NE(json.find("\"controller\""), std::string::npos); + EXPECT_NE(json.find("\"traceparent\""), std::string::npos); + EXPECT_NE(json.find("\"tracestate\""), std::string::npos); +} + +TEST(EndToEnd, NonSqlcommenterLastComment) { + // If the last comment is not sqlcommenter format, no labels are extracted + std::string_view query = + "SELECT * FROM t1 /* controller='index' */ WHERE id = $1 /* regular comment */"; + auto comment = ExtractLastComment(query); + EXPECT_EQ(comment, " regular comment "); + auto parsed = ParseSqlcommenter(comment); + EXPECT_EQ(parsed.count, 0); +} + +TEST(EndToEnd, MultiLineCommentParsing) { + std::string_view query = + "SELECT $1\n" + "/*controller='index',\n" + "framework='django%3A2.2.1',\n" + "route='%5Epolls/%24'*/"; + auto comment = ExtractLastComment(query); + auto parsed = ParseSqlcommenter(comment); + EXPECT_EQ(parsed.count, 3); + EXPECT_EQ(parsed.labels[0].value, "index"); + EXPECT_EQ(parsed.labels[1].value, "django:2.2.1"); + EXPECT_EQ(parsed.labels[2].value, "^polls/$"); +} + +// ============================================================================ +// SerializeLabelsJson tests +// ============================================================================ + +TEST(SerializeLabelsJson, EmptyResult) { + ParseResult result; + EXPECT_EQ(SerializeLabelsJson(result), "{}"); +} + +TEST(SerializeLabelsJson, SingleLabel) { + ParseResult result; + result.count = 1; + result.labels[0].key = "controller"; + result.labels[0].value = "users"; + EXPECT_EQ(SerializeLabelsJson(result), R"({"controller":"users"})"); +} + +TEST(SerializeLabelsJson, MultipleLabels) { + ParseResult result; + result.count = 2; + result.labels[0].key = "controller"; + result.labels[0].value = "users"; + result.labels[1].key = "action"; + result.labels[1].value = "show"; + EXPECT_EQ(SerializeLabelsJson(result), + R"({"controller":"users","action":"show"})"); +} + +TEST(SerializeLabelsJson, EscapesDoubleQuotes) { + ParseResult result; + result.count = 1; + result.labels[0].key = "key"; + result.labels[0].value = "val\"ue"; + EXPECT_EQ(SerializeLabelsJson(result), R"({"key":"val\"ue"})"); +} + +TEST(SerializeLabelsJson, EscapesBackslash) { + ParseResult result; + result.count = 1; + result.labels[0].key = "key"; + result.labels[0].value = "path\\to"; + EXPECT_EQ(SerializeLabelsJson(result), R"({"key":"path\\to"})"); +} + +TEST(SerializeLabelsJson, EscapesControlChars) { + ParseResult result; + result.count = 1; + result.labels[0].key = "key"; + result.labels[0].value = "line1\nline2"; + EXPECT_EQ(SerializeLabelsJson(result), R"({"key":"line1\nline2"})"); +} + +// ============================================================================ +// End-to-end: ExtractLastComment -> ParseSqlcommenter -> SerializeLabelsJson +// ============================================================================ + +TEST(EndToEnd, FullPipeline) { + std::string_view query = + "SELECT * FROM users WHERE id = $1 " + "/* controller='users',action='show',job='UserSync' */"; + auto comment = ExtractLastComment(query); + EXPECT_FALSE(comment.empty()); + + auto parsed = ParseSqlcommenter(comment); + EXPECT_EQ(parsed.count, 3); + + std::string json = SerializeLabelsJson(parsed); + EXPECT_EQ(json, + R"({"controller":"users","action":"show","job":"UserSync"})"); +} + +TEST(EndToEnd, QueryWithNoComment) { + std::string_view query = "SELECT * FROM users WHERE id = $1"; + auto comment = ExtractLastComment(query); + EXPECT_TRUE(comment.empty()); + // No comment → empty ParseResult → "{}" + ParseResult result; + EXPECT_EQ(SerializeLabelsJson(result), "{}"); +} + +TEST(EndToEnd, UrlEncodedLabels) { + std::string_view query = + "SELECT 1 /* app='my%20app',route='/api/users%3Fid%3D1' */"; + auto comment = ExtractLastComment(query); + auto parsed = ParseSqlcommenter(comment); + EXPECT_EQ(parsed.count, 2); + EXPECT_EQ(parsed.labels[0].key, "app"); + EXPECT_EQ(parsed.labels[0].value, "my app"); + EXPECT_EQ(parsed.labels[1].key, "route"); + EXPECT_EQ(parsed.labels[1].value, "/api/users?id=1"); +}