diff --git a/.github/workflows/ci-tap.yml b/.github/workflows/ci-tap.yml
index 0d7727a..5a6ac03 100644
--- a/.github/workflows/ci-tap.yml
+++ b/.github/workflows/ci-tap.yml
@@ -73,28 +73,19 @@ jobs:
cmake --build build --parallel
cmake --install build
- - name: Start ClickHouse
+ - name: Start ClickHouse and OTel collector
run: |
- docker rm -f clickhouse-test 2>/dev/null || true
- docker run -d --name clickhouse-test \
- --network host \
- clickhouse/clickhouse-server:26.1
- # Wait for ClickHouse to be ready
- for i in {1..30}; do
- if curl -sf 'http://localhost:8123/' --data 'SELECT 1' >/dev/null 2>&1; then
- echo "ClickHouse ready"
- exit 0
+ docker compose -f docker/docker-compose.test.yml up -d --wait
+ docker compose -f docker/docker-compose.otel.yml up -d
+ # Poll OTel health endpoint (distroless image has no shell for healthcheck)
+ for i in $(seq 1 30); do
+ if curl -sf http://localhost:13133/ >/dev/null 2>&1; then
+ echo "OTel collector ready"
+ break
fi
- echo "Waiting for ClickHouse... ($i/30)"
+        if [ "$i" -eq 30 ]; then echo "OTel collector not ready after 30s"; docker compose -f docker/docker-compose.otel.yml logs; exit 1; fi; echo "Waiting for OTel collector... ($i/30)"
sleep 1
done
- echo "ClickHouse not ready after 30s"
- docker logs clickhouse-test
- exit 1
-
- - name: Initialize ClickHouse schema
- run: |
- docker exec clickhouse-test clickhouse-client --multiquery < docker/init/00-schema.sql
- name: Run TAP tests
run: |
@@ -110,6 +101,8 @@ jobs:
name: tap-test-logs
path: t/tmp_check/
- - name: Cleanup ClickHouse
+ - name: Cleanup
if: always()
- run: docker rm -f clickhouse-test 2>/dev/null || true
+ run: |
+ docker compose -f docker/docker-compose.test.yml down --volumes 2>/dev/null || true
+ docker compose -f docker/docker-compose.otel.yml down --volumes 2>/dev/null || true
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d590f0b..13d908b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -131,7 +131,7 @@ jobs:
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
- name: Build
- run: cmake --build build_unit --target hostname_test --parallel
+ run: cmake --build build_unit --parallel
- name: Run unit tests
run: ctest --test-dir build_unit --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 259bd92..6372ece 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,6 +79,7 @@ target_include_directories(pg_stat_ch SYSTEM PRIVATE
${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/api/include
${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/sdk/include
${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/exporters/otlp/include
+ ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/third_party/nlohmann-json/single_include
)
target_link_libraries(pg_stat_ch PRIVATE
PostgreSQLServer::PostgreSQLServer
@@ -149,6 +150,19 @@ if(PSCH_BUILD_UNIT_TESTS)
)
pg_stat_ch_set_warnings(hostname_test)
+ add_executable(sqlcommenter_parse_test
+ test/unit/sqlcommenter_parse_test.cc
+ src/export/sqlcommenter_parse.cc
+ )
+ target_include_directories(sqlcommenter_parse_test PRIVATE src include)
+ target_include_directories(sqlcommenter_parse_test SYSTEM PRIVATE
+ ${CMAKE_SOURCE_DIR}/third_party/opentelemetry-cpp/third_party/nlohmann-json/single_include
+ )
+ target_compile_features(sqlcommenter_parse_test PRIVATE cxx_std_17)
+ target_link_libraries(sqlcommenter_parse_test PRIVATE GTest::gtest_main)
+ pg_stat_ch_set_warnings(sqlcommenter_parse_test)
+
include(GoogleTest)
gtest_discover_tests(hostname_test)
+ gtest_discover_tests(sqlcommenter_parse_test)
endif()
diff --git a/docker/docker-compose.otel.yml b/docker/docker-compose.otel.yml
index 4cf52c3..7c5dcd4 100644
--- a/docker/docker-compose.otel.yml
+++ b/docker/docker-compose.otel.yml
@@ -9,8 +9,5 @@ services:
- "4317:4317" # gRPC OTLP receiver (matches psch_otel_endpoint default)
- "9091:9090" # Prometheus metrics exporter (host:9091 → container:9090)
- "13133:13133" # Health check HTTP endpoint
- healthcheck:
- test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:13133/"]
- interval: 5s
- timeout: 5s
- retries: 10
+ # Note: no healthcheck — the contrib image is distroless (no shell/curl/wget).
+ # Use host-side polling (curl localhost:13133) to verify readiness.
diff --git a/docker/init/00-schema.sql b/docker/init/00-schema.sql
index 4b15ef2..9aca1f6 100644
--- a/docker/init/00-schema.sql
+++ b/docker/init/00-schema.sql
@@ -59,6 +59,8 @@ CREATE TABLE pg_stat_ch.events_raw
query String COMMENT 'Full SQL query text (may be truncated). Used for debugging and query analysis.',
+    labels String DEFAULT '{}' COMMENT 'Query labels from sqlcommenter comments (key=value pairs in /* */ blocks). Stored as a JSON object string; extract values with JSONExtractString(labels, key). Empty {} when no labels present. See: https://google.github.io/sqlcommenter/',
+
-- ========================================================================
-- Shared buffer metrics (main buffer cache)
-- ========================================================================
diff --git a/docker/migrations/001_add_labels_column.sql b/docker/migrations/001_add_labels_column.sql
new file mode 100644
index 0000000..ef4c626
--- /dev/null
+++ b/docker/migrations/001_add_labels_column.sql
@@ -0,0 +1,15 @@
+-- Migration 001: Add labels column for sqlcommenter support
+--
+-- This migration adds the `labels` column to `events_raw` for existing
+-- installations. New installations already include this column via
+-- docker/init/00-schema.sql.
+--
+-- Run with:
+-- clickhouse-client < docker/migrations/001_add_labels_column.sql
+--
+-- Safe to re-run: ALTER TABLE ADD COLUMN IF NOT EXISTS is idempotent.
+
+ALTER TABLE pg_stat_ch.events_raw
+ ADD COLUMN IF NOT EXISTS labels String DEFAULT '{}'
+  COMMENT 'Query labels from sqlcommenter comments (key=value pairs in /* */ blocks). Stored as a JSON object string; extract values with JSONExtractString(labels, key). Empty {} when no labels present. See: https://google.github.io/sqlcommenter/'
+ AFTER query;
diff --git a/docs/guides/clickhouse.md b/docs/guides/clickhouse.md
index 99f2c60..341fc66 100644
--- a/docs/guides/clickhouse.md
+++ b/docs/guides/clickhouse.md
@@ -80,6 +80,32 @@ Four materialized views provide pre-aggregated analytics:
For view schemas, query patterns, and the `-State`/`-Merge` aggregation pattern, see [materialized views](/reference/materialized-views).
+## Schema migrations
+
+When upgrading pg_stat_ch, new columns or schema changes may be required. Migration scripts are provided in [`docker/migrations/`](https://github.com/ClickHouse/pg_stat_ch/tree/main/docker/migrations) and are safe to re-run (idempotent).
+
+Apply all pending migrations:
+
+```bash
+for f in docker/migrations/*.sql; do
+ clickhouse-client < "$f"
+done
+```
+
+Or apply a specific migration:
+
+```bash
+clickhouse-client < docker/migrations/001_add_labels_column.sql
+```
+
+| Migration | Version | Description |
+|---|---|---|
+| `001_add_labels_column.sql` | 0.2+ | Adds `labels String` column for [sqlcommenter](https://google.github.io/sqlcommenter/) query label support |
+
+
+New installations using `docker/init/00-schema.sql` already include all schema changes. Migrations are only needed for existing ClickHouse instances.
+
+
## Data retention
The `events_raw` table has no TTL by default. To limit storage, add a TTL:
diff --git a/docs/reference/events-schema.mdx b/docs/reference/events-schema.mdx
index c163154..41149ad 100644
--- a/docs/reference/events-schema.mdx
+++ b/docs/reference/events-schema.mdx
@@ -30,6 +30,16 @@ The table is partitioned by date (`toDate(ts_start)`) and ordered by `ts_start`
Query normalization replaces literals with placeholders (`$N`). This means `SELECT * FROM users WHERE id = 42` becomes `SELECT * FROM users WHERE id = $1`. No passwords, tokens, or PII are exported in query text.
+## Query labels
+
+| Column | Type | Description |
+|---|---|---|
+| `labels` | `String DEFAULT '{}'` | Key-value labels extracted from [sqlcommenter](https://google.github.io/sqlcommenter/) comments appended to the query. For example, `/* controller='users',action='show' */` produces `{"controller":"users","action":"show"}`. Extract values in ClickHouse with `JSONExtractString(labels, 'controller')`. Empty `{}` when no labels are present. |
+
+
+Labels are parsed from the **last** `/* */` comment block in the query text. The parser supports URL-encoded values and escaped single quotes per the sqlcommenter specification. Controlled by the [`pg_stat_ch.track_labels`](/reference/configuration) GUC (default: `true`).
+
+
## Shared buffer usage
These columns show how queries interact with PostgreSQL's `shared_buffers` cache. The cache hit ratio for a query is `shared_blks_hit / (shared_blks_hit + shared_blks_read)`. Target above 99% for OLTP workloads.
diff --git a/include/config/guc.h b/include/config/guc.h
index 70d9273..2d6b27c 100644
--- a/include/config/guc.h
+++ b/include/config/guc.h
@@ -28,6 +28,7 @@ extern int psch_otel_log_batch_size;
extern int psch_otel_log_max_bytes;
extern int psch_otel_log_delay_ms;
extern int psch_otel_metric_interval_ms;
+extern bool psch_track_labels;
extern bool psch_debug_force_locked_overflow;
// Initialize GUC variables
diff --git a/src/config/guc.cc b/src/config/guc.cc
index 9535025..7c1b088 100644
--- a/src/config/guc.cc
+++ b/src/config/guc.cc
@@ -32,6 +32,7 @@ int psch_otel_log_batch_size = 8192;
int psch_otel_log_max_bytes = 3 * 1024 * 1024; // 3 MiB: gRPC default max is 4 MiB
int psch_otel_log_delay_ms = 100;
int psch_otel_metric_interval_ms = 5000;
+bool psch_track_labels = true;
bool psch_debug_force_locked_overflow = false;
// Log level options (matches PostgreSQL's server_message_level_options pattern)
@@ -307,6 +308,17 @@ void PschInitGuc(void) {
PGC_SUSET,
0,
nullptr, nullptr, nullptr);
+ DefineCustomBoolVariable(
+ "pg_stat_ch.track_labels",
+ "Extract sqlcommenter labels from query comments.",
+ "When enabled, the background worker parses /* key='value' */ comments "
+ "and exports structured labels to ClickHouse (JSON) or OTel (attributes).",
+ &psch_track_labels,
+ true,
+ PGC_SIGHUP,
+ 0,
+ nullptr, nullptr, nullptr);
+
DefineCustomBoolVariable(
"pg_stat_ch.debug_force_locked_overflow",
"Force HandleOverflow in locked path (debug/test only).",
diff --git a/src/export/clickhouse_exporter.cc b/src/export/clickhouse_exporter.cc
index 743c537..4455a46 100644
--- a/src/export/clickhouse_exporter.cc
+++ b/src/export/clickhouse_exporter.cc
@@ -75,10 +75,15 @@ class ClickHouseExporter : public StatsExporter {
shared_ptr> DbOperationColumn() final { return TagString("cmd_type"); }
shared_ptr> DbQueryTextColumn() final { return RecordString("query"); }
+ void AppendLabels(const ParseResult& labels) final {
+ labels_col_->Append(SerializeLabelsJson(labels));
+ }
+
void BeginBatch() final {
block = std::make_unique();
columns.clear();
exported_count = 0;
+ labels_col_ = Wrap("labels");
}
void BeginRow() final { ++exported_count; }
bool CommitBatch() final;
@@ -116,6 +121,7 @@ class ClickHouseExporter : public StatsExporter {
std::unique_ptr client;
std::unique_ptr block;
std::vector> columns;
+ shared_ptr> labels_col_;
int consecutive_failures = 0;
int exported_count = 0;
};
diff --git a/src/export/exporter_interface.h b/src/export/exporter_interface.h
index 183ecb7..d87ea9e 100644
--- a/src/export/exporter_interface.h
+++ b/src/export/exporter_interface.h
@@ -6,6 +6,8 @@
#include
#include
+#include "export/sqlcommenter_parse.h"
+
class StatsExporter {
protected:
using string = std::string;
@@ -65,6 +67,10 @@ class StatsExporter {
virtual shared_ptr> DbOperationColumn() = 0;
// Query text. CH: RecordString "query"; OTel semconv: "db.query.text".
virtual shared_ptr> DbQueryTextColumn() = 0;
+ // Query labels from sqlcommenter comments. Called inside the event loop.
+ // CH: serializes to JSON, appends to a String "labels" column;
+ // OTel: sets per-label attributes prefixed with "db.query.label.".
+ virtual void AppendLabels(const ParseResult& labels) = 0;
virtual void BeginBatch() = 0;
virtual void BeginRow() = 0;
diff --git a/src/export/otel_exporter.cc b/src/export/otel_exporter.cc
index cef7290..3ddfef7 100644
--- a/src/export/otel_exporter.cc
+++ b/src/export/otel_exporter.cc
@@ -129,6 +129,15 @@ class OTelExporter : public StatsExporter {
shared_ptr> DbQueryTextColumn() final {
return Wrap>("db.query.text");
}
+ void AppendLabels(const ParseResult& labels) final {
+ for (int i = 0; i < labels.count; ++i) {
+ string attr_name = "db.query.label.";
+ attr_name.append(labels.labels[i].key.data(), labels.labels[i].key.size());
+ string val(labels.labels[i].value);
+ current_log_record->SetAttribute(attr_name, val);
+ current_row_tags[attr_name] = std::move(val);
+ }
+ }
bool EstablishNewConnection() final;
bool IsConnected() const final { return metrics_provider && log_provider; }
diff --git a/src/export/sqlcommenter_parse.cc b/src/export/sqlcommenter_parse.cc
new file mode 100644
index 0000000..e5de3c9
--- /dev/null
+++ b/src/export/sqlcommenter_parse.cc
@@ -0,0 +1,180 @@
+#include "export/sqlcommenter_parse.h"
+
+#include
+
+#include
+
+namespace {
+
+int DecodeHexByte(char hi, char lo) {
+ char buf[2] = {hi, lo};
+ unsigned value = 0;
+ auto [ptr, ec] = std::from_chars(buf, buf + 2, value, 16);
+ if (ec != std::errc{} || ptr != buf + 2)
+ return -1;
+ return static_cast(value);
+}
+
+bool IsWhitespace(char c) {
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+
+bool IsKeyTerminator(char c) {
+ return c == '=' || c == ',' || IsWhitespace(c);
+}
+
+bool NeedsDecode(std::string_view sv) {
+ return sv.find('%') != std::string_view::npos || sv.find('\\') != std::string_view::npos;
+}
+
+// Meta-unescape (\' -> ') and URL-decode (%XX) in a single pass.
+// Per sqlcommenter spec, meta unescaping happens before URL decoding,
+// but the two transforms don't overlap so a combined pass is equivalent.
+size_t MetaUnescapeAndUrlDecode(std::string_view src, char* dst, size_t max_len) {
+ size_t written = 0;
+ size_t i = 0;
+ while (i < src.size() && written < max_len) {
+ if (src[i] == '\\' && i + 1 < src.size() && src[i + 1] == '\'') {
+ dst[written++] = '\'';
+ i += 2;
+ } else if (src[i] == '%' && i + 2 < src.size()) {
+ int byte = DecodeHexByte(src[i + 1], src[i + 2]);
+ if (byte >= 0) {
+ dst[written++] = static_cast(byte);
+ i += 3;
+ continue;
+ }
+ dst[written++] = src[i++];
+ } else {
+ dst[written++] = src[i++];
+ }
+ }
+ return written;
+}
+
+// Decode a raw field (meta-unescape + URL-decode) into buf, or truncate if no decoding needed.
+std::string_view DecodeField(std::string_view raw, char* buf, size_t max_len) {
+ if (NeedsDecode(raw)) {
+ size_t len = MetaUnescapeAndUrlDecode(raw, buf, max_len);
+ return {buf, len};
+ }
+ return raw.substr(0, max_len);
+}
+
+// Lightweight scanner for walking through a sqlcommenter comment.
+struct Scanner {
+ std::string_view text;
+ size_t pos = 0;
+
+ bool AtEnd() const { return pos >= text.size(); }
+
+ void SkipWhitespace() {
+ while (!AtEnd() && IsWhitespace(text[pos]))
+ ++pos;
+ }
+
+ // Consume a specific character; returns false without advancing if not matched.
+ bool Consume(char expected) {
+ if (AtEnd() || text[pos] != expected)
+ return false;
+ ++pos;
+ return true;
+ }
+
+ // Scan a key: sequence of non-terminator characters.
+ std::string_view ScanKey() {
+ size_t start = pos;
+ while (!AtEnd() && !IsKeyTerminator(text[pos]))
+ ++pos;
+ return text.substr(start, pos - start);
+ }
+
+ // Scan a single-quoted value, handling \' escapes per sqlcommenter spec.
+ // Assumes the opening quote was already consumed.
+ // Returns the raw content (before decoding). Sets *ok = false on unterminated quote.
+ std::string_view ScanQuotedValue(bool* ok) {
+ size_t start = pos;
+ while (!AtEnd()) {
+ if (text[pos] == '\\' && pos + 1 < text.size() && text[pos + 1] == '\'') {
+ pos += 2;
+ } else if (text[pos] == '\'') {
+ auto val = text.substr(start, pos - start);
+ ++pos;
+ *ok = true;
+ return val;
+ } else {
+ ++pos;
+ }
+ }
+ *ok = false;
+ return {};
+ }
+};
+
+} // namespace
+
+std::string_view ExtractLastComment(std::string_view query) {
+ auto end_pos = query.rfind("*/");
+ if (end_pos == std::string_view::npos)
+ return {};
+
+ auto start_pos = query.rfind("/*", end_pos);
+ if (start_pos == std::string_view::npos)
+ return {};
+
+ size_t content_start = start_pos + 2;
+ if (content_start >= end_pos)
+ return {};
+ return query.substr(content_start, end_pos - content_start);
+}
+
+ParseResult ParseSqlcommenter(std::string_view comment) {
+ ParseResult result;
+ Scanner scan{comment};
+
+ while (!scan.AtEnd() && result.count < kMaxLabels) {
+ scan.SkipWhitespace();
+
+ auto raw_key = scan.ScanKey();
+ if (raw_key.empty()) {
+ if (!scan.AtEnd())
+ ++scan.pos;
+ continue;
+ }
+
+ scan.SkipWhitespace();
+ if (!scan.Consume('='))
+ continue;
+ scan.SkipWhitespace();
+ if (!scan.Consume('\''))
+ continue;
+
+ bool ok = false;
+ auto raw_value = scan.ScanQuotedValue(&ok);
+ if (!ok)
+ break;
+
+ Label& label = result.labels[result.count++];
+ label.key = DecodeField(raw_key, label.decoded_key, kMaxKeyLen);
+ label.value = DecodeField(raw_value, label.decoded_value, kMaxValueLen);
+
+ scan.SkipWhitespace();
+ scan.Consume(',');
+ }
+
+ if (result.count == kMaxLabels) {
+ scan.SkipWhitespace();
+ if (!scan.AtEnd())
+ result.truncated = true;
+ }
+
+ return result;
+}
+
+std::string SerializeLabelsJson(const ParseResult& result) {
+ nlohmann::ordered_json obj = nlohmann::ordered_json::object();
+ for (int i = 0; i < result.count; ++i) {
+ obj[std::string(result.labels[i].key)] = std::string(result.labels[i].value);
+ }
+ return obj.dump(-1, ' ', false, nlohmann::ordered_json::error_handler_t::replace);
+}
diff --git a/src/export/sqlcommenter_parse.h b/src/export/sqlcommenter_parse.h
new file mode 100644
index 0000000..f120a4b
--- /dev/null
+++ b/src/export/sqlcommenter_parse.h
@@ -0,0 +1,38 @@
+#ifndef PG_STAT_CH_SRC_EXPORT_SQLCOMMENTER_PARSE_H_
+#define PG_STAT_CH_SRC_EXPORT_SQLCOMMENTER_PARSE_H_
+
+#include
+#include
+#include
+
+constexpr int kMaxLabels = 16;
+constexpr int kMaxKeyLen = 32;
+constexpr int kMaxValueLen = 128;
+
+struct Label {
+ std::string_view key; // Points into query buffer or decoded_key
+ std::string_view value; // Points into query buffer or decoded_value
+ char decoded_key[kMaxKeyLen];
+ char decoded_value[kMaxValueLen];
+};
+
+struct ParseResult {
+ std::array