diff --git a/CHANGELOG.md b/CHANGELOG.md index f8e3748..46e8bbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.3.0] - 2026-04-23 +### Changed +- **Anonymous server identification in telemetry** — every analytics event now carries a short opaque `server` segment: a 16-hex-char SHA-256 prefix of the normalized Countly server URL. This lets `stats.count.ly` aggregate per-distinct-server counts (for any event type) without ever seeing the raw URL. The device ID stays at `"mcp"` — only events carry the hash. In HTTP transport the hash is recomputed per request from the request-scoped server URL (via `AsyncLocalStorage`), so multi-tenant deployments naturally emit per-tenant counts. The README analytics section was updated to reflect what is (and isn't) tracked. Opt-in still required (`ENABLE_ANALYTICS=true`). + ### Security - **Cross-tenant auth token mixing (HTTP transport)** (#110) — the HTTP transport previously mutated a shared axios client and shared config on every incoming request. Concurrent requests could interleave at `await` boundaries, causing tenant A's in-flight API calls to go out with tenant B's token. Fixed by constructing a per-request axios instance (with the `countly-token` header baked in) and passing state from the HTTP middleware to the MCP handler through `AsyncLocalStorage`. The shared client is now used only as a stdio-mode fallback and is never mutated per-request. - **Cross-tenant data leak via shared AppCache** (#110) — the apps cache was a single instance per process, so the first tenant's apps were visible to every other tenant's `resolveAppId` lookups for up to five minutes. Replaced with `AppCacheRegistry`, which keeps one `AppCache` per tenant keyed by SHA-256(token) so the raw token is never held as a Map key. 
diff --git a/README.md b/README.md index e131054..cdd0a86 100644 --- a/README.md +++ b/README.md @@ -239,16 +239,18 @@ The MCP server includes optional anonymous usage analytics to help improve the p - HTTP endpoint access patterns - Error occurrences (type and message, NO sensitive data) - Server start/stop events +- A **truncated opaque hash** of your Countly server URL (64-bit SHA-256 prefix), attached as the `server` segment on every event — used for distinct-server aggregation. The raw URL is never sent. **What is NOT tracked:** - Authentication tokens or credentials -- Server URLs or domains +- Raw Countly server URLs or domains (only the opaque `server` hash above) - User data or analytics content - Personal information - IP addresses or client identifiers +- Tool arguments or request/response bodies **Privacy & Device ID:** -All analytics are aggregated under a single device ID "mcp" to ensure complete anonymity. No server-specific or user-specific information is collected. +All analytics are aggregated under a single device ID `"mcp"` — Countly cannot distinguish individual operators from the device ID alone. The only per-deployment signal is the `server` hash on events, which is a truncated SHA-256 of the normalized server URL. The hash is intentionally coarse (64 bits) and the server URL is low-entropy, so do not assume the hash is unguessable for cloud patterns; it is meant for aggregation, not secrecy. **To opt in:** ```bash diff --git a/src/index.ts b/src/index.ts index c406af6..40722aa 100644 --- a/src/index.ts +++ b/src/index.ts @@ -206,8 +206,28 @@ class CountlyMCPServer { // Initialize analytics. Opt-in: enabled only when ENABLE_ANALYTICS=true. // README has always documented this as "disabled by default"; the previous // `!== 'false'` check silently opted users in. Flip to explicit opt-in. 
+ // + // The getServerUrl callback lets analytics attach a short opaque SHA-256 + // hash of the current Countly server URL (as the `server` segment) to + // every event, so stats.count.ly can aggregate distinct-server counts + // without ever seeing raw URLs. + // + // Priority: + // 1. HTTP per-request URL from AsyncLocalStorage (multi-tenant) + // 2. static server config (stdio after constructor finishes) + // 3. process.env.COUNTLY_SERVER_URL (pre-config fallback — the + // `server_started` event fires from inside analytics.init() which + // runs before this.config is assigned, so without this fallback + // the very first event would ship without the `server` segment) const analyticsEnabled = (process.env.ENABLE_ANALYTICS || '').toLowerCase() === 'true'; - analytics.init(analyticsEnabled); + analytics.init(analyticsEnabled, () => { + const reqState = this.requestContext.getStore(); + return ( + reqState?.serverUrl + || this.config?.serverUrl + || process.env.COUNTLY_SERVER_URL + ); + }); // Log configuration on startup (only in non-test mode) if (!testMode) { diff --git a/src/lib/analytics.ts b/src/lib/analytics.ts index 5141fa0..93d8217 100644 --- a/src/lib/analytics.ts +++ b/src/lib/analytics.ts @@ -13,23 +13,110 @@ import { redactSensitiveInMessage } from './error-handler.js'; const ANALYTICS_URL = 'https://stats.count.ly'; const ANALYTICS_APP_KEY = '5a106dec46bf2e2d4d23c2cd3cf7490b12c22fc7'; +/** + * Length of the server-URL hash that accompanies every analytics event. + * 16 hex chars = 64 bits of hash output — collision risk is negligible for millions + * of distinct Countly servers (the birthday bound makes one likely only near 2^32, + * about 4 billion), while keeping event payloads small. A collision between two real + * deployments is not a correctness issue for distinct-count aggregation. + */ +const SERVER_HASH_LENGTH = 16; // Load the package version once. Uses createRequire because the rest of the // file is ESM and require() isn't available natively. 
const require = createRequire(import.meta.url); +/** + * Normalize a Countly server URL into a canonical form before hashing, so + * variations that are semantically equivalent collapse to the same hash: + * + * - scheme dropped (`http://` == `https://` for identity purposes) + * - hostname lowercased (URLs are case-insensitive on host) + * - default port stripped (`:80` for http, `:443` for https) + * - trailing slashes on the pathname stripped + * - path / query / fragment case preserved (RFC 3986: only the host is + * case-insensitive) + * + * Uses `new URL()` for structural correctness. Scheme-less input (e.g. a bare + * hostname) gets `https://` prepended so it parses; input that still fails to + * parse falls back to a minimal regex-based strip so it hashes stably. + */ +export function normalizeServerUrlForHash(url: string): string { + const trimmed = (url ?? '').trim(); + if (!trimmed) { + return ''; + } + + // If there's no scheme, prepend one so `new URL()` succeeds without + // changing the semantic identity — we drop the scheme again below. + const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(trimmed); + try { + const parsed = new URL(hasScheme ? trimmed : `https://${trimmed}`); + const hostname = parsed.hostname.toLowerCase(); + const isDefaultPort = + (parsed.protocol === 'http:' && parsed.port === '80') || + (parsed.protocol === 'https:' && parsed.port === '443'); + const port = parsed.port && !isDefaultPort ? `:${parsed.port}` : ''; + const pathname = parsed.pathname.replace(/\/+$/, ''); + // Preserve search + hash (rare on Countly URLs but keep case). + return `${hostname}${port}${pathname}${parsed.search}${parsed.hash}`; + } catch { + // Non-URL input (malformed, unexpected scheme, etc.): minimal best- + // effort normalization — strip scheme prefix and trailing slashes, + // preserve path case. 
+ return trimmed + .replace(/^[a-z][a-z\d+.-]*:\/\//i, '') + .replace(/\/+$/, ''); + } +} + +/** + * Compute the short opaque server-URL hash that rides along as the `server` + * segment on every event. + * + * Privacy note: the raw URL is never sent. For cloud patterns the hash is + * brute-forceable by anyone with a dictionary of common URLs (including + * Countly themselves, who already know their own cloud customer URLs via + * billing). For custom on-prem URLs the hash is opaque in practice. + */ +export function computeServerHash(url: string | undefined): string | undefined { + if (!url) { + return undefined; + } + const normalized = normalizeServerUrlForHash(url); + if (!normalized) { + return undefined; + } + return createHash('sha256').update(normalized).digest('hex').substring(0, SERVER_HASH_LENGTH); +} + +/** + * Optional callback supplied by the server to resolve the Countly server URL + * at event time. In stdio mode this just returns the env-supplied config + * value; in HTTP mode it reads from AsyncLocalStorage so the per-request + * server URL ends up in the per-request events. + */ +type ServerUrlResolver = () => string | undefined; + class Analytics { private enabled: boolean = false; private initialized: boolean = false; private deviceId: string = 'mcp'; + private getServerUrl?: ServerUrlResolver; /** * Initialize analytics tracking. * Opt-in: enabled only when the caller passes true (which index.ts does * only when ENABLE_ANALYTICS=true is set in the environment). + * + * `getServerUrl` is called at each event-track time to resolve the + * current request's server URL. The returned URL is normalized and + * hashed into a short opaque `server` segment on the outgoing event — + * no raw URLs ever leave the process. 
*/ - init(enabled: boolean = false): void { + init(enabled: boolean = false, getServerUrl?: ServerUrlResolver): void { this.enabled = enabled; + this.getServerUrl = getServerUrl; if (!this.enabled) { console.error('📊 Analytics: Disabled (set ENABLE_ANALYTICS=true to opt in)'); @@ -52,7 +139,7 @@ class Analytics { this.initialized = true; console.error('📊 Analytics: Enabled and initialized'); - + // Track session start this.trackServerStart(); } catch (error) { @@ -62,13 +149,21 @@ class Analytics { } /** - * Hash server URL to create anonymous device ID - * Does NOT include auth tokens + * Build the segmentation object for an event, adding the `server` hash + * if we can resolve the current server URL. Callers hand in the + * event-specific fields; we merge the server hash on top. */ - private hashServerUrl(url: string): string { - // Remove protocol and trailing slashes for consistency - const cleanUrl = url.replace(/^https?:\/\//, '').replace(/\/+$/, ''); - return createHash('sha256').update(cleanUrl).digest('hex').substring(0, 32); + private withServerSegment( + segmentation?: Record + ): Record | undefined { + const hash = computeServerHash(this.getServerUrl?.()); + if (!hash) { + return segmentation; + } + return { + ...(segmentation ?? {}), + server: hash, + }; } /** @@ -230,7 +325,9 @@ class Analytics { } /** - * Track custom event + * Track custom event. Automatically injects the `server` hash segment + * (when a serverUrl resolver was supplied to init) so Countly can + * aggregate per-server without ever seeing the raw URL. */ trackEvent(eventName: string, segmentation?: Record): void { if (!this.isEnabled()) { @@ -241,7 +338,7 @@ class Analytics { Countly.add_event({ key: eventName, count: 1, - segmentation, + segmentation: this.withServerSegment(segmentation), }); } catch (error) { console.error('📊 Analytics: Failed to track event:', error); @@ -249,7 +346,8 @@ class Analytics { } /** - * Track timed event + * Track timed event. 
Injects the `server` hash segment the same way + * trackEvent does. */ trackTimedEvent(eventName: string, segmentation: Record, duration: number): void { if (!this.isEnabled()) { @@ -261,7 +359,7 @@ class Analytics { key: eventName, count: 1, dur: duration, - segmentation, + segmentation: this.withServerSegment(segmentation), }); } catch (error) { console.error('📊 Analytics: Failed to track timed event:', error); diff --git a/tests/analytics.test.ts b/tests/analytics.test.ts index 8060212..c46ae5a 100644 --- a/tests/analytics.test.ts +++ b/tests/analytics.test.ts @@ -1,5 +1,9 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import { analytics } from '../src/lib/analytics.js'; +import { + analytics, + computeServerHash, + normalizeServerUrlForHash, +} from '../src/lib/analytics.js'; /** * Analytics Tests @@ -31,17 +35,19 @@ describe('Analytics', () => { beforeEach(() => { // Reset all mocks before each test vi.clearAllMocks(); - + // Reset analytics state by creating a new instance // Since analytics is a singleton, we need to reset its internal state (analytics as any).enabled = false; (analytics as any).initialized = false; + (analytics as any).getServerUrl = undefined; }); afterEach(() => { // Clean up (analytics as any).enabled = false; (analytics as any).initialized = false; + (analytics as any).getServerUrl = undefined; }); describe('init', () => { @@ -657,30 +663,178 @@ describe('Analytics', () => { }); }); - describe('private methods', () => { - it('should hash server URL correctly', () => { - // Access private method for testing - const hashMethod = (analytics as any).hashServerUrl.bind(analytics); - - const hash1 = hashMethod('https://example.com/api'); - const hash2 = hashMethod('https://example.com/api/'); - const hash3 = hashMethod('http://example.com/api'); - - // Should produce consistent hashes - expect(hash1).toBe(hash2); - expect(hash1).toBe(hash3); - expect(hash1).toHaveLength(32); + describe('server-url hashing', () 
=> { + it('normalizes consistently: scheme stripped, hostname lowercased, trailing slash removed', () => { + expect(normalizeServerUrlForHash('https://example.com/api')) + .toBe(normalizeServerUrlForHash('https://example.com/api/')); + expect(normalizeServerUrlForHash('https://EXAMPLE.com/api')) + .toBe(normalizeServerUrlForHash('http://example.com/api')); + expect(normalizeServerUrlForHash('HTTPS://Example.COM///')) + .toBe('example.com'); }); - it('should hash different URLs differently', () => { - const hashMethod = (analytics as any).hashServerUrl.bind(analytics); - - const hash1 = hashMethod('https://example.com/api'); - const hash2 = hashMethod('https://different.com/api'); - - expect(hash1).not.toBe(hash2); + it('strips default ports (:80 on http, :443 on https)', () => { + // These are semantically equivalent URLs and must collapse to the + // same hash — otherwise distinct-server aggregation splits. + expect(normalizeServerUrlForHash('https://example.com')) + .toBe(normalizeServerUrlForHash('https://example.com:443')); + expect(normalizeServerUrlForHash('http://example.com')) + .toBe(normalizeServerUrlForHash('http://example.com:80')); + expect(computeServerHash('https://example.com')) + .toBe(computeServerHash('https://example.com:443')); + }); + + it('preserves non-default ports', () => { + // A custom port IS meaningful — different deployment. + expect(normalizeServerUrlForHash('https://example.com:8443')) + .not.toBe(normalizeServerUrlForHash('https://example.com')); + }); + + it('preserves path case (RFC 3986: paths are case-sensitive)', () => { + // /API and /api are potentially different endpoints; do not merge. 
+ expect(normalizeServerUrlForHash('https://example.com/API')) + .not.toBe(normalizeServerUrlForHash('https://example.com/api')); + }); + + it('accepts bare hostnames without a scheme', () => { + // Common config style: `COUNTLY_SERVER_URL=my-countly.com` + expect(normalizeServerUrlForHash('example.com')) + .toBe(normalizeServerUrlForHash('https://example.com')); + }); + + it('hashes consistently across scheme / case / trailing-slash variations', () => { + const a = computeServerHash('https://example.com/api'); + const b = computeServerHash('https://example.com/api/'); + const c = computeServerHash('http://example.com/api'); + const d = computeServerHash('HTTPS://Example.COM/api'); + expect(a).toBe(b); + expect(a).toBe(c); + expect(a).toBe(d); + }); + + it('returns a 16-hex-char string', () => { + const h = computeServerHash('https://api.count.ly'); + expect(h).toMatch(/^[0-9a-f]{16}$/); + }); + + it('returns undefined for empty / undefined input', () => { + expect(computeServerHash(undefined)).toBeUndefined(); + expect(computeServerHash('')).toBeUndefined(); + expect(computeServerHash(' ')).toBeUndefined(); + }); + + it('hashes different URLs to different values', () => { + expect(computeServerHash('https://example.com')) + .not.toBe(computeServerHash('https://different.com')); + }); + }); + + describe('server-hash segment injection on events', () => { + it('adds `server` segment to trackEvent when a resolver is set', async () => { + const Countly = await getCountlyMock(); + analytics.init(true, () => 'https://api.count.ly'); + vi.clearAllMocks(); + + analytics.trackEvent('anything', { foo: 'bar' }); + + const seg = (Countly.add_event as any).mock.calls[0][0].segmentation; + expect(seg.server).toMatch(/^[0-9a-f]{16}$/); + expect(seg.foo).toBe('bar'); + }); + + it('adds `server` segment to trackTimedEvent', async () => { + const Countly = await getCountlyMock(); + analytics.init(true, () => 'https://acme.count.ly'); + vi.clearAllMocks(); + + 
analytics.trackTimedEvent('op', { type: 'query' }, 100); + + const seg = (Countly.add_event as any).mock.calls[0][0].segmentation; + expect(seg.server).toMatch(/^[0-9a-f]{16}$/); + expect(seg.type).toBe('query'); }); + it('propagates `server` through the specialized track* helpers', async () => { + const Countly = await getCountlyMock(); + analytics.init(true, () => 'https://api.count.ly'); + vi.clearAllMocks(); + + analytics.trackToolExecution('apps_list', true, 10); + analytics.trackToolCategory('apps'); + analytics.trackAuthMethod('headers'); + analytics.trackApiEndpoint('/o', 'GET', 200); + analytics.trackHttpRequest('/mcp', 'POST'); + analytics.trackError('Error', 'boom', 'apps_list'); + + for (const call of (Countly.add_event as any).mock.calls) { + expect(call[0].segmentation.server).toMatch(/^[0-9a-f]{16}$/); + } + }); + + it('omits `server` segment when no resolver is configured', async () => { + const Countly = await getCountlyMock(); + analytics.init(true); + vi.clearAllMocks(); + + analytics.trackEvent('no_server', { x: 1 }); + + const seg = (Countly.add_event as any).mock.calls[0][0].segmentation; + expect(seg).toEqual({ x: 1 }); + expect(seg.server).toBeUndefined(); + }); + + it('omits `server` segment when the resolver returns undefined / empty', async () => { + const Countly = await getCountlyMock(); + analytics.init(true, () => undefined); + vi.clearAllMocks(); + + analytics.trackEvent('still_no_server'); + + const seg = (Countly.add_event as any).mock.calls[0][0].segmentation; + expect(seg).toBeUndefined(); + }); + + it('re-evaluates the resolver on every event (per-request URL variation)', async () => { + const Countly = await getCountlyMock(); + let currentUrl = 'https://tenant-a.count.ly'; + analytics.init(true, () => currentUrl); + vi.clearAllMocks(); + + analytics.trackEvent('e1'); + currentUrl = 'https://tenant-b.count.ly'; + analytics.trackEvent('e2'); + + const segA = (Countly.add_event as any).mock.calls[0][0].segmentation; + const segB = 
(Countly.add_event as any).mock.calls[1][0].segmentation; + expect(segA.server).not.toBe(segB.server); + }); + + it('keeps device_id at "mcp" (hash is on events, not device id)', async () => { + const Countly = await getCountlyMock(); + analytics.init(true, () => 'https://api.count.ly'); + expect(Countly.init).toHaveBeenCalledWith( + expect.objectContaining({ device_id: 'mcp' }) + ); + }); + + it('includes the hash on the server_started event (fired from within init)', async () => { + const Countly = await getCountlyMock(); + // Resolver returns a URL immediately so the very first event — which + // trackServerStart() emits from inside init() — is already tagged. + // This guards against the regression where server_started events + // shipped without the `server` segment because the config wasn't + // wired up yet at init time. + analytics.init(true, () => 'https://api.count.ly'); + + const serverStartedCall = (Countly.add_event as any).mock.calls.find( + (c: any[]) => c[0].key === 'server_started' + ); + expect(serverStartedCall).toBeDefined(); + expect(serverStartedCall[0].segmentation.server).toMatch(/^[0-9a-f]{16}$/); + }); + }); + + describe('getAppVersion', () => { it('should get app version from package.json', () => { const getVersionMethod = (analytics as any).getAppVersion.bind(analytics);