Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copy this file to .env and fill in values for local execution.
# Only the key(s) required for your chosen --agent / --provider are needed.

ANTHROPIC_API_KEY=
OPENAI_API_KEY=
GROQ_API_KEY=
CEREBRAS_API_KEY=
GOOGLE_API_KEY=
GEMINI_API_KEY=
OPENROUTER_API_KEY=
DASHSCOPE_API_KEY=
XAI_API_KEY=
DEEPSEEK_API_KEY=
ANTHROPIC_BASE_URL=
MOONSHOT_API_KEY=
KIMI_API_KEY=
ZAI_API_KEY=
MISTRAL_API_KEY=
CURSOR_API_KEY=
6 changes: 2 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
.env.*
!.env.example

# caches
.eslintcache
Expand Down
15 changes: 11 additions & 4 deletions src/execution/docker-strategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
createAuthCacheArgs,
createCliCacheArgs,
createEnvironmentArgs,
createEnvironmentFile,
createNpmCacheArgs,
createWorkspaceArgs,
NPM_CACHE_CONTAINER_PATH,
Expand Down Expand Up @@ -99,11 +100,13 @@ export class DockerExecutionStrategy implements ExecutionStrategy {
}
}

const envFile = createEnvironmentFile(env);

const command = [
...DOCKER_BASE_ARGS,
"--entrypoint", "/usr/bin/env",
...createCliCacheArgs(SWELANCER_CLI_CACHE_CONTAINER_PATH),
...createEnvironmentArgs(env),
...envFile.args,
"--platform", "linux/amd64",
...hostMount,
...promptMount,
Expand All @@ -122,7 +125,8 @@ export class DockerExecutionStrategy implements ExecutionStrategy {

return {
command,
options: {}
options: {},
cleanup: envFile.cleanup
};
}

Expand All @@ -138,11 +142,13 @@ export class DockerExecutionStrategy implements ExecutionStrategy {
}
}

const envFile = createEnvironmentFile(core.env || {});

const command = [
...DOCKER_BASE_ARGS,
...createCliCacheArgs(),
...createAuthCacheArgs(ctx.agentName ?? 'claude'),
...createEnvironmentArgs(core.env || {}),
...envFile.args,
...createWorkspaceArgs({ workspacePath }),
...testMountArgs,
this.containerName,
Expand All @@ -151,7 +157,8 @@ export class DockerExecutionStrategy implements ExecutionStrategy {

return {
command,
options: {}
options: {},
cleanup: envFile.cleanup
};
}
}
2 changes: 2 additions & 0 deletions src/execution/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ export interface PrepareContext {
/**
 * A command that has been fully assembled and is ready to hand to the executor,
 * together with the execution options and any post-run cleanup it needs.
 */
export interface PreparedCommand {
/** Argument vector that the runners pass as-is to `executor.execute(...)`. */
command: string[];
/** Base execution options; the runners spread these and add their own `timeout`. */
options: import('../utils/shell').ExecuteOptions;
/** Optional cleanup to call after the command finishes (e.g. remove temp env files). */
// Runners invoke this from a `finally` block, so it runs on success and on failure.
cleanup?: () => void;
}

export interface ExecutionStrategy {
Expand Down
11 changes: 9 additions & 2 deletions src/execution/v2-container.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {
createAuthCacheArgs,
createCliCacheArgs,
createEnvironmentArgs,
createEnvironmentFile,
createNpmCacheArgs,
NPM_CACHE_CONTAINER_PATH,
SWELANCER_CLI_CACHE_CONTAINER_PATH,
Expand Down Expand Up @@ -91,12 +92,13 @@ export class V2ContainerManager {
async create(opts: V2ContainerOptions): Promise<void> {
const mounts = this.buildMounts();
const env = this.buildEnv(opts.issueId);
const envFile = createEnvironmentFile(env);

const createArgs = [
'docker', 'create',
'--entrypoint', '/usr/bin/env',
...createCliCacheArgs(SWELANCER_CLI_CACHE_CONTAINER_PATH),
...createEnvironmentArgs(env),
...envFile.args,
'--platform', 'linux/amd64',
...mounts,
'-w', '/app/expensify',
Expand All @@ -108,7 +110,12 @@ export class V2ContainerManager {
this.logger.info(`[v2] Creating container for task ${opts.issueId}`);
}

const createResult = await this.executor.execute(createArgs);
let createResult;
try {
createResult = await this.executor.execute(createArgs);
} finally {
envFile.cleanup();
}
if (createResult.exitCode !== 0) {
throw new Error(`docker create failed: ${createResult.stderr}`);
}
Expand Down
7 changes: 6 additions & 1 deletion src/runners/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ export class AgentRunner {
}

const execOptions = { ...prepared.options, timeout: config.timeout };
const result = await this.executor.execute(prepared.command, execOptions);
let result;
try {
result = await this.executor.execute(prepared.command, execOptions);
} finally {
prepared.cleanup?.();
}

const logCollector = AgentLoggerFactory.create(config.agent);
await logCollector.collect(config, exercise, exercisePath, result);
Expand Down
14 changes: 12 additions & 2 deletions src/runners/test-only.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@ export class TestOnlyRunner {
}

const execOptions = { ...prepared.options, timeout: config.timeout };
const result = await this.executor.execute(prepared.command, execOptions);
let result;
try {
result = await this.executor.execute(prepared.command, execOptions);
} finally {
prepared.cleanup?.();
}
const duration = Date.now() - startTime;

if (result.exitCode === 0) {
Expand Down Expand Up @@ -126,7 +131,12 @@ export class TestOnlyRunner {
}

const execOptions = { ...prepared.options, timeout: config.timeout };
const result = await this.executor.execute(prepared.command, execOptions);
let result;
try {
result = await this.executor.execute(prepared.command, execOptions);
} finally {
prepared.cleanup?.();
}
const duration = Date.now() - startTime;

if (result.exitCode === 0) {
Expand Down
7 changes: 6 additions & 1 deletion src/runners/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,12 @@ export class TestRunner {
}

const execOptions = { ...prepared.options, timeout: config.timeout };
const result = await this.executor.execute(prepared.command, execOptions);
let result;
try {
result = await this.executor.execute(prepared.command, execOptions);
} finally {
prepared.cleanup?.();
}
const duration = Date.now() - startTime;

if (result.exitCode === 0) {
Expand Down
43 changes: 42 additions & 1 deletion src/utils/__tests__/docker.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { describe, expect, it, afterEach } from 'bun:test';
import { createEnvironmentArgs, createAuthCacheArgs, hasAuthCache, AUTH_SENTINEL } from '../docker';
import { existsSync, readFileSync } from 'fs';
import { createEnvironmentArgs, createEnvironmentFile, createAuthCacheArgs, hasAuthCache, AUTH_SENTINEL } from '../docker';
import { mkdirSync, writeFileSync, rmSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';
Expand Down Expand Up @@ -31,6 +32,46 @@ describe('createEnvironmentArgs', () => {
});
});

// Covers createEnvironmentFile: the returned docker args, the file contents,
// and the behaviour of the cleanup callback. Every case removes its own file.
describe('createEnvironmentFile', () => {
  it('returns --env-file args pointing to an existing temp file', () => {
    const { args: [flag, envPath], cleanup: removeEnvFile } = createEnvironmentFile({ MY_KEY: 'my-value' });
    try {
      expect(flag).toBe('--env-file');
      expect(existsSync(envPath!)).toBe(true);
    } finally {
      removeEnvFile();
    }
  });

  it('writes key=value lines to the temp file', () => {
    // EMPTY_VALUE must be dropped; ANTHROPIC_API_KEY is allowed to be empty.
    const input = {
      FOO: 'bar',
      EMPTY_VALUE: '',
      ANTHROPIC_API_KEY: '',
    };
    const { args: [, envPath], cleanup: removeEnvFile } = createEnvironmentFile(input);
    try {
      const written = readFileSync(envPath!, 'utf-8');
      expect(written).toContain('FOO=bar');
      expect(written).toContain('ANTHROPIC_API_KEY=');
      expect(written).not.toContain('EMPTY_VALUE');
    } finally {
      removeEnvFile();
    }
  });

  it('cleanup removes the temp file', () => {
    const { args: [, envPath], cleanup: removeEnvFile } = createEnvironmentFile({ KEY: 'val' });
    removeEnvFile();
    expect(existsSync(envPath!)).toBe(false);
  });

  it('cleanup is idempotent (does not throw on second call)', () => {
    const { cleanup: removeEnvFile } = createEnvironmentFile({ KEY: 'val' });
    removeEnvFile();
    const secondCall = () => removeEnvFile();
    expect(secondCall).not.toThrow();
  });
});

describe('createAuthCacheArgs', () => {
it('returns volume mount args for known agents', () => {
const args = createAuthCacheArgs('claude');
Expand Down
30 changes: 28 additions & 2 deletions src/utils/docker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { existsSync, mkdirSync } from 'fs';
import { existsSync, mkdirSync, writeFileSync, unlinkSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';
import { homedir, tmpdir } from 'os';

export const DOCKER_BASE_ARGS = ["docker", "run", "--rm"] as const;
const CLI_CACHE_ENV = 'TS_BENCH_CLI_CACHE';
Expand Down Expand Up @@ -36,6 +36,32 @@ export function createEnvironmentArgs(envVars: Record<string, string>): string[]
.flatMap(([key, value]) => ["-e", `${key}=${value}`]);
}

/**
 * Write environment variables to a private temporary file and return the
 * Docker "--env-file" arguments for it, plus a cleanup callback. Using a file
 * instead of "-e KEY=VALUE" keeps secrets out of the process list (visible via
 * "ps aux" or "/proc/PID/cmdline").
 *
 * Entries with an empty value are dropped unless the key is one of the
 * deliberately-empty keys listed below.
 *
 * The file is created exclusively (flag "wx") with mode 0o600, so only the
 * current user can read it and a file or symlink that already exists at the
 * chosen path is never reused. Each call gets its own randomly suffixed path,
 * so concurrent calls in the same process cannot share or delete each other's
 * file.
 *
 * Always call cleanup() after the Docker command completes (success or failure).
 *
 * @param envVars - Environment variables to expose to the container.
 * @returns `args` to splice into the docker command line and a best-effort,
 *          idempotent `cleanup` that removes the temp file.
 * @throws Error if a key or value contains a line break (the env-file format
 *         is line-oriented and cannot represent it), or if the temp file
 *         cannot be created.
 */
export function createEnvironmentFile(envVars: Record<string, string>): { args: string[]; cleanup: () => void } {
  // These keys are passed as empty strings on purpose:
  // - NPM_* clears host prefix overrides so the container can use its own Node setup.
  // - ANTHROPIC_API_KEY clears any inherited Anthropic key when Claude is configured for OpenRouter.
  const allowEmptyKeys = new Set(['NPM_CONFIG_PREFIX', 'npm_config_prefix', 'NPM_PREFIX', 'ANTHROPIC_API_KEY']);

  const lines: string[] = [];
  for (const [key, value] of Object.entries(envVars)) {
    if (typeof value !== 'string') continue;
    if (value.length === 0 && !allowEmptyKeys.has(key)) continue;
    // A line break would split the entry and let the remainder be parsed as
    // additional variables, so fail loudly before anything is written.
    if (/[\r\n]/.test(key) || /[\r\n]/.test(value)) {
      throw new Error(`Environment variable ${key} contains a line break and cannot be written to a Docker env file`);
    }
    lines.push(`${key}=${value}`);
  }
  const content = lines.join('\n');

  const maxAttempts = 5;
  let filePath = '';
  for (let attempt = 1; ; attempt++) {
    const suffix = Math.random().toString(36).slice(2, 12);
    filePath = join(tmpdir(), `ts-bench-env-${process.pid}-${Date.now()}-${suffix}`);
    try {
      // 'wx' fails with EEXIST instead of following or overwriting an existing path.
      writeFileSync(filePath, content, { mode: 0o600, flag: 'wx' });
      break;
    } catch (err: unknown) {
      const isCollision =
        typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'EEXIST';
      if (!isCollision || attempt >= maxAttempts) throw err;
      // Name collision: loop again with a fresh random suffix.
    }
  }

  const createdPath = filePath;
  return {
    args: ['--env-file', createdPath],
    // Silently ignore errors: the file may have already been removed or
    // the process may be shutting down; cleanup is best-effort.
    cleanup: () => { try { unlinkSync(createdPath); } catch { /* best-effort */ } }
  };
}

export function createCliCacheArgs(containerMountPath: string = CLI_CACHE_CONTAINER_PATH): string[] {
const hostPath = resolveCliCachePath();
return ['-v', `${hostPath}:${containerMountPath}`];
Expand Down
9 changes: 6 additions & 3 deletions src/utils/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ const DEFAULT_ANY_ENV_MESSAGE = 'Please set at least one of the following enviro

/**
 * Read a required environment variable.
 *
 * @param key - Name of the environment variable to read.
 * @param message - Optional error message used when the variable is missing or blank.
 * @returns The variable's value exactly as stored in `process.env` (not trimmed).
 * @throws Error if the variable is unset, empty, or whitespace-only, or if it
 *         still holds a template placeholder such as `<your-key>`,
 *         `your-api-key-here`, or `CHANGE_ME`.
 */
export function requireEnv(key: string, message?: string): string {
  const value = process.env[key];
  if (!value || value.trim().length === 0) {
    throw new Error(message ?? `Environment variable ${key} is not set`);
  }
  // Compare the trimmed value so a placeholder padded with whitespace
  // (e.g. copied with a trailing space) is still rejected.
  const candidate = value.trim();
  if (/^<[^>]+>$/.test(candidate) || candidate === 'your-api-key-here' || candidate === 'CHANGE_ME') {
    throw new Error(`Environment variable ${key} appears to contain a placeholder value`);
  }
  return value;
}

export function requireAnyEnv(keys: string[], message?: string): { key: string; value: string } {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 requireAnyEnv missing placeholder detection added to requireEnv

The PR adds placeholder detection to requireEnv (line 10) to reject values like <your-key>, your-api-key-here, or CHANGE_ME. However, requireAnyEnv (lines 16-22) still accepts any non-empty string, including these same placeholder patterns. Since requireAnyEnv is widely used by agent builders to obtain API keys (e.g., src/agents/builders/claude.ts:51, src/agents/builders/aider.ts:11, src/agents/builders/gemini.ts:11), a user who copies .env.example and fills in a placeholder like <your-api-key> would get past requireAnyEnv validation and receive a confusing authentication error from the API provider instead of the clear "placeholder value" error.

(Refers to lines 16-22)

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Expand Down
Loading