Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
# Required: GitHub token with repo access for cloning game repositories
GITHUB_TOKEN=your_github_token_here

# Optional: LLM Provider API Keys (configure the ones you plan to use)
OPENAI_API_KEY=
# LLM provider API keys — set the ones for the models you run (see configs/models.yaml).
# Models are resolved by litellm from their provider-prefixed names.
ANTHROPIC_API_KEY=
OPENAI_API_KEY=
GEMINI_API_KEY=
XAI_API_KEY=
DASHSCOPE_API_KEY=

# Optional: only needed for legacy configs that still set `model_class: portkey`.
PORTKEY_API_KEY=
42 changes: 14 additions & 28 deletions codeclash/agents/minisweagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
import traceback
from collections.abc import Callable

from minisweagent import Model
from minisweagent.agents.default import AgentConfig, DefaultAgent
from minisweagent.environments.docker import DockerEnvironment
from minisweagent.models import get_model
from minisweagent.models.test_models import DeterministicModel
from minisweagent.run.utils.save import save_traj
from minisweagent.models import Model, get_model

from codeclash import REPO_DIR
from codeclash.agents.player import Player
Expand All @@ -22,10 +19,8 @@


class ClashAgent(DefaultAgent):
"""
Slightly modified version of `DefaultAgent` from mini-SWE-agent
(https://github.com/SWE-agent/mini-swe-agent)
"""
"""`DefaultAgent` from mini-SWE-agent (https://github.com/SWE-agent/mini-swe-agent)
with per-player debug logging."""

def __init__(
self,
Expand All @@ -39,9 +34,11 @@ def __init__(
super().__init__(model, env, config_class=config_class, **kwargs)
self.logger = logger

def add_message(self, role: str, content: str, **kwargs):
super().add_message(role, content, **kwargs)
self.logger.debug(f"[{role}] {content}", extra={"highlighter": None})
def add_messages(self, *messages: dict) -> list[dict]:
result = super().add_messages(*messages)
for m in messages:
self.logger.debug(f"[{m.get('role')}] {m.get('content')}", extra={"highlighter": None})
return result


class MiniSWEAgent(Player):
Expand All @@ -51,26 +48,21 @@ def __init__(self, config: dict, environment: DockerEnvironment, game_context: G
super().__init__(config, environment=environment, game_context=game_context)

def run(self):
# temporary workaround around https://github.com/SWE-agent/mini-swe-agent/issues/477
if "DeterministicModel" not in self.config["config"]["model"].get("model_class", ""):
model = get_model(config=self.config["config"]["model"])
else:
model = DeterministicModel(outputs=self.config["config"]["model"]["outputs"])
model = get_model(config=self.config["config"]["model"])
self.agent = ClashAgent(
model=model,
env=self.environment,
logger=self.logger,
**self.config["config"]["agent"],
)
exit_status = None
result = None
exc_message = None
try:
exit_status, result = self.agent.run(task="", **self.game_context.to_template_vars())
result = self.agent.run(task="", **self.game_context.to_template_vars())
exit_status = result.get("exit_status", "")
except Exception as e:
exit_status = str(e)
exc_message = traceback.format_exc()
result = exc_message
self.logger.critical(exc_message)
finally:
traj_path = (
Expand All @@ -79,22 +71,16 @@ def run(self):
/ self.name
/ f"{self.name}_r{self.game_context.round}.traj.json"
)
save_traj(
self.agent, # type: ignore
traj_path,
exit_status=exit_status,
result=result,
print_fct=self.logger.debug,
)
self.agent.save(traj_path)
copy_to_container(
self.environment,
traj_path,
self.game_context.log_env / "edits" / traj_path.name,
)
self._metadata["agent_stats"][self.game_context.round] = {
"exit_status": exit_status,
"cost": self.agent.model.cost,
"api_calls": self.agent.model.n_calls,
"cost": self.agent.cost,
"api_calls": self.agent.n_calls,
}
if exit_status.lower().strip() not in ["", "submitted", "limitsexceeded"] and exc_message is not None:
raise RuntimeError(f"Agent {self.name} failed with exit status: {exit_status} and exception: {exc_message}")
9 changes: 7 additions & 2 deletions codeclash/arenas/arena.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
from codeclash.agents.player import Player
from codeclash.constants import DIR_LOGS, DIR_WORK, GH_ORG, RESULT_TIE
from codeclash.utils.aws import is_running_in_aws_batch, pull_game_container_aws_ecr
from codeclash.utils.environment import assert_zero_exit_code, copy_between_containers, copy_from_container
from codeclash.utils.environment import (
ClashDockerEnvironment,
assert_zero_exit_code,
copy_between_containers,
copy_from_container,
)
from codeclash.utils.log import get_logger


Expand Down Expand Up @@ -185,7 +190,7 @@ def get_environment(self, branch_name: str | None = None) -> DockerEnvironment:
run_args = ["--rm"]
else:
run_args = []
environment = DockerEnvironment(
environment = ClashDockerEnvironment(
image=self.image_name,
cwd=str(DIR_WORK),
env={
Expand Down
33 changes: 25 additions & 8 deletions codeclash/utils/environment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
import shutil
import subprocess
import tempfile
Expand All @@ -10,6 +11,28 @@
COPY_EXCLUDE_PATTERNS = [".git", "__pycache__"]


def _scratch_dir() -> str | None:
"""Local scratch dir for staging `docker cp` transfers. Defaults to the system temp dir.
Override with CODECLASH_TMPDIR (e.g. on AWS Batch, where the default temp dir misbehaves)."""
override = os.getenv("CODECLASH_TMPDIR")
if override:
Path(override).mkdir(parents=True, exist_ok=True)
return override


class ClashDockerEnvironment(DockerEnvironment):
    """DockerEnvironment whose `execute` accepts either an action dict or a bare command.

    mini-swe-agent v2 expects `execute` to receive an action dict (`{"command": ...}`),
    whereas CodeClash's arena code passes the shell command as a plain string. A string
    is wrapped into the dict form before delegating, so both call styles work.
    """

    def execute(self, action: str | dict, cwd: str = "", *, timeout: int | None = None) -> dict:
        # Wrap a plain command string; dict actions are forwarded untouched.
        normalized = {"command": action} if isinstance(action, str) else action
        return super().execute(normalized, cwd, timeout=timeout)


def assert_zero_exit_code(result: dict, *, logger: logging.Logger | None = None) -> dict:
if result.get("returncode", 0) != 0:
msg = f"Command failed with exit code {result.get('returncode')}:\n{result.get('output')}"
Expand All @@ -34,10 +57,7 @@ def copy_between_containers(
print(
f"Copy between containers: {src_container.container_id}:{src_path} -> {dest_container.container_id}:{dest_path}"
)
# Some weird stuff happening on AWS where /tmp doesn't work properly
dir = Path.home() / "tmp"
dir.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory(dir=dir) as temp_dir:
with tempfile.TemporaryDirectory(dir=_scratch_dir()) as temp_dir:
temp_path = Path(temp_dir) / Path(src_path).name

# Copy from source container to temporary local directory
Expand Down Expand Up @@ -151,10 +171,7 @@ def create_file_in_container(
Create a file with given content on a Docker container.
Uses a temporary file on the local filesystem for the transfer.
"""
# Some weird stuff happening on AWS where /tmp doesn't work properly
dir = Path.home() / "tmp"
dir.mkdir(parents=True, exist_ok=True)
with tempfile.NamedTemporaryFile(mode="w", delete=True, suffix=".tmp", dir=dir) as tmp_file:
with tempfile.NamedTemporaryFile(mode="w", delete=True, suffix=".tmp", dir=_scratch_dir()) as tmp_file:
tmp_file.write(content)
tmp_file.flush() # Ensure content is written to disk
tmp_file_path = Path(tmp_file.name)
Expand Down
4 changes: 2 additions & 2 deletions codeclash/viewer/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,14 +980,14 @@ def get_parent_folder(path):


def format_timestamp(timestamp):
    """Format a Unix timestamp as ``YY/MM/DD HH:MM``.

    Args:
        timestamp: Seconds since the epoch, or ``None``.

    Returns:
        The formatted timestamp (local time, as produced by ``datetime.fromtimestamp``),
        or ``""`` when ``timestamp`` is ``None`` or cannot be converted to a date.
    """
    if timestamp is None:
        return ""
    from datetime import datetime

    try:
        dt = datetime.fromtimestamp(timestamp)
        return dt.strftime("%y/%m/%d %H:%M")
    except (ValueError, OverflowError, OSError):
        # fromtimestamp raises OverflowError (not ValueError) for values outside the
        # platform's time_t range, e.g. infinity; treat those like any other bad value.
        return ""

Expand Down
4 changes: 2 additions & 2 deletions codeclash/viewer/static/js/picker.js
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ function shouldRowBeVisible(row) {
const dateElement = row.querySelector(".date-text");
if (dateElement) {
const dateText = dateElement.textContent.trim();
// Extract just the MM/DD part
// Extract just the date part (before the time)
const rowDate = dateText.split(" ")[0];
if (rowDate !== selectedDate) {
return false;
Expand Down Expand Up @@ -1065,7 +1065,7 @@ function handleModelTagClick(event, modelName) {

function handleDateClick(event, dateText) {
event.stopPropagation();
// Extract just the YYYY-MM-DD part
// Extract just the date part (before the time)
const date = dateText.trim().split(" ")[0];

// Toggle date filter - if already selected, clear it
Expand Down
63 changes: 61 additions & 2 deletions codeclash/viewer/static/js/trajectory.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,19 @@ function createMessageElement(message, index) {
const messageContent = document.createElement("div");
messageContent.className = "message-content";

// Handle different content types
if (typeof message.content === "string") {
// Handle different content types.
// mini-swe-agent v2 (tool-call) assistant messages keep the command in
// extra.actions / tool_calls instead of in a ```bash block in the text, so they need
// their own renderer. v1 messages have neither and fall through to the text paths below.
const toolActions =
message.extra && Array.isArray(message.extra.actions)
? message.extra.actions
: [];
const hasToolCalls =
Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
if (toolActions.length > 0 || hasToolCalls) {
messageContent.innerHTML = createToolCallContentHTML(message);
} else if (typeof message.content === "string") {
const lines = message.content.split("\n");
if (lines.length <= 5) {
// Show full content
Expand Down Expand Up @@ -260,6 +271,54 @@ function createComplexContentHTML(contentParts) {
return html;
}

/**
 * Render a mini-swe-agent v2 tool-call assistant message: the reasoning text (if any)
 * followed by each issued command as a code block (matching the v1 ```bash styling).
 *
 * @param {object} message - Trajectory message. Reads `content` (string, array of
 *   content blocks, or null), `extra.actions` and `tool_calls`.
 * @returns {string} HTML for the message body; all message-derived text is escaped.
 */
function createToolCallContentHTML(message) {
  // Thought text: content may be a string, an array of content blocks, or null.
  let thought = "";
  if (typeof message.content === "string") {
    thought = message.content;
  } else if (Array.isArray(message.content)) {
    thought = message.content
      .filter((p) => p && p.type === "text" && typeof p.text === "string")
      .map((p) => p.text)
      .join("\n");
  }

  // Commands: prefer the parsed actions, fall back to the raw tool_calls.
  let commands = [];
  if (message.extra && Array.isArray(message.extra.actions)) {
    // Guard against null/undefined entries so one bad action cannot break rendering.
    commands = message.extra.actions
      .map((a) => (a ? a.command : undefined))
      .filter(Boolean);
  }
  if (!commands.length && Array.isArray(message.tool_calls)) {
    commands = message.tool_calls
      .map((tc) => {
        const rawArgs = tc && tc.function ? tc.function.arguments : undefined;
        if (typeof rawArgs !== "string") {
          // Arguments already decoded into an object (or missing entirely).
          return rawArgs ? rawArgs.command : undefined;
        }
        try {
          const parsed = JSON.parse(rawArgs);
          // JSON `null` has no command; show the raw text instead.
          return parsed === null ? rawArgs : parsed.command;
        } catch (e) {
          // Not valid JSON: show the raw argument string rather than nothing.
          return rawArgs;
        }
      })
      .filter(Boolean);
  }

  let html = '<div class="message-content-full">';
  if (thought.trim()) {
    html += `<div class="message-text"><pre>${escapeHtml(thought)}</pre></div>`;
  }
  commands.forEach((cmd) => {
    html += `<div class="code-block"><pre><code>${escapeHtml(cmd)}</code></pre></div>`;
  });
  if (!thought.trim() && !commands.length) {
    // Nothing recognizable to show: dump the raw content so the message is not blank.
    html += `<div class="message-text"><pre>${escapeHtml(
      JSON.stringify(message.content, null, 2),
    )}</pre></div>`;
  }
  html += "</div>";
  return html;
}

function escapeHtml(text) {
const div = document.createElement("div");
div.textContent = text;
Expand Down
20 changes: 19 additions & 1 deletion codeclash/viewer/templates/includes/message.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,25 @@
{{ message_role_upper }} #{{ loop.index }}
</span>
<div class="message-content">
{% if message.content is string %}
{% set tool_actions = message.extra.actions if (message.extra and message.extra.actions) else [] %}
{% if tool_actions %}
{# mini-swe-agent v2 tool-call assistant message: reasoning text + issued command(s).
The command lives in extra.actions rather than a ```bash block in the text. #}
<div class="message-content-full">
{% if message.content is string and message.content|trim %}
<div class="message-text">
<pre>{{ message.content }}</pre>
</div>
{% endif %}
{% for action in tool_actions %}
{% if action.command %}
<div class="code-block">
<pre><code>{{ action.command }}</code></pre>
</div>
{% endif %}
{% endfor %}
</div>
{% elif message.content is string %}
{# Simple string content #}
{% set line_count = message.content.count('\n') + 1 %}
{% if line_count <= 5 %}
Expand Down
Loading
Loading