diff --git a/pyproject.toml b/pyproject.toml index 58028ca4..ba778820 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "jinja2", "huggingface-hub>=0.20", "pyarrow>=14.0", - "kernelguard>=0.1.1", + "kernelguard==0.3.1", ] [project.optional-dependencies] diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index 10857603..9464f861 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -883,6 +883,13 @@ def get_leaderboard_submissions( AND NOT r.secret AND r.score IS NOT NULL AND r.passed + AND s.status <> 'hacked' + AND NOT EXISTS ( + SELECT 1 + FROM leaderboard.submission_job_status sjs + WHERE sjs.submission_id = s.id + AND sjs.status = 'hacked' + ) AND s.user_id = %s AND EXISTS ( SELECT 1 @@ -922,6 +929,13 @@ def get_leaderboard_submissions( JOIN leaderboard.user_info ui ON s.user_id = ui.id WHERE l.name = %s AND r.runner = %s AND NOT r.secret AND r.score IS NOT NULL AND r.passed + AND s.status <> 'hacked' + AND NOT EXISTS ( + SELECT 1 + FROM leaderboard.submission_job_status sjs + WHERE sjs.submission_id = s.id + AND sjs.status = 'hacked' + ) AND EXISTS ( SELECT 1 FROM leaderboard.runs sr @@ -1441,6 +1455,13 @@ def get_leaderboard_submission_count( AND NOT r.secret AND r.score IS NOT NULL AND r.passed + AND s.status <> 'hacked' + AND NOT EXISTS ( + SELECT 1 + FROM leaderboard.submission_job_status sjs + WHERE sjs.submission_id = s.id + AND sjs.status = 'hacked' + ) AND s.user_id = %s AND EXISTS ( SELECT 1 @@ -1472,6 +1493,13 @@ def get_leaderboard_submission_count( AND NOT r.secret AND r.score IS NOT NULL AND r.passed + AND s.status <> 'hacked' + AND NOT EXISTS ( + SELECT 1 + FROM leaderboard.submission_job_status sjs + WHERE sjs.submission_id = s.id + AND sjs.status = 'hacked' + ) AND EXISTS ( SELECT 1 FROM leaderboard.runs sr diff --git a/tests/test_leaderboard_db.py b/tests/test_leaderboard_db.py index aaba3727..621b7391 100644 --- a/tests/test_leaderboard_db.py +++ b/tests/test_leaderboard_db.py @@ -385,6 +385,52 @@ def test_leaderboard_submission_ranked(database, submit_leaderboard): ] +def test_hacked_submissions_are_hidden_from_leaderboard_rankings(database, submit_leaderboard): + submit_time = datetime.datetime.now(tz=datetime.timezone.utc) + + with database as db: + submission_status_hacked = db.create_submission( + "submit-leaderboard", "submission_status_hacked.py", 5, "fast", submit_time, user_name="user5" + ) + _create_submission_run( + db, submission_status_hacked, mode="leaderboard", runner="A100", score=1.0 + ) + _create_submission_run( + db, submission_status_hacked, mode="leaderboard", secret=True, runner="A100" + ) + db.mark_submission_hacked(submission_status_hacked, error="blocked") + + job_status_hacked = db.create_submission( + "submit-leaderboard", "job_status_hacked.py", 6, "fast", submit_time, user_name="user6" + ) + _create_submission_run(db, job_status_hacked, mode="leaderboard", runner="A100", score=0.5) + _create_submission_run(db, job_status_hacked, mode="leaderboard", secret=True, runner="A100") + db.mark_submission_done(job_status_hacked) + db.upsert_submission_job_status(job_status_hacked, "hacked", "blocked") + + valid = db.create_submission( + "submit-leaderboard", "valid.py", 7, "valid", submit_time, user_name="user7" + ) + _create_submission_run(db, valid, mode="leaderboard", runner="A100", score=2.0) + _create_submission_run(db, valid, mode="leaderboard", secret=True, runner="A100") + db.mark_submission_done(valid) + + with database as db: + ranked = db.get_leaderboard_submissions("submit-leaderboard", "A100") + assert [row["submission_id"] for row in ranked] == [valid] + + assert db.get_leaderboard_submissions("submit-leaderboard", "A100", "5") == [] + assert db.get_leaderboard_submissions("submit-leaderboard", "A100", "6") == [] + assert [row["submission_id"] for row in db.get_leaderboard_submissions( + "submit-leaderboard", "A100", "7" + )] == [valid] + + assert db.get_leaderboard_submission_count("submit-leaderboard", "A100") == 1 + assert db.get_leaderboard_submission_count("submit-leaderboard", "A100", "5") == 0 + assert db.get_leaderboard_submission_count("submit-leaderboard", "A100", "6") == 0 + assert db.get_leaderboard_submission_count("submit-leaderboard", "A100", "7") == 1 + + def test_failed_secret_run_hides_submission_from_rankings(database, submit_leaderboard): submit_time = datetime.datetime.now(tz=datetime.timezone.utc) failed_secret = dataclasses.replace(sample_run_result(), passed=False) diff --git a/uv.lock b/uv.lock index 88cf63b2..52364d68 100644 --- a/uv.lock +++ b/uv.lock @@ -574,7 +574,7 @@ requires-dist = [ { name = "fastapi", extras = ["all"] }, { name = "huggingface-hub", specifier = ">=0.20" }, { name = "jinja2" }, - { name = "kernelguard", specifier = ">=0.1.1" }, + { name = "kernelguard", specifier = "==0.3.1" }, { name = "modal" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "psycopg2-binary" }, @@ -1024,11 +1024,11 @@ wheels = [ [[package]] name = "kernelguard" -version = "0.1.1" +version = "0.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1a/c1/0171bf0c75c935f5caa357ad748a910c302d4c6ec8261b708518761f783b/kernelguard-0.1.1.tar.gz", hash = "sha256:1e31ca28ebff67a09cba706c60c0f1b1dd6c098697ebf70248d85ee8d2d30eff", size = 49892, upload-time = "2026-03-19T22:34:13.652Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/63/91e85e4dd05535452a134941cdc5c41d73bc23f1b23693cd66ee0b4d4fe3/kernelguard-0.3.1.tar.gz", hash = "sha256:e4ae135ae940450fd530a77c97f1369e8400bbe32932ca5b8f090dc94f641ae0", size = 86864, upload-time = "2026-06-22T00:02:59.494Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/64/809f8a4c9aa66af2eb458f84fdcc6240363d6face44095e8d7c978fd9171/kernelguard-0.1.1-py3-none-any.whl", hash = "sha256:584810dc5fbc05781d348eb3c96af11b678b4ef571bcc260ef182cf758ccc122", size = 49469, upload-time = "2026-03-19T22:34:12.396Z" }, + { url = "https://files.pythonhosted.org/packages/8c/09/e175afea22d809cf24b55f7c602a4de0a8330ebfc3f5f1f40747a8b9c2d9/kernelguard-0.3.1-py3-none-any.whl", hash = "sha256:9cd39734d4aada04873e9d8f6a8498635095cf929dd311b726f3d9d253c2bac7", size = 89097, upload-time = "2026-06-22T00:02:58.095Z" }, ] [[package]]