diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py
index 9464f861..7884dbb6 100644
--- a/src/libkernelbot/leaderboard_db.py
+++ b/src/libkernelbot/leaderboard_db.py
@@ -1257,7 +1257,11 @@ def get_user_submissions(
             offset: Offset for pagination
 
         Returns:
-            List of submission dictionaries with summary info and runs
+            List of submission dictionaries with summary info and runs. Each
+            entry includes ``status`` ("pending" until done; then "failed" if
+            any run did not pass, else "done") and ``secret_score`` (lowest
+            passing secret leaderboard geomean score, the ranking metric; ``None``
+            if absent). The public score stays available per-run in ``runs[].score``.
         """
         # Validate and clamp inputs
         limit = max(1, min(limit, 100))
@@ -1325,16 +1329,59 @@ def get_user_submissions(
                 "score": run_row[2],
             })
 
+        # Per-submission status + secret score. The `runs` above already
+        # carry the public leaderboard score (in runs[].score), but they are
+        # ranking-filtered (anti-cheat: only public runs whose matching
+        # secret run passed) and never include secret runs, so two things
+        # are not derivable from them:
+        # - secret_score: the secret leaderboard run's score (the actual
+        # ranking metric). Visible to the owner, as the detail endpoint
+        # already exposes it; the list endpoint just never selected it.
+        # - whether any run failed, so a finished-but-failed submission can
+        # be told apart from a clean one (both otherwise look "done").
+        # One extra aggregate over the same runs rows (keyed by
+        # submission_id, like runs_query) avoids an N+1 detail fetch per row.
+        #
+        # MIN(score): a submission can have a secret leaderboard run per GPU;
+        # take the best (lowest) to match how the public score is summarized.
+        agg_query = """
+            SELECT submission_id,
+                   MIN(score) FILTER (
+                       WHERE mode = 'leaderboard' AND secret AND passed
+                   ) AS secret_score,
+                   bool_or(NOT passed) AS has_failed_run
+            FROM leaderboard.runs
+            WHERE submission_id = ANY(%s)
+            GROUP BY submission_id
+        """
+        self.cursor.execute(agg_query, (submission_ids,))
+        agg_by_submission: dict = {
+            row[0]: {"secret_score": row[1], "has_failed_run": row[2]}
+            for row in self.cursor.fetchall()
+        }
+
         # Build result with runs grouped by submission
         results = []
         for row in submissions:
             sub_id = row[0]
+            done = row[4]
+            agg = agg_by_submission.get(sub_id, {})
+
+            if not done:
+                status = "pending"
+            elif agg.get("has_failed_run"):
+                status = "failed"
+            else:
+                status = "done"
+
             results.append({
                 "id": sub_id,
                 "leaderboard_name": row[1],
                 "file_name": row[2],
                 "submission_time": row[3],
-                "done": row[4],
+                "done": done,
+                "status": status,
+                "secret_score": agg.get("secret_score"),
                 "runs": runs_by_submission.get(sub_id, []),
             })
         return results
diff --git a/tests/test_leaderboard_db.py b/tests/test_leaderboard_db.py
index 621b7391..a738e7fd 100644
--- a/tests/test_leaderboard_db.py
+++ b/tests/test_leaderboard_db.py
@@ -1066,6 +1066,106 @@ def test_get_user_submissions_with_multiple_runs(database, submit_leaderboard):
         assert 2.0 in scores
 
 
+def test_get_user_submissions_status_and_secret_score_on_success(database, submit_leaderboard):
+    """A fully-passing submission reports status 'done' with the secret score.
+
+    The public score stays where it already was (runs[].score).
+    """
+    with database as db:
+        sub = db.create_submission(
+            "submit-leaderboard", "ok.py", 5, "code",
+            datetime.datetime.now(tz=datetime.timezone.utc), user_name="user5",
+        )
+        _create_submission_run(db, sub, mode="leaderboard", secret=False, runner="A100", score=1.5)
+        _create_submission_run(db, sub, mode="leaderboard", secret=True, runner="A100", score=1.7)
+        db.mark_submission_done(sub)
+
+        result = db.get_user_submissions(user_id="5")
+        assert len(result) == 1
+        assert result[0]["status"] == "done"
+        # Scores come back as Decimal from Postgres; compare as float.
+        assert float(result[0]["secret_score"]) == 1.7
+        # Public score is unchanged: still exposed per-run.
+        assert [float(r["score"]) for r in result[0]["runs"]] == [1.5]
+
+        # Backward compat: the change is purely additive. The pre-existing
+        # fields must still be present so existing clients (popcorn-cli,
+        # kernelboard) keep working; new fields are added alongside.
+        assert {"id", "leaderboard_name", "file_name", "submission_time", "done", "runs"} <= set(
+            result[0]
+        )
+        assert {"gpu_type", "score"} <= set(result[0]["runs"][0])
+
+
+def test_get_user_submissions_status_failed_when_secret_run_failed(database, submit_leaderboard):
+    """A failed secret run -> status 'failed'; runs/scores stay hidden (anti-cheat)."""
+    failed = dataclasses.replace(sample_run_result(), passed=False)
+    with database as db:
+        sub = db.create_submission(
+            "submit-leaderboard", "bad.py", 5, "code",
+            datetime.datetime.now(tz=datetime.timezone.utc), user_name="user5",
+        )
+        _create_submission_run(db, sub, mode="leaderboard", secret=False, runner="A100", score=1.5)
+        _create_submission_run(
+            db, sub, mode="leaderboard", secret=True, runner="A100",
+            score=None, result=failed,
+        )
+        db.mark_submission_done(sub)
+
+        result = db.get_user_submissions(user_id="5")
+        assert len(result) == 1
+        assert result[0]["status"] == "failed"
+        # The failed secret run withholds the public score (ranking filter) and
+        # there is no passing secret score either.
+        assert result[0]["secret_score"] is None
+        assert result[0]["runs"] == []
+
+
+def test_get_user_submissions_status_failed_keeps_runs_when_public_run_failed(
+    database, submit_leaderboard
+):
+    """A failed *public* run still reports its (passing) runs, with status 'failed'.
+
+    Unlike a failed secret run, a failed public run does not trigger the
+    anti-cheat full-hide, so the passing public runs remain visible.
+    """
+    failed = dataclasses.replace(sample_run_result(), passed=False)
+    with database as db:
+        sub = db.create_submission(
+            "submit-leaderboard", "bad_public.py", 5, "code",
+            datetime.datetime.now(tz=datetime.timezone.utc), user_name="user5",
+        )
+        # Passing public test, failing public leaderboard run, passing secret.
+        _create_submission_run(db, sub, mode="test", secret=False, runner="A100")
+        _create_submission_run(
+            db, sub, mode="leaderboard", secret=False, runner="A100",
+            score=None, result=failed,
+        )
+        _create_submission_run(db, sub, mode="leaderboard", secret=True, runner="A100", score=1.7)
+        db.mark_submission_done(sub)
+
+        result = db.get_user_submissions(user_id="5")
+        assert len(result) == 1
+        assert result[0]["status"] == "failed"
+        # The passing public test run is still present (not a full hide).
+        assert len(result[0]["runs"]) >= 1
+
+
+def test_get_user_submissions_status_pending_when_not_done(database, submit_leaderboard):
+    """A not-yet-finished submission reports status 'pending'."""
+    with database as db:
+        sub = db.create_submission(
+            "submit-leaderboard", "wip.py", 5, "code",
+            datetime.datetime.now(tz=datetime.timezone.utc), user_name="user5",
+        )
+        _create_submission_run(db, sub, mode="leaderboard", secret=False, runner="A100", score=1.5)
+        # Not marked done.
+
+        result = db.get_user_submissions(user_id="5")
+        assert len(result) == 1
+        assert result[0]["status"] == "pending"
+
+
 def test_check_leaderboard_access_public(database, submit_leaderboard):
     """Public leaderboards grant access to everyone."""
     with database as db: