@rllm.rollout(name="frozenlake")
async def frozenlake_flow(task: Task, config: AgentConfig) -> Episode:
    """Play one FrozenLake episode with a chat model and return it as an Episode.

    The map is generated from ``task.metadata`` and the environment is reset
    with the same seed. For up to ``max_turns`` turns the model is asked for a
    move; the reply is parsed with ``parse_action`` and, when valid, applied
    to the environment. Every model reply (valid or not) is recorded as one
    ``Step`` carrying a snapshot of the conversation up to that reply.

    NOTE(review): ``max_turns``, ``SYSTEM_PROMPT``, ``parse_action``,
    ``render_first_turn``, ``render_next_turn`` and ``_ACTION_LABELS`` are not
    defined in this block; presumably they are module-level names — confirm.

    Args:
        task: Task whose ``metadata`` provides the keys ``size``, ``p``,
            ``seed`` and ``is_slippery``.
        config: Agent configuration; ``base_url`` and ``model`` are read.

    Returns:
        An ``Episode`` with a single trajectory named ``"frozenlake"``, the
        artifacts ``won`` and ``turns`` (number of recorded steps), and
        ``is_correct`` set to ``won``.

    Raises:
        KeyError: If a required metadata key is missing (assuming metadata is
            a dict). This includes ``task.metadata`` being falsy, because it
            then falls back to an empty dict.
    """
    meta = task.metadata or {}
    desc = generate_random_map(size=meta["size"], p=meta["p"], seed=meta["seed"])
    env = gym.make("FrozenLake-v1", desc=desc, is_slippery=meta["is_slippery"])
    try:
        env.reset(seed=meta["seed"])
        # ``async with`` closes the client's HTTP resources on every exit path.
        async with AsyncOpenAI(base_url=config.base_url, api_key="EMPTY") as client:
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": render_first_turn(env, max_turns)},
            ]
            steps, won = [], False
            for turn in range(max_turns):
                resp = await client.chat.completions.create(
                    model=config.model,
                    messages=messages,
                    # NOTE(review): further request arguments are elided
                    # ("...") in the source snippet; restore them here.
                )
                content = resp.choices[0].message.content or ""
                action = parse_action(content)  # e.g. "```Up```" → 3
                messages.append({"role": "assistant", "content": content})
                steps.append(
                    Step(
                        # Copy the list so later appends do not alter this step.
                        chat_completions=list(messages),
                        action=_ACTION_LABELS.get(action),
                        # NOTE(review): further Step fields are elided ("…")
                        # in the source snippet; restore them here.
                    )
                )
                if action is None:
                    # An unparseable reply still uses up a turn, but the
                    # environment is not stepped.
                    messages.append({"role": "user", "content": "Please reply with a valid action…"})
                    continue
                _, reward, terminated, truncated, _ = env.step(action)
                if terminated:
                    # A terminal step with positive reward is a win; any other
                    # termination leaves ``won`` False.
                    won = float(reward) > 0
                    break
                if truncated:
                    break
                messages.append({"role": "user", "content": render_next_turn(env, turn + 1)})
            return Episode(
                trajectories=[Trajectory(name="frozenlake", steps=steps)],
                artifacts={"won": won, "turns": len(steps)},
                is_correct=won,
            )
    finally:
        # Release the environment even if the model call or a step raises.
        env.close()