Spaces:
Sleeping
Sleeping
| from dataclasses import dataclass, field | |
| from typing import Any, Union | |
| import gradio as gr | |
# An observation is either plain text or a structured payload.
Observation = Union[str, dict[str, Any]]
# An action is either plain text (e.g. a user message) or a structured
# payload (e.g. a tool-call schema).
Action = Union[str, dict[str, Any]]


@dataclass
class StepResult:
    """Outcome of a single environment step.

    The ``@dataclass`` decorator is required: it generates the positional
    ``__init__`` used by callers and gives ``field(default_factory=dict)``
    its meaning (a fresh ``info`` dict per instance).

    Attributes:
        observation: What the agent observes after the step.
        reward: Scalar reward produced by the step.
        done: Whether the episode has terminated.
        info: Free-form diagnostics; defaults to a new empty dict.
    """

    observation: Observation
    reward: float
    done: bool
    info: dict[str, Any] = field(default_factory=dict)
class WordleEnv:
    """Four-letter Wordle demonstration environment.

    Not a full game: words are four letters long and there is no dictionary
    check. Actions are 4-letter alphabetic words, matched case-insensitively.
    Observations are rows of four emoji squares:

    * green  - right letter in the right position
    * yellow - letter occurs elsewhere in the secret (each secret letter
      justifies at most one coloured square)
    * black  - letter is absent, or all of its occurrences are accounted for

    Reward is 1.0 for guessing the secret, 0.0 for any other valid guess and
    -0.05 for a malformed guess (which does not consume a turn). A step is
    terminal on success or once ``max_guesses`` valid guesses have been made.
    """

    _WORD_LEN = 4
    # Written as escapes so the squares survive any re-encoding of this file.
    # NOTE(review): the previous literals were mojibake; the "miss" square is
    # restored as U+2B1B (black) - confirm it was not U+2B1C (white).
    _MISS = "\u2b1b"  # black large square
    _PRESENT = "\U0001f7e8"  # large yellow square
    _HIT = "\U0001f7e9"  # large green square

    def __init__(self, *, secret: str = "word", max_guesses: int = 6) -> None:
        """Create an environment.

        Args:
            secret: The word to guess; must be four alphabetic characters.
                Stored lower-cased.
            max_guesses: Number of valid guesses after which steps report
                ``done``.

        Raises:
            ValueError: If ``secret`` is not four alphabetic characters.
        """
        # Guesses are lower-cased in step(); normalise the secret the same
        # way so a mixed-case secret remains guessable.
        normalized = secret.lower()
        # Explicit raise rather than `assert`, which vanishes under `python -O`.
        if len(normalized) != self._WORD_LEN or not normalized.isalpha():
            raise ValueError("secret must be exactly 4 alphabetic characters")
        self._secret = normalized
        self._max = max_guesses
        self._n = 0
        self._obs = self._MISS * self._WORD_LEN

    def reset(self) -> Observation:
        """Start a new episode and return the initial all-black observation."""
        self._n = 0
        self._obs = self._MISS * self._WORD_LEN
        return self._obs

    def step(self, action: Action) -> StepResult:
        """Score one guess against the secret.

        Args:
            action: The guess. It is converted with ``str()``, stripped and
                lower-cased before validation.

        Returns:
            A ``StepResult``. For a malformed guess the observation is
            unchanged, the reward is -0.05, ``done`` is False, ``info``
            carries an ``"error"`` entry and the guess counter is not
            advanced. Otherwise the observation is the feedback row and
            ``info["guesses"]`` is the number of valid guesses so far.
        """
        guess = str(action).strip().lower()
        if len(guess) != self._WORD_LEN or not guess.isalpha():
            return StepResult(self._obs, -0.05, False, {"error": "invalid guess"})

        self._n += 1
        secret = self._secret
        # Secret letters that were not matched exactly. Each one may back a
        # single yellow square, so repeated letters in the guess are not
        # over-reported.
        unmatched = [s for g, s in zip(guess, secret) if g != s]
        feedback: list[str] = []
        for g, s in zip(guess, secret):
            if g == s:
                feedback.append(self._HIT)
            elif g in unmatched:
                unmatched.remove(g)
                feedback.append(self._PRESENT)
            else:
                feedback.append(self._MISS)
        self._obs = "".join(feedback)

        solved = guess == secret
        done = solved or self._n >= self._max
        reward = 1.0 if solved else 0.0
        return StepResult(self._obs, reward, done, {"guesses": self._n})

    def render(self) -> str:
        """Return the most recent observation."""
        return self._obs
| # def step_fn(guess: str, wordle) -> tuple[str, float, bool, dict]: | |
| # """ | |
| # Perform a step in the Wordle environment. | |
| # | |
| # Args: | |
| # guess (str): The guessed word (4-letter lowercase string). | |
| # | |
| # Returns: | |
| # tuple[str, float, bool, dict]: A tuple containing: | |
| # - observation: The observation after the step. | |
| # - reward: The reward obtained from the step. | |
| # - done: Whether the game is done. | |
| # - info: Additional info. | |
| # """ | |
| # result = wordle.step(guess) | |
| # return result.observation, result.reward, result.done, result.info, wordle | |
# A single module-level environment backs every call to step_fn, so the
# guess count and last observation persist between calls.
wordle = WordleEnv(secret="word")


def step_fn(guess: str) -> tuple[str, float, bool, dict]:
    """
    Submit one guess to the shared Wordle environment.

    Args:
        guess (str): The guessed word (4-letter lowercase string).

    Returns:
        tuple[str, float, bool, dict]: The step outcome, in order:
            - observation: The observation after the step.
            - reward: The reward obtained from the step.
            - done: Whether the game is done.
            - info: Additional info.
    """
    # NOTE(review): with mcp_server=True Gradio presumably surfaces this
    # docstring as the tool description - keep the "Args:" layout intact.
    outcome = wordle.step(guess)
    return (outcome.observation, outcome.reward, outcome.done, outcome.info)
| # demo = gr.Interface( | |
| # fn=step_fn, | |
| # inputs=["text", gr.State(WordleEnv(secret="word"))], | |
| # outputs=[ | |
| # gr.Textbox(label="Observation"), | |
| # gr.Number(label="Reward"), | |
| # gr.Textbox(label="Done"), | |
| # gr.Textbox(label="Info"), | |
| # gr.State(), | |
| # ], | |
| # ) | |
# One text input carries the guess; there is one output widget per element
# of the tuple returned by step_fn, in the same order.
demo = gr.Interface(
    step_fn,
    inputs=["text"],
    outputs=[
        gr.Textbox(label="Observation"),
        gr.Number(label="Reward"),
        gr.Textbox(label="Done"),
        gr.Textbox(label="Info"),
    ],
)

if __name__ == "__main__":
    # Only start the app when executed as a script, not on import.
    demo.launch(mcp_server=True)