| | import io
|
| | import base64
|
| | from dataclasses import dataclass, field
|
| | from typing import Any, List, Tuple, Dict
|
| | from PIL import Image
|
| |
|
| |
|
| | @dataclass
|
| | class LLMMessage:
|
| | content: str | List[Dict[str, Any]]
|
| | source: str = "user"
|
| |
|
| |
|
@dataclass
class SystemMessage(LLMMessage):
    """Message whose ``source`` defaults to ``"system"``.

    The fields are declared here so that ``@dataclass`` generates the
    constructor ``SystemMessage(content, source="system")`` itself. This keeps
    the dataclass field metadata (``dataclasses.fields``) in agreement with
    the constructor default, which a hand-written ``__init__`` did not.

    Attributes:
        content: Text of the message.
        source: Origin label; defaults to ``"system"``.
    """

    # Redeclared to narrow the annotation to plain text for this subclass.
    content: str

    # Overrides the inherited default so instances are labelled "system".
    source: str = "system"
|
| |
|
| |
|
@dataclass
class UserMessage(LLMMessage):
    """Message whose ``source`` defaults to ``"user"``, with an extra flag.

    ``content`` and ``source`` are inherited unchanged from ``LLMMessage``.
    ``is_original`` is declared as a real dataclass field so that it takes
    part in ``repr``, equality, ``dataclasses.asdict`` and
    ``dataclasses.replace``; when it was only assigned inside a hand-written
    ``__init__`` those helpers ignored it.

    The generated constructor keeps the same signature:
    ``UserMessage(content, source="user", is_original=False)``.

    Attributes:
        is_original: Boolean flag stored on the message; defaults to
            ``False``. NOTE(review): presumably marks the user's initial
            request -- its meaning is not established in this file, confirm
            with callers.
    """

    is_original: bool = False
|
| |
|
| |
|
@dataclass
class AssistantMessage(LLMMessage):
    """Message whose ``source`` defaults to ``"assistant"``.

    The fields are declared here so that ``@dataclass`` generates the
    constructor ``AssistantMessage(content, source="assistant")`` itself. This
    keeps the dataclass field metadata (``dataclasses.fields``) in agreement
    with the constructor default, which a hand-written ``__init__`` did not.

    Attributes:
        content: Text of the message.
        source: Origin label; defaults to ``"assistant"``.
    """

    # Redeclared to narrow the annotation to plain text for this subclass.
    content: str

    # Overrides the inherited default so instances are labelled "assistant".
    source: str = "assistant"
|
| |
|
| |
|
@dataclass
class ImageObj:
    """Dataclass wrapper around a single PIL image (e.g. a screenshot)."""

    # The wrapped PIL image.
    image: Image.Image

    @classmethod
    def from_pil(cls, image: Image.Image) -> "ImageObj":
        """Alternate constructor: wrap an existing PIL image."""
        wrapper = cls(image=image)
        return wrapper

    def to_base64(self) -> str:
        """Serialize the image as PNG and return it base64-encoded as text."""
        png_buffer = io.BytesIO()
        self.image.save(png_buffer, format="PNG")
        encoded_bytes = base64.b64encode(png_buffer.getvalue())
        return encoded_bytes.decode("utf-8")

    def resize(self, size: Tuple[int, int]) -> Image.Image:
        """Return the wrapped image resized to ``size``.

        The result is the PIL image produced by ``Image.resize``, not an
        ``ImageObj``; ``self.image`` is not reassigned.
        """
        resized = self.image.resize(size)
        return resized
|
| |
|
| |
|
@dataclass
class ModelResponse:
    """Result returned from a model call.

    Attributes:
        content: Text of the response.
        usage: Usage information attached to the response; each instance
            gets its own empty dict when none is supplied.
            NOTE(review): presumably token counts -- confirm with callers.
    """

    content: str

    # default_factory avoids sharing one mutable dict between instances.
    usage: Dict[str, Any] = field(default_factory=dict)
|
| |
|
| |
|
@dataclass
class FunctionCall:
    """A single function invocation together with its arguments.

    Attributes:
        id: Identifier of the call.
        name: Name of the function being called.
        arguments: Mapping of argument names to values.
    """

    id: str
    name: str
    arguments: Dict[str, Any]
|
| |
|
| |
|
def message_to_openai_format(message: LLMMessage) -> Dict[str, Any]:
    """Translate an ``LLMMessage`` into an OpenAI-style message dict.

    The role is chosen from the message class: ``SystemMessage`` maps to
    ``"system"``, ``AssistantMessage`` to ``"assistant"``, and anything else
    to ``"user"``. ``message.source`` is not consulted.

    Args:
        message: The message to convert.

    Returns:
        A dict with ``"role"`` and ``"content"`` keys. Non-list content is
        passed through as-is. List content is converted entry by entry:
        ``ImageObj`` becomes an ``image_url`` part carrying a base64 PNG data
        URL, ``str`` becomes a ``text`` part, and ``dict`` entries are
        appended unchanged. Entries of any other type are skipped.
    """
    if isinstance(message, SystemMessage):
        role = "system"
    elif isinstance(message, AssistantMessage):
        role = "assistant"
    else:
        role = "user"

    payload = message.content

    # Anything that is not a list goes through untouched.
    if not isinstance(payload, list):
        return {"role": role, "content": payload}

    parts = []
    for entry in payload:
        if isinstance(entry, ImageObj):
            encoded = entry.to_base64()
            image_part = {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{encoded}"},
            }
            parts.append(image_part)
        elif isinstance(entry, str):
            parts.append({"type": "text", "text": entry})
        elif isinstance(entry, dict):
            # Already-structured parts are forwarded by reference, not copied.
            parts.append(entry)
    return {"role": role, "content": parts}
|
| |
|
| |
|
| | @dataclass
|
| | class WebSurferEvent:
|
| | source: str
|
| | message: str
|
| | url: str
|
| | action: str | None = None
|
| | arguments: Dict[str, Any] | None = None
|
| |
|