From f7ca1b00db368905a8b92a5ac822d38396116e0c Mon Sep 17 00:00:00 2001 From: Jared Vititoe Date: Mon, 20 Apr 2026 22:49:08 -0400 Subject: [PATCH] ask: switch to llama3.2:latest, increase timeout to 120s gemma3:latest produces garbage output on the Vulkan backend (Intel Arc A380). llama3.2:latest runs correctly at 100% GPU. Timeout bumped to 120s to handle cold model loads (~22s) without timing out. Co-Authored-By: Claude Sonnet 4.6 --- matrixbot/commands.py | 2 +- matrixbot/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/matrixbot/commands.py b/matrixbot/commands.py index cbb62a1..bdb5136 100644 --- a/matrixbot/commands.py +++ b/matrixbot/commands.py @@ -873,7 +873,7 @@ async def cmd_ask(client: AsyncClient, room_id: str, sender: str, args: str): await send_text(client, room_id, "Thinking...") try: - timeout = aiohttp.ClientTimeout(total=90) + timeout = aiohttp.ClientTimeout(total=120) async with aiohttp.ClientSession(timeout=timeout) as session: async with session.post( f"{OLLAMA_URL}/api/chat", diff --git a/matrixbot/config.py b/matrixbot/config.py index 24518ab..42baa9a 100644 --- a/matrixbot/config.py +++ b/matrixbot/config.py @@ -20,7 +20,7 @@ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://10.10.10.157:11434") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.2:latest") BALL_MODEL = os.getenv("BALL_MODEL", "sadiq-bd/llama3.2-1b-uncensored:latest") -ASK_MODEL = os.getenv("ASK_MODEL", "gemma3:latest") +ASK_MODEL = os.getenv("ASK_MODEL", "llama3.2:latest") MINECRAFT_RCON_HOST = os.getenv("MINECRAFT_RCON_HOST", "10.10.10.67") MINECRAFT_RCON_PORT = int(os.getenv("MINECRAFT_RCON_PORT", "25575")) MINECRAFT_RCON_PASSWORD = os.getenv("MINECRAFT_RCON_PASSWORD", "")