feat: add --model flag to !ask to pick the LLM per request

Usage: !ask --model <alias> <question> (or -m for short)

Aliases: phi4, llama3, llama3-1b, gemma, gemma-1b, deepseek,
codellama, qwen, dolphin, creative, abliterated,
uncensored, llama2. The 'Thinking...' message shows which
model is responding, and an invalid alias lists all options.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
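
Example invocations (illustrative; the flag syntax and aliases are from this commit, the questions are made up):

!ask --model deepseek Why is the sky blue?
!ask -m gemma-1b Summarize Hamlet in one sentence.
!ask What is the capital of Austria?   (no flag: falls back to the default ASK_MODEL)
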
@@ -1174,12 +1174,48 @@ async def cmd_trivia(client: AsyncClient, room_id: str, sender: str, args: str):
 # ==================== INTEGRATIONS ====================
 
 
-@command("ask", "Ask LotusBot a question (2min cooldown)")
+# Short aliases users can pass with --model / -m
+_ASK_MODEL_ALIASES: dict[str, str] = {
+    "phi4": "phi4-mini:latest",
+    "phi4-mini": "phi4-mini:latest",
+    "llama": "llama3.2:latest",
+    "llama3": "llama3.2:latest",
+    "llama3-1b": "llama3.2:1b",
+    "gemma": "gemma3:latest",
+    "gemma-1b": "gemma3:1b",
+    "deepseek": "deepseek-r1:latest",
+    "codellama": "codellama:latest",
+    "qwen": "qwen2.5:latest",
+    "dolphin": "dolphin-phi:latest",
+    "creative": "huihui_ai/llama3.2-abliterate:3b",
+    "abliterated": "huihui_ai/llama3.2-abliterate:3b",
+    "uncensored": "llama2-uncensored:latest",
+    "llama2": "llama2-uncensored:latest",
+}
+
+
+@command("ask", "Ask LotusBot a question — optionally pick a model with --model <name> (2min cooldown)")
 async def cmd_ask(client: AsyncClient, room_id: str, sender: str, args: str):
     if not args:
-        await send_text(client, room_id, f"Usage: {BOT_PREFIX}ask <question>")
+        aliases = ", ".join(sorted(_ASK_MODEL_ALIASES))
+        await send_text(client, room_id,
+            f"Usage: {BOT_PREFIX}ask [--model <name>] <question>\nModels: {aliases}")
         return
 
+    # Parse optional --model / -m flag
+    model = ASK_MODEL
+    model_flag = re.match(r"^(?:--model|-m)\s+(\S+)\s+(.*)", args, re.DOTALL)
+    if model_flag:
+        alias = model_flag.group(1).lower()
+        args = model_flag.group(2).strip()
+        resolved = _ASK_MODEL_ALIASES.get(alias)
+        if not resolved:
+            aliases = ", ".join(sorted(_ASK_MODEL_ALIASES))
+            await send_text(client, room_id,
+                f"Unknown model '{alias}'. Available: {aliases}")
+            return
+        model = resolved
+
     remaining = check_cooldown(sender, "ask")
     if remaining:
         await send_text(client, room_id, f"Command on cooldown. Try again in {remaining}s.")
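
A quick way to see what the new parsing accepts, as a standalone sketch (the alias table is abbreviated here and DEFAULT_MODEL is a stand-in for the bot's ASK_MODEL setting, so the model names are illustrative only):

import re

# Standalone sketch of the flag parsing added above. The alias table is
# abbreviated and DEFAULT_MODEL stands in for the bot's ASK_MODEL.
ALIASES = {
    "phi4": "phi4-mini:latest",
    "deepseek": "deepseek-r1:latest",
    "gemma-1b": "gemma3:1b",
}
DEFAULT_MODEL = "phi4-mini:latest"  # placeholder default

def parse_ask_args(args: str) -> tuple[str | None, str]:
    """Return (model, question); model is None for an unknown alias."""
    m = re.match(r"^(?:--model|-m)\s+(\S+)\s+(.*)", args, re.DOTALL)
    if not m:
        return DEFAULT_MODEL, args
    return ALIASES.get(m.group(1).lower()), m.group(2).strip()

print(parse_ask_args("-m deepseek Why is the sky blue?"))
# ('deepseek-r1:latest', 'Why is the sky blue?')
print(parse_ask_args("What year is it?"))
# ('phi4-mini:latest', 'What year is it?')
print(parse_ask_args("--model nope hi"))
# (None, 'hi') -> the real command replies with the list of valid aliases
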
@@ -1190,7 +1226,7 @@ async def cmd_ask(client: AsyncClient, room_id: str, sender: str, args: str):
         await send_text(client, room_id, "Please provide a valid question.")
         return
 
-    await send_text(client, room_id, "Thinking...")
+    await send_text(client, room_id, f"Thinking... (via {_model_label(model)})")
 
     try:
         timeout = aiohttp.ClientTimeout(total=120)
@@ -1198,7 +1234,7 @@ async def cmd_ask(client: AsyncClient, room_id: str, sender: str, args: str):
             async with session.post(
                 f"{OLLAMA_URL}/api/chat",
                 json={
-                    "model": ASK_MODEL,
+                    "model": model,
                     "stream": False,
                     "messages": [
                         {
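
For reference, the request above targets Ollama's /api/chat endpoint; a minimal standalone call looks roughly like this (assuming a local Ollama on the default port 11434, and ignoring the bot's system prompt and error handling):

import asyncio
import aiohttp

OLLAMA_URL = "http://localhost:11434"  # assumed default; the bot reads its own OLLAMA_URL

async def ask_once(model: str, question: str) -> str:
    timeout = aiohttp.ClientTimeout(total=120)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(
            f"{OLLAMA_URL}/api/chat",
            json={
                "model": model,
                "stream": False,
                "messages": [{"role": "user", "content": question}],
            },
        ) as resp:
            data = await resp.json()
            # With stream=False, Ollama returns a single message object.
            return data["message"]["content"]

if __name__ == "__main__":
    print(asyncio.run(ask_once("llama3.2:1b", "Why is the sky blue?")))
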
@@ -1227,7 +1263,7 @@ async def cmd_ask(client: AsyncClient, room_id: str, sender: str, args: str):
                 f'<font color="#a855f7"><strong>🤖 LotusBot</strong></font><br>'
                 f'<em>Q: {question}</em><br>'
                 f'<blockquote>{full_response}</blockquote>'
-                f'<sup><em>via {_model_label(ASK_MODEL)}</em></sup>'
+                f'<sup><em>via {_model_label(model)}</em></sup>'
             )
             await send_html(client, room_id, plain, html)
         except asyncio.TimeoutError:
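
The hunk above calls a _model_label() helper that is defined elsewhere in the file; its implementation is not part of this diff. A plausible sketch of that kind of helper, purely for illustration, might be:

def _model_label(model: str) -> str:
    # Hypothetical sketch, not the bot's actual helper: produce a short,
    # friendly label, e.g. "huihui_ai/llama3.2-abliterate:3b" ->
    # "llama3.2-abliterate:3b" and "phi4-mini:latest" -> "phi4-mini".
    name = model.split("/")[-1]
    return name.removesuffix(":latest")
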