Large Language Models (LLMs) become far more useful when they can plan actions and call external tools. In this project, I built a lightweight local action agent using Streamlit, Ollama, and an open Llama-3 model variant fine-tuned for tool use.
The agent runs entirely on your machine — no API keys, no cloud calls to commercial LLMs. It can plan tasks like fetching the current weather, looking up a Wikipedia summary, converting currencies, or evaluating a math expression, and then generate a friendly natural-language answer.
How does it work?
At a high level, the workflow is:
User input: You type a request into the Streamlit chat box.
LLM Planner: A local Ollama model (Llama-3-Groq-8B-Tool-Use) receives a strict JSON-only system prompt. It must respond with either a {"tool": ..., "args": {...}} request or a {"final_answer": ...} direct reply.
Tool Execution: If a tool call is returned, the app validates/casts the arguments, runs the relevant function (weather, wiki, FX, calculator), and captures the raw JSON result.
Summarizer: The LLM is called again to transform the raw JSON into a clean, conversational answer.
UI: Both the friendly answer and the raw tool output (expandable) are shown in the chat history. An illustrative trace of the whole loop follows below.
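To make the planner contract concrete, here is a hypothetical round trip for one request. The tool payload and numbers are invented for illustration; real model output and exchange rates will vary:

# 1) User: "convert 150 USD to INR"
# 2) Planner (JSON-only model reply):
plan = {"tool": "fx.convert", "args": {"amount": 150, "from": "USD", "to": "INR"}}
# 3) The app dispatches to the FX tool, which might return:
result = {"from": "USD", "to": "INR", "amount": 150, "result": 12487.5, "date": "2024-01-01"}
# 4) The summarizer turns that JSON into prose, e.g.:
#    "150 USD is about 12,487.50 INR (rate as of 2024-01-01)."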
Tools included (no API keys required)
The agent ships with four ready-to-use tools:
Weather: Queries Open-Meteo to fetch real-time conditions.
Wikipedia Summary: Fetches summaries via Wikipedia’s public REST API.
FX Converter: Uses ExchangeRate.host for currency conversion.
Calculator: Parses math expressions safely with Python's ast module and a whitelist of functions (sqrt, sin, log, etc.); a quick example follows below.
✨ Because all tools are open/free, you don’t need to manage API keys or rate-limited paid services.
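As a quick sanity check of the calculator, the safe_calc helper from the full listing below accepts whitelisted math and rejects everything else (a minimal sketch of the expected behavior):

safe_calc("(12.5 * 3) + sqrt(81)")   # -> 46.5
safe_calc("__import__('os')")        # -> raises ValueError (disallowed name/call)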
Streamlit UI
The UI is intentionally simple:
Sidebar → change the Ollama URL and model name (e.g., ollama list shows what's installed).
Main screen → type a request, hit Enter, see both the agent’s reply and optional raw tool output.
Example interactions
Try prompts like weather in Chennai today, wiki: Right to Information Act, convert 150 USD to INR, or calculate (12.5 * 3) + sqrt(81).
Getting Started
1. Install dependencies
pip install streamlit requests pydantic python-dotenv
2. Start Ollama
ollama serve
3. Ensure the model is available
ollama list
If needed, import the quantized GGUF into Ollama and note the resulting model name (update it in the app sidebar). For example:
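A minimal import sketch (the GGUF filename below is an example; use whatever file you downloaded, and pick any model name you like):

# Modelfile
FROM ./Llama-3-Groq-8B-Tool-Use-Q4_K_M.gguf

ollama create groq-8b-tool -f Modelfile
ollama list   # should now show groq-8b-tool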
# app_streamlit.py
# -------------------------------------------------------------
# Streamlit chat app that uses a local Ollama model
# (Llama-3-Groq-8B-Tool-Use, e.g., Q4_K_M quant) to plan actions
# and call simple, no-key tools (weather, Wikipedia, FX, calculator).
# -------------------------------------------------------------
# Quick start:
# 1) Install deps: pip install streamlit requests pydantic python-dotenv
# 2) Start Ollama: ollama serve
# 3) Ensure your model is available in Ollama (e.g., after importing GGUF):
# ollama list
# If needed, set the model name in the app sidebar (default below is a guess).
# 4) Run app: streamlit run app_streamlit.py
# -------------------------------------------------------------
import os
import re
import json
import math
import ast
import requests
import streamlit as st
from typing import Any, Dict
# ---------------------------
# Default Config (editable in sidebar)
# ---------------------------
DEFAULT_OLLAMA_URL = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434")
# NOTE: After importing a GGUF into Ollama, the created model's name can vary.
# Try one of these in the sidebar if default doesn't work:
# - "llama-3-groq-8b-tool-use" (common community naming)
# - "Llama-3-Groq-8B-Tool-Use" (case variants)
# - whatever shows in `ollama list`
DEFAULT_MODEL_NAME = os.getenv("OLLAMA_MODEL", "groq-8b-tool")
# ---------------------------
# Safe calculator
# ---------------------------
ALLOWED_NODES = {
    ast.Expression, ast.BinOp, ast.UnaryOp, ast.Constant, ast.Load,
    ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Pow, ast.Mod,
    ast.USub, ast.UAdd, ast.FloorDiv,
    # Call and Name must be whitelisted too, or calls like sqrt(81)
    # would be rejected before the function check below ever runs.
    ast.Call, ast.Name,
}
ALLOWED_FUNCS = {
    "sqrt": math.sqrt, "ceil": math.ceil, "floor": math.floor,
    "log": math.log, "sin": math.sin, "cos": math.cos, "tan": math.tan
}

def safe_calc(expr: str) -> float:
    """Evaluate a math expression after whitelisting every AST node."""
    node = ast.parse(expr, mode="eval")
    for sub in ast.walk(node):
        if type(sub) not in ALLOWED_NODES:
            raise ValueError(f"Illegal token: {type(sub).__name__}")
        if isinstance(sub, ast.Call):
            if not isinstance(sub.func, ast.Name) or sub.func.id not in ALLOWED_FUNCS:
                raise ValueError("Only basic math functions allowed")
        if isinstance(sub, ast.Name) and sub.id not in ALLOWED_FUNCS:
            raise ValueError(f"Unknown name: {sub.id}")
    return eval(compile(node, "<calc>", "eval"), {"__builtins__": {}}, dict(ALLOWED_FUNCS))
# ---------------------------
# Tools (no API keys required)
# ---------------------------
def tool_weather_get(city: str) -> Dict[str, Any]:
    """Get current weather using Open-Meteo (free)."""
    g = requests.get(
        "https://geocoding-api.open-meteo.com/v1/search",
        params={"name": city, "count": 1, "language": "en"},
        timeout=20
    ).json()
    if not g.get("results"):
        return {"error": f"City not found: {city}"}
    loc = g["results"][0]
    lat, lon = loc["latitude"], loc["longitude"]
    f = requests.get(
        "https://api.open-meteo.com/v1/forecast",
        params={"latitude": lat, "longitude": lon, "current_weather": True, "timezone": "auto"},
        timeout=20
    ).json()
    cw = f.get("current_weather") or {}
    return {
        "location": loc.get("name"),
        "temperature_c": cw.get("temperature"),
        "windspeed_kmh": cw.get("windspeed"),
        "weathercode": cw.get("weathercode"),
        "time": cw.get("time"),
    }

def tool_wiki_summary(topic: str) -> Dict[str, Any]:
    """Fetch a page summary from Wikipedia's public REST API."""
    t = topic.strip().replace(" ", "_")
    r = requests.get(
        f"https://en.wikipedia.org/api/rest_v1/page/summary/{t}",
        headers={"accept": "application/json"}, timeout=20
    )
    if r.status_code == 404:
        return {"error": f"No Wikipedia page for '{topic}'"}
    d = r.json()
    return {
        "title": d.get("title"),
        "extract": d.get("extract"),
        "url": d.get("content_urls", {}).get("desktop", {}).get("page")
    }

def tool_fx_convert(amount: float, from_: str, to: str) -> Dict[str, Any]:
    """Convert currency via ExchangeRate.host ("from" is a Python keyword, hence "from_")."""
    r = requests.get(
        "https://api.exchangerate.host/convert",
        params={"from": from_, "to": to, "amount": amount},
        timeout=20
    ).json()
    if not r.get("success", True):
        return {"error": "FX API failed"}
    return {
        "from": from_.upper(), "to": to.upper(),
        "amount": amount, "result": r.get("result"), "date": r.get("date")
    }

def tool_calc_eval(expression: str) -> Dict[str, Any]:
    """Evaluate a math expression with the safe AST-based calculator."""
    try:
        val = safe_calc(expression)
        return {"expression": expression, "result": val}
    except Exception as e:
        return {"error": f"Calc error: {e}"}

TOOLS = {
    "weather.get": {"fn": tool_weather_get, "schema": {"city": str}},
    "wiki.summary": {"fn": tool_wiki_summary, "schema": {"topic": str}},
    "fx.convert": {"fn": tool_fx_convert, "schema": {"amount": float, "from": str, "to": str}},
    "calc.eval": {"fn": tool_calc_eval, "schema": {"expression": str}},
}
# ---------------------------
# LLM planner prompt
# ---------------------------
SYSTEM_PROMPT = ('''
You are a strict planner. You must reply ONLY with valid JSON.
Available tools: weather.get | wiki.summary | fx.convert | calc.eval
Rules:
- If using a tool, output exactly:
{"tool":"<tool_name>","args":{"arg1":"value1","arg2":"value2"}}
- If giving a final answer, output exactly:
{"final_answer":"your answer here"}
Examples:
User: weather in Chennai today
Assistant: {"tool":"weather.get","args":{"city":"Chennai"}}
User: Tell me about the Right to Information Act
Assistant: {"tool":"wiki.summary","args":{"topic":"Right to Information Act"}}
User: convert 150 USD to INR
Assistant: {"tool":"fx.convert","args":{"amount":150,"from":"USD","to":"INR"}}
User: calculate (12.5 * 3) + sqrt(81)
Assistant: {"tool":"calc.eval","args":{"expression":"(12.5 * 3) + sqrt(81)"}}
Output JSON only. No explanations. No extra text.'''
)
def llm_chat(ollama_url: str, model: str, messages: list[dict], stream: bool = False) -> dict:
    resp = requests.post(
        f"{ollama_url}/api/chat",
        json={"model": model, "messages": messages, "stream": stream},
        timeout=180,
    )
    resp.raise_for_status()
    return resp.json()

def llm_generate(ollama_url: str, model: str, prompt: str) -> str:
    resp = requests.post(
        f"{ollama_url}/api/generate",
        json={"model": model, "prompt": prompt, "stream": False},
        timeout=180,
    )
    resp.raise_for_status()
    return resp.json().get("response", "").strip()

def llm_plan(ollama_url: str, model: str, user_text: str) -> Dict[str, Any]:
    data = llm_chat(
        ollama_url,
        model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_text},
        ],
        stream=False,
    )
    msg = data.get("message", {}).get("content", "{}")
    # Grab the first {...} block in case the model wraps the JSON in stray text
    m = re.search(r"\{.*\}", msg, re.S)
    text = m.group(0) if m else msg
    try:
        return json.loads(text)
    except Exception:
        # Fall back to treating the raw reply as a direct answer
        return {"final_answer": msg}

def summarize_answer(ollama_url: str, model: str, user_msg: str, tool: str | None, result: Dict[str, Any]) -> str:
    if tool is None:
        return result if isinstance(result, str) else json.dumps(result, ensure_ascii=False, indent=2)
    prompt = (
        "User asked: " + user_msg +
        "\nTool used: " + tool +
        "\nTool result (JSON): " + json.dumps(result) +
        "\nWrite a concise, friendly answer; include key numbers/links if present."
    )
    return llm_generate(ollama_url, model, prompt)
# ---------------------------
# Streamlit UI
# ---------------------------
st.set_page_config(page_title="Action Agent (Local)", page_icon="🛠️")
st.title("🛠️ Local Action Agent – Llama‑3‑Groq‑8B‑Tool‑Use")
st.caption("Text-only LLM plans actions; tools: weather • Wikipedia • FX • calculator.")
with st.sidebar:
    st.subheader("Settings")
    ollama_url = st.text_input("Ollama URL", DEFAULT_OLLAMA_URL)
    model_name = st.text_input("Model name (see `ollama list`)", DEFAULT_MODEL_NAME)
    st.markdown("**Tips**")
    st.markdown("- Try: `weather in Chennai today`")
    st.markdown("- Try: `wiki: Right to Information Act`")
    st.markdown("- Try: `convert 150 USD to INR`")
    st.markdown("- Try: `calculate (12.5 * 3) + sqrt(81)`")

if "history" not in st.session_state:
    st.session_state.history = []
user_msg = st.text_input("Type a request and press Enter")
# Only process new input; Streamlit reruns the script on every interaction,
# so without this guard the same request would be appended repeatedly.
if user_msg and user_msg != st.session_state.get("last_msg"):
    st.session_state["last_msg"] = user_msg
    with st.spinner("Thinking..."):
        plan = llm_plan(ollama_url, model_name, user_msg)
        if "final_answer" in plan:
            st.session_state.history.append({
                "user": user_msg, "route": "direct", "answer": plan["final_answer"]
            })
        else:
            tool = plan.get("tool")
            args = plan.get("args", {})
            if tool not in TOOLS:
                st.session_state.history.append({
                    "user": user_msg, "route": "error", "answer": f"Unknown tool: {tool}"
                })
            else:
                # Simple arg casting/validation
                spec = TOOLS[tool]["schema"]
                cast_args = {}
                arg_error = None
                for k, typ in spec.items():
                    if k not in args:
                        arg_error = f"Missing arg '{k}' for tool {tool}"
                        break
                    try:
                        cast_args[k] = args[k] if typ is str else typ(args[k])
                    except Exception:
                        arg_error = f"Bad type for '{k}' (expected {typ.__name__})"
                        break
                if arg_error:
                    st.session_state.history.append({
                        "user": user_msg, "route": "error", "answer": arg_error
                    })
                else:
                    # "from" is a Python keyword, so tool_fx_convert's parameter is "from_"
                    call_args = {("from_" if k == "from" else k): v for k, v in cast_args.items()}
                    result = TOOLS[tool]["fn"](**call_args)
                    if "error" in result:
                        st.session_state.history.append({
                            "user": user_msg, "route": f"tool:{tool}", "answer": result["error"], "raw": result
                        })
                    else:
                        final = summarize_answer(ollama_url, model_name, user_msg, tool, result)
                        st.session_state.history.append({
                            "user": user_msg, "route": f"tool:{tool}", "answer": final, "raw": result
                        })
# Render history (latest first)
for item in reversed(st.session_state.history):
    st.markdown(f"**You:** {item['user']}")
    st.markdown(f"**Agent ({item['route']}):** {item['answer']}")
    if "raw" in item:
        with st.expander("Raw tool result"):
            st.json(item["raw"])
    st.divider()
4. Run the app
streamlit run app_streamlit.py
Open http://localhost:8501 and start chatting!
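If the app can't reach the model, you can sanity-check Ollama directly (assuming the model was created as groq-8b-tool, per the import step above):

curl http://127.0.0.1:11434/api/generate -d '{"model": "groq-8b-tool", "prompt": "Say hi", "stream": false}'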
Why this matters
This project shows how to:
Run a fully local action agent (no cloud APIs).
Combine LLM planning + structured tool calls + friendly summaries.
Use Streamlit to prototype AI UX quickly.
It’s a great pattern for experimenting with AI assistants, automations, or teaching demos.