Usage & Cost Tracking
Monitor LLM token usage and costs across all providers with built-in pricing tables and real-time tracking.
Overview
YosrAI provides comprehensive cost tracking to help you monitor and control LLM usage expenses. Every API call is automatically tracked with token counts and calculated costs.
Usage Model
class Usage:
    """Token counts and the calculated USD cost for a single LLM call."""
    prompt_tokens: int      # Input tokens
    completion_tokens: int  # Output tokens
    total_tokens: int       # Total tokens used
    cost: float             # Calculated cost in USD
    cost_formatted: str     # Formatted cost string (e.g., "$0.0142")
Automatic Tracking
Agent Usage
from yosrai import Agent

agent = Agent.from_preset("assistant")

# Run agent (usage is tracked automatically on every call)
result = agent.run("Write a short story")

# Access usage data for the most recent run
usage = agent.last_usage
print(f"Prompt tokens: {usage.prompt_tokens}")
print(f"Completion tokens: {usage.completion_tokens}")
print(f"Total tokens: {usage.total_tokens}")
print(f"Cost: {usage.cost_formatted}")  # e.g. "$0.0024"
Workflow Usage
from yosrai import Workflow, Agent

agent1 = Agent.from_preset("researcher")
agent2 = Agent.from_preset("writer")

workflow = Workflow("Pipeline").start(agent1).then(agent2)
result = workflow.run("Create an article about AI")

# Aggregate usage across all steps of the workflow
total_usage = workflow.last_usage
print(f"Total workflow cost: {total_usage.cost_formatted}")
Conversation Usage
from yosrai import Agent

agent = Agent.from_preset("assistant")

with agent.conversation() as chat:
    chat.send_message("Hello!")
    chat.send_message("How are you?")
    chat.send_message("Tell me a joke.")

    # Cumulative cost across the whole conversation so far
    print(f"Total conversation cost: {chat.total_usage.cost_formatted}")
Manual Cost Calculation
Built-in Pricing
from yosrai import calculate_cost

# Calculate cost for OpenAI GPT-4o
# ($5.00/1M input, $15.00/1M output per the pricing table below)
cost = calculate_cost("openai/gpt-4o", prompt_tokens=1000, completion_tokens=500)
print(f"Cost: ${cost:.4f}")  # "$0.0125"

# Calculate cost for Anthropic Claude 3 Sonnet
# ($3.00/1M input, $15.00/1M output)
cost = calculate_cost("anthropic/claude-3-sonnet", prompt_tokens=1000, completion_tokens=500)
print(f"Cost: ${cost:.4f}")  # "$0.0105"
Supported Models
YosrAI includes pricing for major LLM providers:
OpenAI:
- gpt-4o: $5.00/1M input, $15.00/1M output
- gpt-4o-mini: $0.15/1M input, $0.60/1M output
- gpt-3.5-turbo: $0.50/1M input, $1.50/1M output
Anthropic:
- claude-3-5-sonnet: $3.00/1M input, $15.00/1M output
- claude-3-haiku: $0.25/1M input, $1.25/1M output
- claude-3-sonnet: $3.00/1M input, $15.00/1M output
Google:
- gemini-1.5-pro: $1.25/1M input, $5.00/1M output
- gemini-1.5-flash: $0.15/1M input, $0.60/1M output
Ollama (Local):
- All models: $0.00 (free)
Custom Pricing
Register Custom Models
from yosrai import register_model_pricing

# Add pricing for custom/private models.
# NOTE: prices here are expressed per 1K tokens (not per 1M as in the table above).
register_model_pricing(
    "my-private-model",
    input_price=0.001,   # $0.001 per 1K input tokens
    output_price=0.002   # $0.002 per 1K output tokens
)

# Now calculate costs for your custom model (calculate_cost imported earlier)
cost = calculate_cost("my-private-model", prompt_tokens=1000, completion_tokens=500)
print(f"Cost: ${cost:.4f}")
Update Existing Pricing
# Update pricing for an existing model (e.g., after a provider price change).
# Prices are per 1K tokens, matching register_model_pricing's units above:
# a new rate of $4.00/1M input and $12.00/1M output is 0.004 and 0.012 per 1K.
register_model_pricing("openai/gpt-4o", input_price=0.004, output_price=0.012)
Cost Monitoring
Real-time Cost Display
import time
from yosrai import Agent

agent = Agent.from_preset("assistant")

# Monitor cost during long-running tasks
start_time = time.time()
result = agent.run("Write a detailed analysis of climate change")
elapsed = time.time() - start_time

cost = agent.last_usage.cost
print(f"Analysis completed in {elapsed:.1f}s")
print(f"Cost: ${cost:.4f}")
print(f"Cost per second: ${cost/elapsed:.4f}/s")  # average burn rate for this run
Budget Management
def run_with_budget_limit(agent, prompt, max_cost=1.00):
    """Run *agent* on *prompt*, withholding the result if it cost over *max_cost*.

    Note: the run executes first, so the spend still happens; only the
    result is discarded (returns None) when the budget is exceeded.
    """
    outcome = agent.run(prompt)
    spent = agent.last_usage.cost
    if spent <= max_cost:
        return outcome
    print(f"Cost exceeded budget: ${spent:.4f} > ${max_cost:.4f}")
    return None
# Usage
result = run_with_budget_limit(agent, "Expensive analysis task", max_cost=0.50)
Cost Analysis
def analyze_usage_history(runs):
    """Summarize cost, token, and time totals (plus averages) over runs.

    Each run is a mapping with a 'usage' object (exposing .cost and
    .total_tokens) and a numeric 'duration' in seconds.
    """
    total_cost = sum(run['usage'].cost for run in runs)
    total_tokens = sum(run['usage'].total_tokens for run in runs)
    total_time = sum(run['duration'] for run in runs)

    return {
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'total_time': total_time,
        # Guard against division by zero on empty/zero-token histories.
        'avg_cost_per_token': total_cost / total_tokens if total_tokens > 0 else 0,
        'avg_cost_per_second': total_cost / total_time if total_time > 0 else 0,
    }
Provider-Specific Features
OpenAI Usage Details
# OpenAI provides detailed token breakdowns
agent = Agent(model="openai/gpt-4o")
result = agent.run("Complex analysis task")
usage = agent.last_usage
# usage.prompt_tokens includes system message, user message, etc.
# usage.completion_tokens is the assistant's response
print(f"System tokens: {usage.prompt_tokens - user_tokens}")
print(f"User tokens: {user_tokens}")
print(f"Assistant tokens: {usage.completion_tokens}")
Anthropic Usage Tracking
# Anthropic provides input/output token counts
agent = Agent(model="anthropic/claude-3-sonnet")
result = agent.run("Analysis task")
usage = agent.last_usage
print(f"Input tokens: {usage.prompt_tokens}")
print(f"Output tokens: {usage.completion_tokens}")
Streaming Cost Estimation
# Estimate costs during streaming (approximate)
agent = Agent(model="openai/gpt-4o", stream=True)
accumulated_tokens = 0
async for chunk in agent.astream("Long analysis task"):
# Estimate tokens from chunk size (rough approximation)
accumulated_tokens += len(chunk.content.split()) * 1.3 # ~1.3 tokens per word
# Estimate cost
estimated_cost = calculate_cost("openai/gpt-4o", 1000, accumulated_tokens)
print(f"Estimated cost so far: ${estimated_cost:.4f}")
CLI Cost Monitoring
Interactive Cost Display
# Chat with cost tracking
yosrai chat --preset researcher
# Output shows cost per message
You: Explain quantum computing
Assistant: [response...]
Message 1 | Total tokens: 245 | Cost: $0.0074
Batch Processing Costs
# Process multiple items with cost tracking
for file in data/*.json; do
echo "Processing $file..."
yosrai run agent.json --inputs-file "$file" --quiet
# Check exit code for cost limits if needed
done
Cost-Aware CI/CD
# Validate and check costs in CI/CD
yosrai validate blueprints/ --json > validation.json

# Run with cost monitoring
yosrai run workflow.json --inputs-file input.json --json > result.json

# Extract cost for reporting
COST=$(jq '.usage.cost' result.json)
# NOTE: "$$" expands to the shell's PID, so the literal dollar sign must be escaped.
echo "Workflow cost: \$${COST}"

# Fail if too expensive
if (( $(echo "$COST > 1.00" | bc -l) )); then
    echo "Cost exceeded budget: \$${COST}"
    exit 1
fi
Best Practices
Cost Optimization
1. Choose Appropriate Models
# Use cheaper models for simple tasks
cheap_agent = Agent.from_preset("assistant", model="openai/gpt-4o-mini")
# Use expensive models only when needed
expensive_agent = Agent.from_preset("researcher", model="openai/gpt-4o")
2. Monitor and Alert
def cost_aware_run(agent, prompt, max_cost=0.10):
    """Run *agent* on *prompt*, printing an alert when the cost tops *max_cost*.

    The result is always returned; exceeding the limit only emits a warning.
    """
    response = agent.run(prompt)
    actual = agent.last_usage.cost
    if actual > max_cost:
        print(f"⚠️ High cost alert: ${actual:.4f} (limit: ${max_cost:.4f})")
    return response
3. Cache Expensive Results
import hashlib
import json
import os
import time

def cached_expensive_call(agent, prompt, cache_file="cache.json"):
    """Run *agent* on *prompt*, caching results on disk keyed by prompt hash.

    Returns the cached result when the same prompt was seen before, avoiding
    a second (paid) LLM call. Each cache entry also records the original cost
    and a timestamp. Assumes agent.run() returns a JSON-serializable result.
    """
    # MD5 is fine here: the digest is only a cache key, not a security feature.
    key = hashlib.md5(prompt.encode()).hexdigest()

    # Load the cache once and reuse it for both lookup and update
    # (the original read the file a second time before writing).
    cache = {}
    if os.path.exists(cache_file):
        with open(cache_file) as f:
            cache = json.load(f)

    if key in cache:
        return cache[key]['result']

    # Cache miss: run the expensive operation and record it.
    result = agent.run(prompt)
    cost = agent.last_usage.cost
    cache[key] = {'result': result, 'cost': cost, 'timestamp': time.time()}
    with open(cache_file, 'w') as f:
        json.dump(cache, f)
    return result
Cost Reporting
Daily Cost Tracking
import json
from datetime import datetime

def log_daily_cost(run_data):
    """Append one run's cost data to today's JSONL log and return the entry.

    *run_data* is a mapping with 'model', 'task', and a 'usage' object
    exposing .total_tokens and .cost.
    """
    # Capture the clock once so the date and timestamp cannot straddle midnight
    # (the original called datetime.now() twice).
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")
    log_entry = {
        'date': today,
        'timestamp': now.isoformat(),
        'model': run_data['model'],
        'tokens': run_data['usage'].total_tokens,
        'cost': run_data['usage'].cost,
        'task': run_data['task']
    }

    # Append to the daily log (one JSON object per line)
    log_file = f"cost_log_{today}.jsonl"
    with open(log_file, 'a') as f:
        f.write(json.dumps(log_entry) + '\n')

    return log_entry
Cost Analytics
def analyze_cost_logs(log_files):
    """Aggregate cost statistics from one or more JSONL cost logs.

    Each log line is a JSON object with 'cost', 'tokens', and 'model'
    fields. Returns totals, the average cost per token, and a per-model
    cost breakdown.
    """
    total_cost = 0
    total_tokens = 0
    per_model = {}

    for path in log_files:
        with open(path) as fh:
            for raw_line in fh:
                record = json.loads(raw_line)
                total_cost += record['cost']
                total_tokens += record['tokens']
                name = record['model']
                per_model[name] = per_model.get(name, 0) + record['cost']

    return {
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'avg_cost_per_token': total_cost / total_tokens if total_tokens > 0 else 0,
        'model_breakdown': per_model,
    }
Troubleshooting
Common Issues
Usage data not available:
# Check if agent was run
if hasattr(agent, 'last_usage') and agent.last_usage:
print(f"Cost: {agent.last_usage.cost}")
else:
print("No usage data - run the agent first")
Pricing not found for model:
# Register custom pricing
register_model_pricing("unknown-model", input_price=0.001, output_price=0.002)
# Or use calculate_cost with explicit prices
cost = (prompt_tokens * 0.001 + completion_tokens * 0.002) / 1000
Cost calculation seems wrong:
# Verify token counts
usage = agent.last_usage
print(f"Prompt: {usage.prompt_tokens}, Completion: {usage.completion_tokens}")
print(f"Expected cost: ${calculate_cost(model, usage.prompt_tokens, usage.completion_tokens):.4f}")
Migration Guide
From Manual Tracking
# Old way (manual)
start_tokens = get_token_count()
result = agent.run(prompt)
end_tokens = get_token_count()
cost = calculate_cost_manually(end_tokens - start_tokens)
# New way (automatic)
result = agent.run(prompt)
cost = agent.last_usage.cost
From External Monitoring
# Old way (external logging)
result = agent.run(prompt)
log_to_external_service({
'tokens': external_token_counter(),
'cost': external_cost_calculator(),
'model': 'gpt-4'
})
# New way (built-in)
result = agent.run(prompt)
log_data = {
'tokens': agent.last_usage.total_tokens,
'cost': agent.last_usage.cost,
'model': agent.model
}
This comprehensive cost tracking system ensures you always know how much your AI usage costs, enabling better budget management and cost optimization strategies.