
Usage & Cost Tracking

Monitor LLM token usage and costs across all providers with built-in pricing tables and real-time tracking.

Overview

YosrAI provides comprehensive cost tracking to help you monitor and control LLM usage expenses. Every API call is automatically tracked with token counts and calculated costs.

Usage Model

class Usage:
    """Token counts and calculated cost for a single LLM call."""
    prompt_tokens: int      # Input tokens
    completion_tokens: int  # Output tokens
    total_tokens: int       # Total tokens used
    cost: float            # Calculated cost in USD
    cost_formatted: str    # Formatted cost string (e.g., "$0.0142")

Automatic Tracking

Agent Usage

from yosrai import Agent

agent = Agent.from_preset("assistant")

# Run agent (usage automatically tracked)
result = agent.run("Write a short story")

# Access usage data
usage = agent.last_usage
print(f"Prompt tokens: {usage.prompt_tokens}")
print(f"Completion tokens: {usage.completion_tokens}")
print(f"Total tokens: {usage.total_tokens}")
print(f"Cost: {usage.cost_formatted}")  # "$0.0024"

Workflow Usage

from yosrai import Workflow, Agent

agent1 = Agent.from_preset("researcher")
agent2 = Agent.from_preset("writer")

workflow = Workflow("Pipeline").start(agent1).then(agent2)
result = workflow.run("Create an article about AI")

# Aggregate usage across all steps
total_usage = workflow.last_usage
print(f"Total workflow cost: {total_usage.cost_formatted}")

Conversation Usage

from yosrai import Agent

agent = Agent.from_preset("assistant")

with agent.conversation() as chat:
    chat.send_message("Hello!")
    chat.send_message("How are you?")
    chat.send_message("Tell me a joke.")

    # Cumulative cost across conversation
    print(f"Total conversation cost: {chat.total_usage.cost_formatted}")

Manual Cost Calculation

Built-in Pricing

from yosrai import calculate_cost

# Calculate cost for OpenAI GPT-4
cost = calculate_cost("openai/gpt-4o", prompt_tokens=1000, completion_tokens=500)
print(f"Cost: ${cost:.4f}")  # "$0.0125" (1000 × $5/1M input + 500 × $15/1M output)

# Calculate cost for Anthropic Claude
cost = calculate_cost("anthropic/claude-3-sonnet", prompt_tokens=1000, completion_tokens=500)
print(f"Cost: ${cost:.4f}")  # "$0.0105" (1000 × $3/1M input + 500 × $15/1M output)

Supported Models

YosrAI includes pricing for major LLM providers:

OpenAI: - gpt-4o: $5.00/1M input, $15.00/1M output - gpt-4o-mini: $0.15/1M input, $0.60/1M output - gpt-3.5-turbo: $0.50/1M input, $1.50/1M output

Anthropic: - claude-3-5-sonnet: $3.00/1M input, $15.00/1M output - claude-3-haiku: $0.25/1M input, $1.25/1M output - claude-3-sonnet: $3.00/1M input, $15.00/1M output

Google: - gemini-1.5-pro: $1.25/1M input, $5.00/1M output - gemini-1.5-flash: $0.15/1M input, $0.60/1M output

Ollama (Local): - All models: $0.00 (free)

Custom Pricing

Register Custom Models

from yosrai import register_model_pricing

# Add pricing for custom/private models
register_model_pricing(
    "my-private-model",
    input_price=0.001,    # $0.001 per 1K input tokens
    output_price=0.002    # $0.002 per 1K output tokens
)

# Now calculate costs for your custom model
cost = calculate_cost("my-private-model", prompt_tokens=1000, completion_tokens=500)
print(f"Cost: ${cost:.4f}")

Update Existing Pricing

# Update pricing for existing model (e.g., after price change)
register_model_pricing("openai/gpt-4o", input_price=4.00, output_price=12.00)

Cost Monitoring

Real-time Cost Display

import time
from yosrai import Agent

agent = Agent.from_preset("assistant")

# Monitor cost during long-running tasks
start_time = time.time()
result = agent.run("Write a detailed analysis of climate change")

elapsed = time.time() - start_time
cost = agent.last_usage.cost

print(f"Analysis completed in {elapsed:.1f}s")
print(f"Cost: ${cost:.4f}")
print(f"Cost per second: ${cost/elapsed:.4f}/s")

Budget Management

def run_with_budget_limit(agent, prompt, max_cost=1.00):
    """Run *agent* on *prompt* and discard the result if it cost more than *max_cost* USD.

    Note: the limit is checked after the run, so the tokens are already spent
    even when None is returned.
    """
    outcome = agent.run(prompt)
    spent = agent.last_usage.cost

    if spent <= max_cost:
        return outcome

    print(f"Cost exceeded budget: ${spent:.4f} > ${max_cost:.4f}")
    return None

# Usage
result = run_with_budget_limit(agent, "Expensive analysis task", max_cost=0.50)

Cost Analysis

def analyze_usage_history(runs):
    """Summarize cost, token, and wall-time totals across *runs*.

    Each run is a dict with a 'usage' object (``.cost``, ``.total_tokens``)
    and a 'duration' in seconds. Returns totals plus per-token and
    per-second cost averages (0 when the denominator is zero).
    """
    total_cost = total_tokens = total_time = 0

    # Single pass so one-shot iterators work too.
    for entry in runs:
        usage = entry['usage']
        total_cost += usage.cost
        total_tokens += usage.total_tokens
        total_time += entry['duration']

    return {
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'total_time': total_time,
        'avg_cost_per_token': total_cost / total_tokens if total_tokens > 0 else 0,
        'avg_cost_per_second': total_cost / total_time if total_time > 0 else 0,
    }

Provider-Specific Features

OpenAI Usage Details

# OpenAI provides detailed token breakdowns
agent = Agent(model="openai/gpt-4o")
result = agent.run("Complex analysis task")

usage = agent.last_usage
# usage.prompt_tokens includes system message, user message, etc.
# usage.completion_tokens is the assistant's response
print(f"Prompt tokens (system + user messages): {usage.prompt_tokens}")
print(f"Assistant tokens: {usage.completion_tokens}")

Anthropic Usage Tracking

# Anthropic provides input/output token counts
agent = Agent(model="anthropic/claude-3-sonnet")
result = agent.run("Analysis task")

usage = agent.last_usage
print(f"Input tokens: {usage.prompt_tokens}")
print(f"Output tokens: {usage.completion_tokens}")

Streaming Cost Estimation

# Estimate costs during streaming (approximate)
agent = Agent(model="openai/gpt-4o", stream=True)

accumulated_tokens = 0
async for chunk in agent.astream("Long analysis task"):
    # Estimate tokens from chunk size (rough approximation)
    accumulated_tokens += len(chunk.content.split()) * 1.3  # ~1.3 tokens per word

    # Estimate cost (assumes ~1000 prompt tokens; completion count is approximate)
    estimated_cost = calculate_cost("openai/gpt-4o", 1000, int(accumulated_tokens))
    print(f"Estimated cost so far: ${estimated_cost:.4f}")

CLI Cost Monitoring

Interactive Cost Display

# Chat with cost tracking
yosrai chat --preset researcher

# Output shows cost per message
You: Explain quantum computing
Assistant: [response...]
Message 1 | Total tokens: 245 | Cost: $0.0074

Batch Processing Costs

# Process multiple items with cost tracking
for file in data/*.json; do
    echo "Processing $file..."
    yosrai run agent.json --inputs-file "$file" --quiet
    # Check exit code for cost limits if needed
done

Cost-Aware CI/CD

# Validate and check costs in CI/CD
yosrai validate blueprints/ --json > validation.json

# Run with cost monitoring
yosrai run workflow.json --inputs-file input.json --json > result.json

# Extract cost for reporting
COST=$(jq '.usage.cost' result.json)
echo "Workflow cost: \$${COST}"

# Fail if too expensive
if (( $(echo "$COST > 1.00" | bc -l) )); then
    echo "Cost exceeded budget: \$${COST}"
    exit 1
fi

Best Practices

Cost Optimization

1. Choose Appropriate Models

# Use cheaper models for simple tasks
cheap_agent = Agent.from_preset("assistant", model="openai/gpt-4o-mini")

# Use expensive models only when needed
expensive_agent = Agent.from_preset("researcher", model="openai/gpt-4o")

2. Monitor and Alert

def cost_aware_run(agent, prompt, max_cost=0.10):
    """Run *agent* on *prompt*, warning (but still returning the result) when the cost exceeds *max_cost*."""
    output = agent.run(prompt)
    spent = agent.last_usage.cost

    if spent > max_cost:
        print(f"⚠️  High cost alert: ${spent:.4f} (limit: ${max_cost:.4f})")

    return output

3. Cache Expensive Results

import hashlib

def cached_expensive_call(agent, prompt, cache_file="cache.json"):
    """Run ``agent.run(prompt)``, caching results on disk keyed by an MD5 of the prompt.

    Returns the cached result when the same prompt was seen before; otherwise
    runs the agent, stores the result with its cost and a timestamp, and
    returns it.

    Fixes over the naive version: the cache file is loaded only ONCE (the
    original read it twice — once for the lookup and again before writing),
    and a missing or corrupt cache file is treated as an empty cache instead
    of crashing the call.
    """
    # MD5 is fine here: it's a cache key, not a security boundary.
    key = hashlib.md5(prompt.encode()).hexdigest()

    # Load the cache once; tolerate a missing or corrupt file.
    cache = {}
    if os.path.exists(cache_file):
        try:
            with open(cache_file) as f:
                cache = json.load(f)
        except (json.JSONDecodeError, OSError):
            cache = {}

    if key in cache:
        return cache[key]['result']

    # Cache miss: run the expensive operation.
    result = agent.run(prompt)
    cost = agent.last_usage.cost

    cache[key] = {'result': result, 'cost': cost, 'timestamp': time.time()}

    with open(cache_file, 'w') as f:
        json.dump(cache, f)

    return result

Cost Reporting

Daily Cost Tracking

def log_daily_cost(run_data):
    """Append one cost record for *run_data* to today's JSONL log and return it.

    Expects keys 'model', 'task', and a 'usage' object with
    ``.total_tokens`` and ``.cost``. Writes to ``cost_log_YYYY-MM-DD.jsonl``
    in the current directory.
    """
    today = datetime.now().strftime("%Y-%m-%d")

    entry = {
        'date': today,
        'timestamp': datetime.now().isoformat(),
        'model': run_data['model'],
        'tokens': run_data['usage'].total_tokens,
        'cost': run_data['usage'].cost,
        'task': run_data['task'],
    }

    # One JSON object per line: safe to append concurrently-ish and easy to grep.
    with open(f"cost_log_{today}.jsonl", 'a') as f:
        f.write(json.dumps(entry) + '\n')

    return entry

Cost Analytics

def analyze_cost_logs(log_files):
    """Aggregate cost/token totals and a per-model cost breakdown from JSONL logs.

    *log_files* is an iterable of paths to files with one JSON record per
    line (as written by log_daily_cost). Returns totals, the average cost
    per token (0 when no tokens), and a dict of cost per model.
    """
    total_cost = 0
    total_tokens = 0
    model_costs = {}

    for path in log_files:
        with open(path) as handle:
            for raw_line in handle:
                record = json.loads(raw_line)
                total_cost += record['cost']
                total_tokens += record['tokens']
                name = record['model']
                model_costs[name] = model_costs.get(name, 0) + record['cost']

    return {
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'avg_cost_per_token': total_cost / total_tokens if total_tokens > 0 else 0,
        'model_breakdown': model_costs,
    }

Troubleshooting

Common Issues

Usage data not available:

# Check if agent was run
if hasattr(agent, 'last_usage') and agent.last_usage:
    print(f"Cost: {agent.last_usage.cost}")
else:
    print("No usage data - run the agent first")

Pricing not found for model:

# Register custom pricing
register_model_pricing("unknown-model", input_price=0.001, output_price=0.002)

# Or use calculate_cost with explicit prices
cost = (prompt_tokens * 0.001 + completion_tokens * 0.002) / 1000

Cost calculation seems wrong:

# Verify token counts
usage = agent.last_usage
print(f"Prompt: {usage.prompt_tokens}, Completion: {usage.completion_tokens}")
print(f"Expected cost: ${calculate_cost(model, usage.prompt_tokens, usage.completion_tokens):.4f}")

Migration Guide

From Manual Tracking

# Old way (manual)
start_tokens = get_token_count()
result = agent.run(prompt)
end_tokens = get_token_count()
cost = calculate_cost_manually(end_tokens - start_tokens)

# New way (automatic)
result = agent.run(prompt)
cost = agent.last_usage.cost

From External Monitoring

# Old way (external logging)
result = agent.run(prompt)
log_to_external_service({
    'tokens': external_token_counter(),
    'cost': external_cost_calculator(),
    'model': 'gpt-4'
})

# New way (built-in)
result = agent.run(prompt)
log_data = {
    'tokens': agent.last_usage.total_tokens,
    'cost': agent.last_usage.cost,
    'model': agent.model
}

This comprehensive cost tracking system ensures you always know how much your AI usage costs, enabling better budget management and cost optimization strategies.