Source code for lrs.core.free_energy

"""Free energy calculations for Active Inference."""

from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import math

from lrs.core.lens import ToolLens  # CRITICAL: Import ToolLens
from lrs.core.registry import ToolRegistry


[docs] @dataclass class PolicyEvaluation: """Results of policy evaluation.""" epistemic_value: float pragmatic_value: float total_G: float expected_success_prob: float components: Dict[str, Any]
[docs] def calculate_epistemic_value( policy: List[ToolLens], registry: Optional[ToolRegistry] = None ) -> float: """ Calculate epistemic value (information gain) of a policy. Higher values indicate more information gain from exploration. Args: policy: Sequence of tools to execute registry: Tool registry with statistics Returns: Epistemic value (information gain) Example: >>> epistemic = calculate_epistemic_value([novel_tool]) >>> # High value for unexplored tools """ if not policy: return 0.0 epistemic = 0.0 for tool in policy: # Information gain ≈ entropy of success distribution # H(p) = -p*log(p) - (1-p)*log(1-p) p = tool.success_rate if hasattr(tool, 'success_rate') else 0.5 # Clamp to avoid log(0) p = max(0.01, min(0.99, p)) entropy = -(p * math.log(p) + (1 - p) * math.log(1 - p)) epistemic += entropy return epistemic
[docs] def calculate_pragmatic_value( policy: List[ToolLens], preferences: Dict[str, float], registry: Optional[ToolRegistry] = None, discount: float = 0.95 ) -> float: """ Calculate pragmatic value (expected reward) of a policy. Higher values indicate higher expected utility. Args: policy: Sequence of tools to execute preferences: Reward/cost for outcomes (success, error, step_cost) registry: Tool registry with statistics discount: Temporal discount factor (default: 0.95) Returns: Pragmatic value (expected reward) Example: >>> pragmatic = calculate_pragmatic_value( ... [reliable_tool], ... preferences={'success': 5.0, 'error': -3.0} ... ) >>> # High value for reliable tools """ if not policy: return 0.0 reward_success = preferences.get('success', 1.0) reward_error = preferences.get('error', -1.0) step_cost = preferences.get('step_cost', -0.1) pragmatic = 0.0 discount_factor = 1.0 for tool in policy: p_success = tool.success_rate if hasattr(tool, 'success_rate') else 0.5 # Expected reward for this step expected_reward = ( p_success * reward_success + (1 - p_success) * reward_error + step_cost ) pragmatic += discount_factor * expected_reward discount_factor *= discount return pragmatic
[docs] def calculate_expected_free_energy( policy: List[ToolLens], registry: Optional[ToolRegistry] = None, preferences: Optional[Dict[str, float]] = None, precision: float = 0.5, epistemic_weight: Optional[float] = None ) -> float: """ Calculate Expected Free Energy G(π) for a policy. G(π) = Epistemic Value - Pragmatic Value Lower G is better (minimization objective). Args: policy: Sequence of tools to execute registry: Tool registry with statistics preferences: Reward structure precision: Current precision γ ∈ [0,1] epistemic_weight: Override for epistemic term weight Returns: Expected Free Energy G Example: >>> G = calculate_expected_free_energy( ... policy=[search_tool, filter_tool], ... preferences={'success': 5.0, 'error': -3.0}, ... precision=0.7 ... ) >>> # Low G indicates good policy """ if not policy: return 0.0 if preferences is None: preferences = {'success': 1.0, 'error': -1.0, 'step_cost': -0.1} # Calculate components epistemic = calculate_epistemic_value(policy, registry) pragmatic = calculate_pragmatic_value(policy, preferences, registry) # Weight epistemic term by uncertainty (1 - precision) if epistemic_weight is None: epistemic_weight = 1.0 - precision # G = Epistemic - Pragmatic G = epistemic_weight * epistemic - pragmatic return G
[docs] def precision_weighted_selection( policy_evaluations: List[PolicyEvaluation], precision: float = 0.5, temperature: float = 1.0 ) -> int: """ Select policy using precision-weighted softmax. P(π) ∝ exp(-β * G(π)) where β = precision (inverse temperature). Args: policy_evaluations: Evaluated policies precision: Current precision γ ∈ [0,1] temperature: Additional temperature parameter Returns: Index of selected policy Example: >>> selected_idx = precision_weighted_selection( ... evaluations, ... precision=0.7 ... ) >>> best_policy = policies[selected_idx] """ import random if not policy_evaluations: raise ValueError("Cannot select from empty evaluations") # Extract G values G_values = [eval.total_G for eval in policy_evaluations] # Softmax with precision as inverse temperature beta = precision / temperature exp_values = [math.exp(-beta * G) for G in G_values] total = sum(exp_values) probabilities = [e / total for e in exp_values] # Sample from distribution r = random.random() cumsum = 0.0 for i, p in enumerate(probabilities): cumsum += p if r < cumsum: return i return len(probabilities) - 1 # Fallback