from dataclasses import dataclass
from typing import Optional, Tuple
import numpy as np
from scipy.sparse import diags, eye
from scipy.sparse.linalg import eigs
[docs]class RankCentrality:
"""Rank Centrality for spectral ranking from pairwise comparisons."""
def __init__(self, k: int):
self.k = k
[docs] def stationary_distribution(self, wins: np.ndarray, losses: np.ndarray) -> np.ndarray:
"""
Compute stationary distribution using Rank Centrality.
Args:
wins (np.ndarray): Win counts (k x k).
losses (np.ndarray): Loss counts (k x k).
Returns:
np.ndarray: Stationary distribution (pi).
"""
total = wins + losses
total = np.where(total == 0, 1, total)
P = wins / total
P = np.where(wins + losses == 0, 1.0 / self.k, P)
np.fill_diagonal(P, 0)
degrees = P.sum(axis=1)
degrees = np.where(degrees == 0, 1, degrees)
D_inv = diags(1.0 / degrees)
M = eye(self.k) - D_inv @ P
_, vecs = eigs(M.T, k=1, which='SM')
pi = np.abs(vecs[:, 0].real)
pi /= pi.sum()
return pi
[docs]@dataclass
class DuelingBanditEnv:
"""Dueling Bandit environment with Bradley-Terry model."""
P: np.ndarray
features: Optional[np.ndarray] = None
seed: Optional[int] = None
def __post_init__(self):
self.k = self.P.shape[0]
self.rng = np.random.default_rng(self.seed)
self.rank = RankCentrality(self.k)
[docs] @classmethod
def random_bt(cls, k: int, d: int = 0, seed: Optional[int] = None) -> "DuelingBanditEnv":
"""Generate a random Bradley-Terry environment."""
rng = np.random.default_rng(seed)
utilities = rng.normal(size=k)
P = 1.0 / (1.0 + np.exp(utilities[:, None] - utilities[None, :]))
np.fill_diagonal(P, 0.5)
features = rng.normal(size=(k, d)) if d > 0 else None
return cls(P, features, seed)
[docs] def duel(self, a: int, b: int) -> Tuple[int, int]:
"""Perform a duel between two items."""
if self.rng.random() < self.P[a, b]:
return a, b
return b, a
[docs] def best_arm(self) -> int:
"""Return the index of the item with the highest BTL score."""
utilities = -np.log(1.0 / self.P - 1)
return int(np.argmax(np.nanmean(utilities, axis=1)))
[docs] def true_rank(self) -> np.ndarray:
"""Return the true ranking of items."""
utilities = -np.log(1.0 / self.P - 1)
scores = np.nanmean(utilities, axis=1)
return np.argsort(-scores)
[docs] def delta12(self) -> float:
"""Compute the separation metric Delta_1,2."""
top2 = np.argsort(-np.nanmean(-np.log(1.0 / self.P - 1), axis=1))[:2]
return (self.P[top2[0], top2[1]] - 0.5) ** 2