"""Gradient methods for Qiskit parameterized quantum circuits.

Module: kosmos.ml.models.vqc.circuit.qiskit_circuit.gradient_method
"""

import math
from abc import ABC, abstractmethod

import numpy as np
import torch
from qiskit import QuantumCircuit

from kosmos.ml.typing import TensorNpArray
from kosmos.utils.rng import RNG

DEVICE = "cpu"


class GradientMethod(ABC):
    r"""Base interface for computing gradients of parameterized quantum circuits.

    Concrete subclasses provide a ``jacobian`` implementation, which may be
    analytic (e.g., the parameter-shift rule) or stochastic and gradient-free
    (e.g., SPSA). In every case ``jacobian`` computes d(outputs)/d(weights).

    Attributes:
        parameterized_circuit (QiskitParameterizedCircuit | None): The Qiskit
            parameterized circuit whose gradients are computed.

    Notes:
        Two implementations ship with this package:

        - ``ParameterShiftRule``: exact and low variance, at the cost of two
          circuit evaluations per parameter.
        - ``SPSA``: stochastic with higher variance, but needing only two
          evaluations per sample regardless of the parameter count.
    """

    def __init__(self) -> None:
        """Create the gradient method with no circuit attached yet."""
        # Must be populated via set_parameterized_circuit() before gradients
        # can be computed.
        self.parameterized_circuit = None

    def set_parameterized_circuit(
        self,
        parameterized_circuit: "QiskitParameterizedCircuit",  # noqa: F821
    ) -> None:
        """Attach the parameterized circuit used for gradient computation.

        Call this before any invocation of ``jacobian``.

        Args:
            parameterized_circuit (QiskitParameterizedCircuit): The Qiskit
                parameterized circuit.
        """
        self.parameterized_circuit = parameterized_circuit

    def validate_parameterized_circuit(self) -> None:
        """Raise ``ValueError`` when no parameterized circuit is attached."""
        if self.parameterized_circuit is not None:
            return
        error_text = "The parameterized circuit has not been set for the gradient method."
        raise ValueError(error_text)

    @abstractmethod
    def jacobian(self, x: TensorNpArray, weights: np.ndarray) -> torch.Tensor:
        """Compute d(outputs)/d(weights).

        Args:
            x (TensorNpArray): Input values, shape (len_x, input_dim).
            weights (np.ndarray): Weights values.

        Returns:
            torch.Tensor: Jacobian of shape (len_x, output_dim, followed by
                weights.shape).
        """
class ParameterShiftRule(GradientMethod):
    r"""Gradient computation using the parameter-shift rule.

    The parameter-shift rule is an analytic approach to computing gradients in
    variational quantum circuits. The gradient of an expectation value can be
    computed exactly using two circuit evaluations per parameter:

    .. math::
        f'(\theta) = \frac{f(\theta + s) - f(\theta - s)}{2 \sin(s)}

    In the common case of Pauli rotations, the canonical shift is
    :math:`s = \pi/2`.

    Attributes:
        shift (float): Shift magnitude.

    Notes:
        - The method provides low-variance, unbiased gradients.
        - Computational cost scales linearly with the number of parameters
          (two evaluations per parameter).
        - Requires the circuit to be differentiable in the parameter of
          interest and the underlying generator to have a known spectrum.
    """

    def __init__(self, shift: float = np.pi / 2) -> None:
        """Initialize the parameter-shift rule.

        Args:
            shift (float): Shift magnitude. Defaults to π/2.

        Raises:
            ValueError: If ``shift`` is not finite or ``sin(shift)`` is zero
                (the rule divides by ``2 sin(shift)``).
        """
        super().__init__()
        if not math.isfinite(shift) or math.isclose(math.sin(shift), 0.0, abs_tol=1e-12):
            msg = "shift must be finite with a nonzero sin(shift)."
            raise ValueError(msg)
        self.shift = shift

    def jacobian(self, x: TensorNpArray, weights: np.ndarray) -> torch.Tensor:
        """Compute d(outputs)/d(weights) via parameter-shift.

        Args:
            x (TensorNpArray): Input values, shape (len_x, input_dim).
            weights (np.ndarray): Weights values.

        Returns:
            torch.Tensor: Jacobian of shape (len_x, output_dim, followed by
                weights.shape).
        """
        self.validate_parameterized_circuit()
        x = x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x
        len_x = len(x)
        wshape = weights.shape
        indices = list(np.ndindex(wshape))

        # Build one shifted circuit per (parameter index, input point, ±shift).
        qcs: list[QuantumCircuit] = []
        for idx in indices:
            for xb in x:
                for s in (self.shift, -self.shift):
                    w = weights.copy()
                    w[idx] += s
                    qcs.append(self.parameterized_circuit.circuit(w, xb))

        outputs = self.parameterized_circuit.execute_circuits(qcs)

        # Two-term shift rule: f'(θ) = [f(θ+s) − f(θ−s)] / (2 sin s).
        # BUGFIX: the factor was previously hard-coded to 0.5, which is only
        # correct for the canonical shift s = π/2 (sin s = 1); any other
        # configured shift produced a wrongly scaled gradient.
        scale = 1.0 / (2.0 * np.sin(self.shift))

        jac = np.zeros((len_x, self.parameterized_circuit.output_dim, *wshape), dtype=np.float32)
        for k, idx in enumerate(indices):
            for b in range(len_x):
                # Plus/minus evaluations for (idx, b) are adjacent in outputs.
                base = (k * len_x + b) * 2
                y_plus = np.atleast_2d(outputs[base])
                y_minus = np.atleast_2d(outputs[base + 1])
                jac[(b, slice(None), *idx)] = scale * (y_plus - y_minus)
        return torch.from_numpy(jac).float().to(DEVICE)
class SPSA(GradientMethod):
    r"""Gradient computation using Simultaneous Perturbation Stochastic Approximation (SPSA).

    SPSA is a gradient-free method that estimates all partial derivatives
    using only two circuit evaluations per random perturbation, independent of
    the number of parameters. A random perturbation vector :math:`\Delta` is
    drawn from a symmetric Bernoulli distribution :math:`\{-1, +1\}`, which is
    optimal in the sense of minimizing estimator variance (Sadegh & Spall,
    1998). The gradient estimate for parameter :math:`i` is:

    .. math::
        \hat{g}_i \approx \frac{f(\theta + \epsilon \Delta)
            - f(\theta - \epsilon \Delta)}{2\epsilon \Delta_i}

    Multiple samples are averaged to reduce variance.

    Attributes:
        epsilon (float): Perturbation magnitude.
        num_samples (int): Number of random perturbation samples to average
            over.
        np_rng (np.random.Generator): NumPy random number generator.

    Notes:
        - Cost is :math:`O(\mathrm{num\_samples})` compared to
          :math:`O(\mathrm{num\_parameters})` of the parameter-shift rule.
        - Produces an unbiased gradient estimator under mild regularity.
        - Typically more robust to noise than analytic gradient methods such
          as parameter-shift.
        - Only gradient estimation is implemented here; the optimizer gain
          sequences :math:`a_k` and :math:`c_k` are not part of this
          component.
        - This implementation follows standard formulations used in quantum
          optimization, such as PennyLane's ``SPSAOptimizer``
          (https://docs.pennylane.ai/en/stable/_modules/pennylane/optimize/spsa.html#SPSAOptimizer.compute_grad).
    """

    def __init__(self, epsilon: float = 0.01, num_samples: int = 3) -> None:
        """Initialize the SPSA gradient method.

        Args:
            epsilon (float): Perturbation magnitude. Defaults to 0.01.
            num_samples (int): Number of random perturbation samples to
                average over. Defaults to 3.

        Raises:
            ValueError: If ``epsilon`` or ``num_samples`` is non-positive or
                non-finite.
        """
        super().__init__()
        if not math.isfinite(epsilon) or epsilon <= 0:
            err_msg = "epsilon must be > 0 and finite."
            raise ValueError(err_msg)
        if not math.isfinite(num_samples) or num_samples <= 0:
            err_msg = "num_samples must be > 0 and finite."
            raise ValueError(err_msg)
        self.epsilon = epsilon
        self.num_samples = num_samples
        self.np_rng = RNG.np_generator()

    def jacobian(self, x: TensorNpArray, weights: np.ndarray) -> torch.Tensor:
        """Compute d(outputs)/d(weights) via SPSA.

        Args:
            x (TensorNpArray): Input values, shape (len_x, input_dim).
            weights (np.ndarray): Weights values.

        Returns:
            torch.Tensor: Jacobian of shape (len_x, output_dim, followed by
                weights.shape).
        """
        self.validate_parameterized_circuit()
        if isinstance(x, torch.Tensor):
            x = x.detach().cpu().numpy()
        num_inputs = len(x)
        weight_shape = weights.shape
        output_dim = self.parameterized_circuit.output_dim

        # One symmetric ±1 perturbation per sample; for each sample build two
        # circuits (plus/minus shift) per input point, in that order.
        perturbations: list[np.ndarray] = []
        circuits: list[QuantumCircuit] = []
        for _ in range(self.num_samples):
            delta = self.np_rng.choice(np.array([-1.0, 1.0], dtype=np.float32), size=weight_shape)
            perturbations.append(delta)
            shifted_up = weights + self.epsilon * delta
            shifted_down = weights - self.epsilon * delta
            for point in x:
                circuits.append(self.parameterized_circuit.circuit(shifted_up, point))
                circuits.append(self.parameterized_circuit.circuit(shifted_down, point))

        results = self.parameterized_circuit.execute_circuits(circuits)

        # Accumulate per-sample estimates, then average. Multiplying by
        # delta is equivalent to dividing, since each entry is ±1.
        jac_acc = np.zeros((num_inputs, output_dim, *weight_shape), dtype=np.float32)
        inv_denominator = 1.0 / (2 * self.epsilon)
        for sample_idx, delta in enumerate(perturbations):
            for point_idx in range(num_inputs):
                offset = (sample_idx * num_inputs + point_idx) * 2
                upper = np.asarray(results[offset])
                lower = np.asarray(results[offset + 1])
                central_diff = inv_denominator * (upper - lower)
                for out_idx in range(output_dim):
                    jac_acc[point_idx, out_idx] += central_diff[out_idx] * delta
        jac_acc /= self.num_samples
        return torch.from_numpy(jac_acc).float().to(DEVICE)