Source code for pennylane.optimize.shot_adaptive

# Copyright 2018-2021 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shot adaptive optimizer"""
# pylint: disable=too-many-instance-attributes,too-many-arguments,too-many-branches
from copy import copy
import numpy as np
from scipy.stats import multinomial

import pennylane as qml

from .gradient_descent import GradientDescentOptimizer


[docs]class ShotAdaptiveOptimizer(GradientDescentOptimizer):
    r"""Optimizer where the shot rate is adaptively calculated using the variances of the
    parameter-shift gradient.

    By keeping a running average of the parameter-shift gradient and the *variance*
    of the parameter-shift gradient, this optimizer frugally distributes a shot
    budget across the partial derivatives of each parameter.

    In addition, weighted random sampling can be used to further distribute the
    shot budget across the local terms from which the Hamiltonian is constructed.

    .. note::

        The shot adaptive optimizer only supports single QNode objects as objective functions.
        The bound device must also be instantiated with a finite number of shots.

    Args:
        min_shots (int): The minimum number of shots used to estimate the expectations
            of each term in the Hamiltonian. Note that this must be larger than 2 for the variance
            of the gradients to be computed.
        mu (float): The running average constant :math:`\mu \in [0, 1]`. Used to control how
            quickly the number of shots recommended for each gradient component changes.
        b (float): Regularization bias. The bias should be kept small, but non-zero.
        term_sampling (str): The random sampling algorithm to multinomially distribute the shot
            budget across terms in the Hamiltonian expectation value. Currently, only
            ``"weighted_random_sampling"`` is supported. The default value is ``None``, which
            disables the random sampling behaviour.
        stepsize (float): The learning rate :math:`\eta`. The learning rate *must* be such
            that :math:`\eta < 2/L = 2/\sum_i|c_i|`, where:

            * :math:`L \leq \sum_i|c_i|` is the bound on the `Lipschitz constant
              <https://en.wikipedia.org/wiki/Lipschitz_continuity>`__ of the variational quantum
              algorithm objective function, and

            * :math:`c_i` are the coefficients of the Hamiltonian used in the objective function.

    **Example**

    For VQE/VQE-like problems, the objective function for the optimizer can be realized
    as a :class:`~.QNode` object measuring the expectation of a :class:`~.Hamiltonian`.

    >>> from pennylane import numpy as np
    >>> coeffs = [2, 4, -1, 5, 2]
    >>> obs = [
    ...   qml.X(1),
    ...   qml.Z(1),
    ...   qml.X(0) @ qml.X(1),
    ...   qml.Y(0) @ qml.Y(1),
    ...   qml.Z(0) @ qml.Z(1)
    ... ]
    >>> H = qml.Hamiltonian(coeffs, obs)
    >>> dev = qml.device("default.qubit", wires=2, shots=100)
    >>> @qml.qnode(dev)
    >>> def cost(weights):
    ...     qml.StronglyEntanglingLayers(weights, wires=range(2))
    ...     return qml.expval(H)

    Once constructed, the cost function can be passed directly to the
    optimizer's ``step`` method. The attributes ``opt.shots_used`` and
    ``opt.total_shots_used`` can be used to track the number of shots per
    iteration, and across the life of the optimizer, respectively.

    >>> shape = qml.templates.StronglyEntanglingLayers.shape(n_layers=2, n_wires=2)
    >>> params = np.random.random(shape)
    >>> opt = qml.ShotAdaptiveOptimizer(min_shots=10, term_sampling="weighted_random_sampling")
    >>> for i in range(60):
    ...    params = opt.step(cost, params)
    ...    print(f"Step {i}: cost = {cost(params):.2f}, shots_used = {opt.total_shots_used}")
    Step 0: cost = -5.69, shots_used = 240
    Step 1: cost = -2.98, shots_used = 336
    Step 2: cost = -4.97, shots_used = 624
    Step 3: cost = -5.53, shots_used = 1054
    Step 4: cost = -6.50, shots_used = 1798
    Step 5: cost = -6.68, shots_used = 2942
    Step 6: cost = -6.99, shots_used = 4350
    Step 7: cost = -6.97, shots_used = 5814
    Step 8: cost = -7.00, shots_used = 7230
    Step 9: cost = -6.69, shots_used = 9006
    Step 10: cost = -6.85, shots_used = 11286
    Step 11: cost = -6.63, shots_used = 14934
    Step 12: cost = -6.86, shots_used = 17934
    Step 13: cost = -7.19, shots_used = 22950
    Step 14: cost = -6.99, shots_used = 28302
    Step 15: cost = -7.38, shots_used = 34134
    Step 16: cost = -7.66, shots_used = 41022
    Step 17: cost = -7.21, shots_used = 48918
    Step 18: cost = -7.53, shots_used = 56286
    Step 19: cost = -7.46, shots_used = 63822
    Step 20: cost = -7.31, shots_used = 72534
    Step 21: cost = -7.23, shots_used = 82014
    Step 22: cost = -7.31, shots_used = 92838

    .. details::
        :title: Usage Details

        The shot adaptive optimizer is based on the iCANS1 optimizer by
        `Kübler et al. (2020) <https://quantum-journal.org/papers/q-2020-05-11-263/>`__, and works
        as follows:

        1. The initial step of the optimizer is performed with some specified minimum
           number of shots, :math:`s_{min}`, for all partial derivatives.

        2. The parameter-shift rule is then used to estimate the gradient :math:`g_i` with :math:`s_i` shots
           for each parameter :math:`\theta_i`, parameters, as well as the variances
           :math:`v_i` of the estimated gradients.

        3. Gradient descent is performed for each parameter :math:`\theta_i`, using
           the pre-defined learning rate :math:`\eta` and the gradient information :math:`g_i`:
           :math:`\theta_i \rightarrow \theta_i - \eta g_i`.

        4. A maximum shot number is set by maximizing the improvement in the expected gain per shot.
           For a specific parameter value, the improvement in the expected gain per shot
           is then calculated via

           .. math::
               \gamma_i = \frac{1}{s_i} \left[ \left(\eta - \frac{1}{2} L\eta^2\right)
                           g_i^2 - \frac{L\eta^2}{2s_i}v_i \right],

           where:

           * :math:`L \leq \sum_i|c_i|` is the bound on the `Lipschitz constant
             <https://en.wikipedia.org/wiki/Lipschitz_continuity>`__ of the variational quantum algorithm objective function,

           * :math:`c_i` are the coefficients of the Hamiltonian, and

           * :math:`\eta` is the learning rate, and *must* be bound such that :math:`\eta < 2/L`
             for the above expression to hold.

        5. Finally, the new values of :math:`s_{i+1}` (shots for partial derivative of parameter
           :math:`\theta_i`) is given by:

           .. math::

               s_{i+1} = \frac{2L\eta}{2-L\eta}\left(\frac{v_i}{g_i^2}\right)\propto
                     \frac{v_i}{g_i^2}.

        In addition to the above, to counteract the presence of noise in the system, a
        running average of :math:`g_i` and :math:`s_i` (:math:`\chi_i` and :math:`\xi_i` respectively)
        are used when computing :math:`\gamma_i` and :math:`s_i`.

        For more details, see:

        * Andrew Arrasmith, Lukasz Cincio, Rolando D. Somma, and Patrick J. Coles. "Operator Sampling
          for Shot-frugal Optimization in Variational Algorithms." `arXiv:2004.06252
          <https://arxiv.org/abs/2004.06252>`__ (2020).

        * Jonas M. Kübler, Andrew Arrasmith, Lukasz Cincio, and Patrick J. Coles. "An Adaptive Optimizer
          for Measurement-Frugal Variational Algorithms." `Quantum 4, 263
          <https://quantum-journal.org/papers/q-2020-05-11-263/>`__ (2020).
    """

    def __init__(self, min_shots, term_sampling=None, mu=0.99, b=1e-6, stepsize=0.07):
        self.term_sampling = term_sampling
        self.trainable_args = set()

        # hyperparameters
        self.min_shots = min_shots
        self.mu = mu  # running average constant
        self.b = b  # regularization bias
        self.lipschitz = None

        self.shots_used = 0
        """int: number of shots used on the current iteration"""

        self.total_shots_used = 0
        """int: total number of shots used across all iterations"""

        # total number of iterations
        self.k = 0
        # Number of shots per parameter
        self.s = None
        # maximum number of shots required to evaluate across all parameters
        self.max_shots = None

        # Running average of the parameter gradients
        self.chi = None
        # Running average of the variance of the parameter gradients
        self.xi = None

        super().__init__(stepsize=stepsize)

[docs]    @staticmethod
    def qnode_weighted_random_sampling(qnode, coeffs, observables, shots, argnums, *args, **kwargs):
        """Returns an array of length ``shots`` containing single-shot estimates
        of the Hamiltonian gradient. The shots are distributed randomly over
        the terms in the Hamiltonian, as per a multinomial distribution.

        Args:
            qnode (.QNode): A QNode that returns the expectation value of a Hamiltonian.
            coeffs (List[float]): The coefficients of the Hamiltonian being measured
            observables (List[Observable]): The terms of the Hamiltonian being measured
            shots (int): The number of shots used to estimate the Hamiltonian expectation
                value. These shots are distributed over the terms in the Hamiltonian,
                as per a Multinomial distribution.
            argnums (Sequence[int]): the QNode argument indices which are trainable
            *args: Arguments to the QNode
            **kwargs: Keyword arguments to the QNode

        Returns:
            array[float]: the single-shot gradients of the Hamiltonian expectation value
        """
        qnode = copy(qnode)
        base_func = qnode.func

        # determine the shot probability per term
        prob_shots = np.abs(coeffs) / np.sum(np.abs(coeffs))

        # construct the multinomial distribution, and sample
        # from it to determine how many shots to apply per term
        si = multinomial(n=shots, p=prob_shots)
        shots_per_term = si.rvs()[0]

        grads = []

        for o, c, p, s in zip(observables, coeffs, prob_shots, shots_per_term):
            # if the number of shots is 0, do nothing
            if s == 0:
                continue

            def func(*qnode_args, **qnode_kwargs):
                qs = qml.tape.make_qscript(base_func)(*qnode_args, **qnode_kwargs)
                for op in qs.operations:
                    qml.apply(op)
                return qml.expval(o)  # pylint:disable=cell-var-from-loop

            qnode.func = func

            new_shots = 1 if s == 1 else [(1, int(s))]

            if s > 1:

                def cost(*args, **kwargs):
                    # pylint: disable=cell-var-from-loop
                    return qml.math.stack(qnode(*args, **kwargs))

            else:
                cost = qnode

            jacs = qml.jacobian(cost, argnum=argnums)(*args, **kwargs, shots=new_shots)

            if s == 1:
                jacs = [np.expand_dims(j, 0) for j in jacs]

            # Divide each term by the probability per shot. This is
            # because we are sampling one at a time.
            grads.append([(c * j / p) for j in jacs])

        return [np.concatenate(i) for i in zip(*grads)]

[docs]    @staticmethod
    def check_device(dev):
        r"""Verifies that the device used by the objective function is non-analytic.

        Args:
            dev (.Device): the device to verify

        Raises:
            ValueError: if the device is analytic
        """
        if not dev.shots:
            raise ValueError(
                "The Rosalin optimizer can only be used with devices "
                "that estimate expectation values with a finite number of shots."
            )

[docs]    def check_learning_rate(self, coeffs):
        r"""Verifies that the learning rate is less than 2 over the Lipschitz constant,
        where the Lipschitz constant is given by :math:`\sum |c_i|` for Hamiltonian
        coefficients :math:`c_i`.

        Args:
            coeffs (Sequence[float]): the coefficients of the terms in the Hamiltonian

        Raises:
            ValueError: if the learning rate is large than :math:`2/\sum |c_i|`
        """
        self.lipschitz = np.sum(np.abs(coeffs))

        if self.stepsize > 2 / self.lipschitz:
            raise ValueError(f"The learning rate must be less than {2 / self.lipschitz}")

    def _single_shot_qnode_gradients(self, qnode, args, kwargs):
        """Compute the single shot gradients of a QNode."""
        self.check_device(qnode.device)

        qnode.construct(args, kwargs)
        tape = qnode.tape
        [expval] = tape.measurements
        coeffs, observables = (
            expval.obs.terms() if isinstance(expval.obs, qml.Hamiltonian) else ([1.0], [expval.obs])
        )

        if self.lipschitz is None:
            self.check_learning_rate(coeffs)

        if self.term_sampling == "weighted_random_sampling":
            return self.qnode_weighted_random_sampling(
                qnode, coeffs, observables, self.max_shots, self.trainable_args, *args, **kwargs
            )
        if self.term_sampling is not None:
            raise ValueError(
                f"Unknown Hamiltonian term sampling method {self.term_sampling}. "
                "Only term_sampling='weighted_random_sampling' and "
                "term_sampling=None currently supported."
            )

        new_shots = [(1, int(self.max_shots))]

        def cost(*args, **kwargs):
            return qml.math.stack(qnode(*args, **kwargs, shots=new_shots))

        grads = [qml.jacobian(cost, argnum=i)(*args, **kwargs) for i in self.trainable_args]

        return grads

[docs]    def compute_grad(
        self, objective_fn, args, kwargs
    ):  # pylint: disable=signature-differs,arguments-differ,arguments-renamed
        r"""Compute gradient of the objective function, as well as the variance of the gradient,
        at the given point.

        Args:
            objective_fn (function): the objective function for optimization
            args: arguments to the objective function
            kwargs: keyword arguments to the objective function

        Returns:
            tuple[array[float], array[float]]: a tuple of NumPy arrays containing the gradient
            :math:`\nabla f(x^{(t)})` and the variance of the gradient
        """
        if isinstance(objective_fn, qml.QNode) or hasattr(objective_fn, "device"):
            grads = self._single_shot_qnode_gradients(objective_fn, args, kwargs)
        else:
            raise ValueError(
                "The objective function must be encoded as a single QNode object for the shot "
                "adaptive optimizer. "
            )

        # grads will have dimension [max(self.s), *params.shape]
        # For each parameter, we want to truncate the number of shots to self.s[idx],
        # and take the mean and the variance.
        gradients = []
        gradient_variances = []

        for i, grad in enumerate(grads):
            p_ind = np.ndindex(*grad.shape[1:])

            g = np.zeros_like(grad[0])
            s = np.zeros_like(grad[0])

            for idx in p_ind:
                grad_slice = grad[(slice(0, self.s[i][idx]),) + idx]
                g[idx] = np.mean(grad_slice)
                s[idx] = np.var(grad_slice, ddof=1)

            gradients.append(g)
            gradient_variances.append(s)

        return gradients, gradient_variances

[docs]    def step(self, objective_fn, *args, **kwargs):
        """Update trainable arguments with one step of the optimizer.

        Args:
            objective_fn (function): the objective function for optimization
            *args: variable length argument list for objective function
            **kwargs: variable length of keyword arguments for the objective function

        Returns:
            list[array]: The new variable values :math:`x^{(t+1)}`.
            If single arg is provided, list[array] is replaced by array.
        """
        self.trainable_args = set()

        for index, arg in enumerate(args):
            if getattr(arg, "requires_grad", False):
                self.trainable_args |= {index}

        if self.s is None:
            # Number of shots per parameter
            self.s = [
                np.zeros_like(a, dtype=np.int64) + self.min_shots
                for i, a in enumerate(args)
                if i in self.trainable_args
            ]

        # keep track of the number of shots run
        s = np.concatenate([i.flatten() for i in self.s])
        self.max_shots = max(s)
        self.shots_used = int(2 * np.sum(s))
        self.total_shots_used += self.shots_used

        # compute the gradient, as well as the variance in the gradient,
        # using the number of shots determined by the array s.
        grads, grad_variances = self.compute_grad(objective_fn, args, kwargs)
        new_args = self.apply_grad(grads, args)

        if self.xi is None:
            self.chi = [np.zeros_like(g, dtype=np.float64) for g in grads]
            self.xi = [np.zeros_like(g, dtype=np.float64) for g in grads]

        # running average of the gradient
        self.chi = [self.mu * c + (1 - self.mu) * g for c, g in zip(self.chi, grads)]

        # running average of the gradient variance
        self.xi = [self.mu * x + (1 - self.mu) * v for x, v in zip(self.xi, grad_variances)]

        for idx, (c, x) in enumerate(zip(self.chi, self.xi)):
            xi = x / (1 - self.mu ** (self.k + 1))
            chi = c / (1 - self.mu ** (self.k + 1))

            # determine the new optimum shots distribution for the next
            # iteration of the optimizer
            s = np.ceil(
                (2 * self.lipschitz * self.stepsize * xi)
                / ((2 - self.lipschitz * self.stepsize) * (chi**2 + self.b * (self.mu**self.k)))
            )

            # apply an upper and lower bound on the new shot distributions,
            # to avoid the number of shots reducing below min(2, min_shots),
            # or growing too significantly.
            gamma = (
                (self.stepsize - self.lipschitz * self.stepsize**2 / 2) * chi**2
                - xi * self.lipschitz * self.stepsize**2 / (2 * s)
            ) / s

            argmax_gamma = np.unravel_index(np.argmax(gamma), gamma.shape)
            smax = max(s[argmax_gamma], 2)

            self.s[idx] = np.int64(np.clip(s, max(2, self.min_shots), smax))

        self.k += 1

        # unwrap from list if one argument, cleaner return
        if len(new_args) == 1:
            return new_args[0]

        return new_args

[docs]    def step_and_cost(self, objective_fn, *args, **kwargs):
        """Update trainable arguments with one step of the optimizer and return the corresponding
        objective function value prior to the step.

        The objective function will be evaluated using the maximum number of shots
        across all parameters as determined by the optimizer during the
        optimization step.

        .. warning::

            Unlike other gradient descent optimizers, the objective function will be evaluated
            **separately** to the gradient computation, and will result in extra
            device evaluations.

        Args:
            objective_fn (function): the objective function for optimization
            *args : variable length argument list for objective function
            **kwargs : variable length of keyword arguments for the objective function

        Returns:
            tuple[list [array], float]: the new variable values :math:`x^{(t+1)}` and the objective
            function output prior to the step.
            If single arg is provided, list [array] is replaced by array.
        """
        new_args = self.step(objective_fn, *args, **kwargs)
        forward = objective_fn(*args, **kwargs, shots=int(self.max_shots))
        return new_args, forward
_modules/pennylane/optimize/shot_adaptive
Download Python script
Download Notebook
View on GitHub
Source code for pennylane.optimize.shot_adaptive

Contents

Downloads

Source code for pennylane.optimize.shot_adaptive

Contents

Downloads

Related