Support multi-argument functions with QNGOptimizer (#5926)
**Context:**
The `QNGOptimizer` currently only works with single-argument functions,
see #5902.

**Description of the Change:**
This PR updates `QNGOptimizer` to work with multi-argument functions.
If multiple arguments are trainable, the metric tensor and the gradient are
processed separately for each argument.
As a result, parameter correlations between different `QNode` arguments
are not taken into account in the update.
A note about this has been added to the docstring.
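
For illustration, a minimal usage sketch of the new multi-argument behavior (the circuit, wire labels, and parameter values below are illustrative, not taken from this PR):

```python
import pennylane as qml
from pennylane import numpy as np

dev = qml.device("default.qubit", wires=2)

@qml.qnode(dev)
def circuit(x, y):
    # Two separate trainable QNode arguments.
    qml.RX(x, wires=0)
    qml.RY(y, wires=1)
    qml.CNOT(wires=[0, 1])
    return qml.expval(qml.Z(0) @ qml.Z(1))

x = np.array(0.2, requires_grad=True)
y = np.array(-0.4, requires_grad=True)

opt = qml.QNGOptimizer(stepsize=0.01)
# Each argument is updated with its own block of the metric tensor;
# correlations between `x` and `y` do not enter the update. To include
# them, combine both parameters into a single array-valued argument.
(x, y), cost = opt.step_and_cost(circuit, x, y)
```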

**Benefits:**
Better support for quantum-aware optimizers (`QNGOptimizer`).

**Possible Drawbacks:**

**Related GitHub Issues:**
Fixes #5902
[sc-66929]
dwierichs committed Aug 5, 2024
1 parent af95d98 commit 1f3dbc7
Showing 4 changed files with 109 additions and 47 deletions.
3 changes: 3 additions & 0 deletions doc/releases/changelog-dev.md
@@ -43,6 +43,9 @@

<h3>Improvements 🛠</h3>

* `QNGOptimizer` now supports cost functions with multiple arguments, updating each argument independently.
[(#5926)](https://github.com/PennyLaneAI/pennylane/pull/5926)

* `qml.for_loop` can now be captured into plxpr.
[(#6041)](https://github.com/PennyLaneAI/pennylane/pull/6041)

2 changes: 1 addition & 1 deletion pennylane/gradients/metric_tensor.py
@@ -324,7 +324,7 @@ def circuit(weights):
>>> dev = qml.device("default.qubit", wires=3)
>>> @qml.qnode(dev, interface="autograd")
>>> def circuit(weights): # , extra_weight):
>>> def circuit(weights):
... qml.RX(weights[1], wires=0)
... qml.RY(weights[0], wires=0)
... qml.CNOT(wires=[0, 1])
85 changes: 46 additions & 39 deletions pennylane/optimize/qng.py
@@ -22,6 +22,15 @@
from .gradient_descent import GradientDescentOptimizer


def _reshape_and_regularize(tensor, lam):
shape = qml.math.shape(tensor)
size = 1 if shape == () else qml.math.prod(shape[: len(shape) // 2])
tensor = qml.math.reshape(tensor, (size, size))
# Add regularization
tensor += lam * qml.math.eye(size, like=tensor)
return tensor


class QNGOptimizer(GradientDescentOptimizer):
r"""Optimizer with adaptive learning rate, via calculation
of the diagonal or block-diagonal approximation to the Fubini-Study metric tensor.
@@ -94,7 +103,7 @@ class QNGOptimizer(GradientDescentOptimizer):
**Examples:**
For VQE/VQE-like problems, the objective function for the optimizer can be
realized as :class:`~.QNode` that returns the expectation value of a Hamiltonian.
realized as a :class:`~.QNode` that returns the expectation value of a Hamiltonian.
>>> dev = qml.device("default.qubit", wires=(0, 1, "aux"))
>>> @qml.qnode(dev)
@@ -104,7 +113,7 @@ class QNGOptimizer(GradientDescentOptimizer):
... return qml.expval(qml.X(0) + qml.X(1))
Once constructed, the cost function can be passed directly to the
optimizer's ``step`` function:
optimizer's :meth:`~.step` function:
>>> eta = 0.01
>>> init_params = np.array([0.011, 0.012])
@@ -113,19 +122,26 @@ class QNGOptimizer(GradientDescentOptimizer):
>>> theta_new
tensor([ 0.01100528, -0.02799954], requires_grad=True)
An alternative function to calculate the metric tensor of the QNode
can be provided to :meth:`~.step`
via the ``metric_tensor_fn`` keyword argument. For example, we can provide a function
An alternative function to calculate the metric tensor of the QNode can be provided to ``step``
via the ``metric_tensor_fn`` keyword argument. For example, we can provide a function
to calculate the metric tensor via the adjoint method.
>>> adj_metric_tensor = qml.adjoint_metric_tensor(circuit, circuit.device)
>>> opt.step(circuit, init_params, metric_tensor_fn=adj_metric_tensor)
tensor([ 0.01100528, -0.02799954], requires_grad=True)
.. note::
If the objective function takes multiple trainable arguments, ``QNGOptimizer`` applies the
metric tensor for each argument individually. This means that "correlations" between
parameters from different arguments are not taken into account. In order to take all
correlations into account within the optimization, consider combining all parameters into
one objective function argument.
.. seealso::
See the :doc:`quantum natural gradient example <demo:demos/tutorial_quantum_natural_gradient>`
for more details on Fubini-Study metric tensor and this optimization class.
for more details on the Fubini-Study metric tensor and this optimization class.
Keyword Args:
stepsize=0.01 (float): the user-defined hyperparameter :math:`\eta`
@@ -190,41 +206,22 @@ def step_and_cost(
if metric_tensor_fn is None:
metric_tensor_fn = qml.metric_tensor(qnode, approx=self.approx)

_metric_tensor = metric_tensor_fn(*args, **kwargs)
# Reshape metric tensor to be square
shape = qml.math.shape(_metric_tensor)
size = qml.math.prod(shape[: len(shape) // 2])
self.metric_tensor = qml.math.reshape(_metric_tensor, (size, size))
# Add regularization
self.metric_tensor = self.metric_tensor + self.lam * qml.math.eye(
size, like=_metric_tensor
)
mt = metric_tensor_fn(*args, **kwargs)
if isinstance(mt, tuple):
self.metric_tensor = tuple(_reshape_and_regularize(_mt, self.lam) for _mt in mt)
else:
self.metric_tensor = _reshape_and_regularize(mt, self.lam)

g, forward = self.compute_grad(qnode, args, kwargs, grad_fn=grad_fn)
new_args = pnp.array(self.apply_grad(g, args), requires_grad=True)
new_args = self.apply_grad(g, args)

if forward is None:
forward = qnode(*args, **kwargs)

# Note: for now, we only have single element lists as the new
# arguments, but this might change, see TODO below.
# Once the other approach is implemented, we need to unwrap from list
# if one argument for a cleaner return.
# if len(new_args) == 1:
return new_args[0], forward

# TODO: The scenario of the following return statement is not implemented
# yet, as currently only a single metric tensor can be processed.
# An optimizer refactor is needed to accomodate for this (similar to other
# optimizers for which `apply_grad` will have to be patched to allow for
# tuple-valued gradients to be processed)
#
# For multiple QNode arguments, `qml.jacobian` and `qml.metric_tensor`
# return a tuple of arrays. Each of the gradient arrays has to be processed
# together with the corresponding array in the metric tensor tuple.
# This requires modifications of the `GradientDescentOptimizer` base class
# as none of the optimizers accomodate for this use case.
# return new_args, forward
if len(new_args) == 1:
new_args = new_args[0]

return new_args, forward

# pylint: disable=arguments-differ
def step(
@@ -273,7 +270,17 @@ def apply_grad(self, grad, args):
Returns:
array: the new values :math:`x^{(t+1)}`
"""
grad_flat = pnp.array(list(_flatten(grad)))
x_flat = pnp.array(list(_flatten(args)))
x_new_flat = x_flat - self.stepsize * pnp.linalg.solve(self.metric_tensor, grad_flat)
return unflatten(x_new_flat, args)
args_new = list(args)
mt = self.metric_tensor if isinstance(self.metric_tensor, tuple) else (self.metric_tensor,)

trained_index = 0
for index, arg in enumerate(args):
if getattr(arg, "requires_grad", False):
grad_flat = pnp.array(list(_flatten(grad[trained_index])))
# self.metric_tensor has already been reshaped to 2D, matching flat gradient.
update = pnp.linalg.solve(mt[trained_index], grad_flat)
args_new[index] = arg - self.stepsize * unflatten(update, grad[trained_index])

trained_index += 1

return tuple(args_new)
66 changes: 59 additions & 7 deletions tests/optimize/test_qng.py
@@ -159,7 +159,6 @@ def grad_fn2(param):
assert np.isclose(cost1, expected_cost)
assert np.isclose(cost2, expected_cost)

@pytest.mark.skip("QNGOptimizer is not yet implemented for split inputs.")
def test_step_and_cost_with_grad_fn_split_input(self):
"""Test that the correct cost and update is returned via the step_and_cost
method for the QNG optimizer when providing an explicit grad_fn.
@@ -194,6 +193,59 @@ def grad_fn2(params_0, params_1):
assert np.isclose(cost1, expected_cost)
assert np.isclose(cost2, expected_cost)

@pytest.mark.parametrize("trainable_idx", [0, 1])
def test_step_and_cost_split_input_one_trainable(self, trainable_idx):
"""Test that the correct cost and update is returned via the step_and_cost
method for the QNG optimizer when providing an explicit grad_fn or not.
Using a circuit with multiple inputs, one of which is trainable."""

dev = qml.device("default.qubit")

@qml.qnode(dev)
def circuit(x, y):
"""A cost function with two arguments."""
qml.RX(x, 0)
qml.RY(-y, 0)
return qml.expval(qml.Z(0))

grad_fn = qml.grad(circuit)
mt_fn = qml.metric_tensor(circuit)

params = np.array(0.2, requires_grad=False), np.array(-0.8, requires_grad=False)
params[trainable_idx].requires_grad = True
opt = qml.QNGOptimizer(stepsize=0.01)

# Without manually provided functions
step1, cost1 = opt.step_and_cost(circuit, *params)
step2 = opt.step(circuit, *params)

# With modified autograd gradient function
fake_grad_fn = lambda *args, **kwargs: grad_fn(*args, **kwargs) * 2
step3, cost2 = opt.step_and_cost(circuit, *params, grad_fn=fake_grad_fn)
step4 = opt.step(circuit, *params, grad_fn=fake_grad_fn)

# With modified metric tensor function
fake_mt_fn = lambda *args, **kwargs: mt_fn(*args, **kwargs) * 4
step5 = opt.step(circuit, *params, metric_tensor_fn=fake_mt_fn)

# Expectations
if trainable_idx == 1:
mt_inv = 1 / (np.cos(2 * params[0]) + 1) * 8
else:
mt_inv = 4
exact_update = -opt.stepsize * grad_fn(*params) * mt_inv
factors = [1.0, 1.0, 2.0, 2.0, 0.25]
expected_cost = circuit(*params)

for factor, step in zip(factors, [step1, step2, step3, step4, step5]):
expected_step = tuple(
par + exact_update * factor if i == trainable_idx else par
for i, par in enumerate(params)
)
assert np.allclose(step, expected_step)
assert np.isclose(cost1, expected_cost)
assert np.isclose(cost2, expected_cost)

@pytest.mark.slow
def test_qubit_rotation(self, tol):
"""Test qubit rotation has the correct QNG value
Expand Down Expand Up @@ -238,7 +290,6 @@ def gradient(params):
# check final cost
assert np.allclose(circuit(theta), -0.9963791, atol=tol, rtol=0)

@pytest.mark.skip("QNGOptimizer is not yet implemented for split inputs.")
def test_single_qubit_vqe_using_expval_h_multiple_input_params(self, tol, recwarn):
"""Test single-qubit VQE by returning qml.expval(H) in the QNode and
check for the correct QNG value every step, the correct parameter updates, and
@@ -277,13 +328,14 @@ def gradient(params):

# check metric tensor
res = opt.metric_tensor
exp = np.diag([0.25, (np.cos(x) ** 2) / 4])
assert np.allclose(res, exp, atol=0.00001, rtol=0)
exp = (np.array([[0.25]]), np.array([[(np.cos(2 * theta[0]) + 1) / 8]]))
assert np.allclose(res, exp)

# check parameter update
theta_new = np.array([x, y])
dtheta = eta * sp.linalg.pinvh(exp) @ gradient(theta)
assert np.allclose(dtheta, theta - theta_new, atol=0.000001, rtol=0)
theta_new = (x, y)
grad = gradient(theta)
dtheta = tuple(eta * g / e[0, 0] for e, g in zip(exp, grad))
assert np.allclose(dtheta, theta - theta_new)

# check final cost
assert np.allclose(circuit(x, y), -1.41421356, atol=tol, rtol=0)
