# 实现高阶自动微分¶

CPU GPU Ascend 全流程 初级 中级 高级

## 一阶求导¶

### 输入求导¶

import numpy as np
import mindspore.context as context
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
from mindspore import ParameterTuple, Parameter
from mindspore import dtype as mstype
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
class Net(nn.Cell):
    """Forward network: out = (x * z) @ y, where z is a trainable scalar parameter."""
    def __init__(self):
        super(Net, self).__init__()
        self.matmul = ops.MatMul()
        # Scalar weight z (shape [1]); broadcast-multiplied onto x before the matmul.
        self.z = Parameter(Tensor(np.array([1.0], np.float32)), name='z')

    def construct(self, x, y):
        x = x * self.z
        out = self.matmul(x, y)
        return out

class GradNetWrtX(nn.Cell):
    """Return the first-order gradient of `net`'s (implicitly summed) output w.r.t. input x.

    NOTE(review): the class header and GradOperation wiring were lost in extraction;
    reconstructed from the surrounding derivation and the printed result.
    """
    def __init__(self, net):
        super(GradNetWrtX, self).__init__()
        self.net = net
        # Default GradOperation: gradient w.r.t. the first input only.
        self.grad_op = ops.GradOperation()

    def construct(self, x, y):
        gradient_function = self.grad_op(self.net)
        return gradient_function(x, y)

x = Tensor([[0.8, 0.6, 0.2], [1.8, 1.3, 1.1]], dtype=mstype.float32)
y = Tensor([[0.11, 3.3, 1.1], [1.1, 0.2, 1.4], [1.1, 2.2, 0.3]], dtype=mstype.float32)
output = GradNetWrtX(Net())(x, y)
print(output)


[[4.5099998 2.7 3.6000001]
[4.5099998 2.7 3.6000001]]


x = Tensor([[x1, x2, x3], [x4, x5, x6]])
y = Tensor([[y1, y2, y3], [y4, y5, y6], [y7, y8, y9]])
z = Tensor([z])


$output = [[(x1 \cdot y1 + x2 \cdot y4 + x3 \cdot y7) \cdot z, (x1 \cdot y2 + x2 \cdot y5 + x3 \cdot y8) \cdot z, (x1 \cdot y3 + x2 \cdot y6 + x3 \cdot y9) \cdot z]$,

$[(x4 \cdot y1 + x5 \cdot y4 + x6 \cdot y7) \cdot z, (x4 \cdot y2 + x5 \cdot y5 + x6 \cdot y8) \cdot z, (x4 \cdot y3 + x5 \cdot y6 + x6 \cdot y9) \cdot z]]$

(1) 求和公式：

$\sum{output} = [(x1 \cdot y1 + x2 \cdot y4 + x3 \cdot y7) + (x1 \cdot y2 + x2 \cdot y5 + x3 \cdot y8) + (x1 \cdot y3 + x2 \cdot y6 + x3 \cdot y9) +$

$(x4 \cdot y1 + x5 \cdot y4 + x6 \cdot y7) + (x4 \cdot y2 + x5 \cdot y5 + x6 \cdot y8) + (x4 \cdot y3 + x5 \cdot y6 + x6 \cdot y9)] \cdot z$

(2) 求导公式：

$\frac{\mathrm{d}(\sum{output})}{\mathrm{d}x} = [[(y1 + y2 + y3) \cdot z, (y4 + y5 + y6) \cdot z, (y7 + y8 + y9) \cdot z], [(y1 + y2 + y3) \cdot z, (y4 + y5 + y6) \cdot z, (y7 + y8 + y9) \cdot z]]$

(3) 计算结果：

$\frac{\mathrm{d}(\sum{output})}{\mathrm{d}x} = [[4.5099998 \quad 2.7 \quad 3.6000001] [4.5099998 \quad 2.7 \quad 3.6000001]]$

### 权重求导¶

class GradNetWrtX(nn.Cell):
    """Return gradients of `net` w.r.t. its trainable parameters (here the scalar z).

    NOTE(review): super().__init__() and the GradOperation wiring were lost in
    extraction; reconstructed from the ParameterTuple line and the scalar output shown.
    """
    def __init__(self, net):
        super(GradNetWrtX, self).__init__()
        self.net = net
        self.params = ParameterTuple(net.trainable_params())
        # get_by_list=True: differentiate w.r.t. the parameter list instead of the inputs.
        self.grad_op = ops.GradOperation(get_by_list=True)

    def construct(self, x, y):
        gradient_function = self.grad_op(self.net, self.params)
        return gradient_function(x, y)

output = GradNetWrtX(Net())(x, y)
print(output)


(Tensor(shape=[1], dtype=Float32, value= [ 2.15359993e+01]),)


$\frac{\mathrm{d}(\sum{output})}{\mathrm{d}z} = (x1 \cdot y1 + x2 \cdot y4 + x3 \cdot y7) + (x1 \cdot y2 + x2 \cdot y5 + x3 \cdot y8) + (x1 \cdot y3 + x2 \cdot y6 + x3 \cdot y9) +$

$(x4 \cdot y1 + x5 \cdot y4 + x6 \cdot y7) + (x4 \cdot y2 + x5 \cdot y5 + x6 \cdot y8) + (x4 \cdot y3 + x5 \cdot y6 + x6 \cdot y9)$

$\frac{\mathrm{d}(\sum{output})}{\mathrm{d}z} = [2.15359993e+01]$

### 梯度值缩放¶

class GradNetWrtX(nn.Cell):
    """Return the gradient of `net` w.r.t. x, scaled by a fixed sensitivity tensor.

    NOTE(review): super().__init__() and the GradOperation wiring were lost in
    extraction; reconstructed from the `grad_wrt_output` field and the scaled result.
    """
    def __init__(self, net):
        super(GradNetWrtX, self).__init__()
        self.net = net
        # sens_param=True: the output gradient (sensitivity) is supplied by the caller.
        self.grad_op = ops.GradOperation(sens_param=True)
        self.grad_wrt_output = Tensor([[0.1, 0.6, 0.2], [0.8, 1.3, 1.1]], dtype=mstype.float32)

    def construct(self, x, y):
        gradient_function = self.grad_op(self.net)
        return gradient_function(x, y, self.grad_wrt_output)

output = GradNetWrtX(Net())(x, y)
print(output)


[[2.211 0.51 1.49 ]
[5.588 2.68 4.07 ]]


self.grad_wrt_output可以记作如下形式：

self.grad_wrt_output = Tensor([[s1, s2, s3], [s4, s5, s6]])


$output = [[(x1 \cdot y1 + x2 \cdot y4 + x3 \cdot y7) \cdot z \cdot s1, (x1 \cdot y2 + x2 \cdot y5 + x3 \cdot y8) \cdot z \cdot s2, (x1 \cdot y3 + x2 \cdot y6 + x3 \cdot y9) \cdot z \cdot s3],$

$[(x4 \cdot y1 + x5 \cdot y4 + x6 \cdot y7) \cdot z \cdot s4, (x4 \cdot y2 + x5 \cdot y5 + x6 \cdot y8) \cdot z \cdot s5, (x4 \cdot y3 + x5 \cdot y6 + x6 \cdot y9) \cdot z \cdot s6]]$

$\frac{\mathrm{d}(\sum{output})}{\mathrm{d}x} = [[(s1 \cdot y1 + s2 \cdot y2 + s3 \cdot y3) \cdot z, (s1 \cdot y4 + s2 \cdot y5 + s3 \cdot y6) \cdot z, (s1 \cdot y7 + s2 \cdot y8 + s3 \cdot y9) \cdot z],$

$[(s4 \cdot y1 + s5 \cdot y2 + s6 \cdot y3) \cdot z, (s4 \cdot y4 + s5 \cdot y5 + s6 \cdot y6) \cdot z, (s4 \cdot y7 + s5 \cdot y8 + s6 \cdot y9) \cdot z]]$

class Net(nn.Cell):
    """Forward network returning only the (0, 0) element of (x * z) @ y."""
    def __init__(self):
        super(Net, self).__init__()
        self.matmul = ops.MatMul()
        self.z = Parameter(Tensor(np.array([1.0], np.float32)), name='z')

    def construct(self, x, y):
        x = x * self.z
        out = self.matmul(x, y)
        # Return a single element: only out[0][0] contributes to the gradient,
        # so all other rows of d(out)/dx are zero.
        return out[0][0]

output = GradNetWrtX(Net())(x, y)
print(output)


[[0.11 1.1 1.1]
[0.   0.  0. ]]


## 高阶求导¶

MindSpore可通过多次求导的方式支持高阶导数，下面通过几类例子展开阐述。

### 单输入单输出高阶导数¶

import numpy as np
import mindspore.context as context
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

class Net(nn.Cell):
    """Forward network: out = sin(x)."""
    def __init__(self):
        super(Net, self).__init__()
        self.sin = ops.Sin()

    def construct(self, x):
        out = self.sin(x)
        return out

class Grad(nn.Cell):
    """First-order derivative of `network` w.r.t. its single input.

    NOTE(review): class headers, super calls and GradOperation wiring were lost
    in extraction; reconstructed from the printed result (-sin(1.0) = -0.841471,
    i.e. the second derivative of sin).
    """
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = ops.GradOperation()
        self.network = network

    def construct(self, x):
        gout = self.grad(self.network)(x)
        return gout

class GradSec(nn.Cell):
    """Second-order derivative: differentiates the first-order wrapper again."""
    def __init__(self, network):
        super(GradSec, self).__init__()
        self.grad = ops.GradOperation()
        self.network = network

    def construct(self, x):
        gout = self.grad(self.network)(x)
        return gout

net = Net()
firstgrad = Grad(net)            # d(sin x)/dx = cos(x)
secondgrad = GradSec(firstgrad)  # d2(sin x)/dx2 = -sin(x)
x_train = Tensor(np.array([1.0], dtype=np.float32))
output = secondgrad(x_train)
print(output)


[-0.841471]


### 单输入多输出高阶导数¶

import mindspore.context as context
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
from mindspore import dtype as mstype
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

class Net(nn.Cell):
    """Forward network: out = x * x (element-wise square)."""
    def __init__(self):
        super(Net, self).__init__()
        self.mul = ops.Mul()

    def construct(self, x):
        out = self.mul(x, x)
        return out

class Grad(nn.Cell):
    """First-order derivative of `network` w.r.t. its single input.

    NOTE(review): class headers, super calls and GradOperation wiring were lost
    in extraction; reconstructed from the printed result ([2. 2. 2.], the second
    derivative of x^2 at every element).
    """
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = ops.GradOperation()
        self.network = network

    def construct(self, x):
        gout = self.grad(self.network)(x)
        return gout

class GradSec(nn.Cell):
    """Second-order derivative: differentiates the first-order wrapper again."""
    def __init__(self, network):
        super(GradSec, self).__init__()
        self.grad = ops.GradOperation()
        self.network = network

    def construct(self, x):
        gout = self.grad(self.network)(x)
        return gout

net = Net()
firstgrad = Grad(net)            # d(x^2)/dx = 2x
secondgrad = GradSec(firstgrad)  # d2(x^2)/dx2 = 2
x = Tensor([0.1, 0.2, 0.3], dtype=mstype.float32)
output = secondgrad(x)
print(output)


[2. 2. 2.]


### 多输入多输出高阶导数¶

import numpy as np
import mindspore.context as context
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

class Net(nn.Cell):
    """Forward network with two inputs: out = x^2 * y (element-wise)."""
    def __init__(self):
        super(Net, self).__init__()
        self.mul = ops.Mul()

    def construct(self, x, y):
        x_square = self.mul(x, x)
        x_square_y = self.mul(x_square, y)
        return x_square_y

class Grad(nn.Cell):
    """First-order gradients of `network` w.r.t. all of its inputs.

    NOTE(review): class header, super call and GradOperation wiring were lost in
    extraction; `get_all=True` reconstructed from the `# return dx, dy` comment.
    """
    def __init__(self, network):
        super(Grad, self).__init__()
        # get_all=True: return the gradient for every network input.
        self.grad = ops.GradOperation(get_all=True)
        self.network = network

    def construct(self, x, y):
        gout = self.grad(self.network)(x, y)  # return dx, dy
        return gout

class GradSec(nn.Cell):
    """Second-order gradients of `network` (a first-order wrapper returning (dx, dy)).

    NOTE(review): class header, super call, GradOperation wiring and the wrapper
    instantiation lines were lost in extraction; reconstructed from the one-hot
    sensitivity fields and the printed Hessian values [10] [8.] [8.] [0.].
    """
    def __init__(self, network):
        super(GradSec, self).__init__()
        # sens_param=True selects one component of (dx, dy) at a time via one-hot
        # sensitivities, yielding the rows of the Hessian of x^2 * y.
        self.grad = ops.GradOperation(get_all=True, sens_param=True)
        self.network = network
        self.sens1 = Tensor(np.array([1]).astype('float32'))
        self.sens2 = Tensor(np.array([0]).astype('float32'))

    def construct(self, x, y):
        dxdx, dxdy = self.grad(self.network)(x, y, (self.sens1, self.sens2))
        dydx, dydy = self.grad(self.network)(x, y, (self.sens2, self.sens1))
        return dxdx, dxdy, dydx, dydy

net = Net()
firstgrad = Grad(net)
secondgrad = GradSec(firstgrad)
x_train = Tensor(np.array([4], dtype=np.float32))
y_train = Tensor(np.array([5], dtype=np.float32))
dxdx, dxdy, dydx, dydy = secondgrad(x_train, y_train)
print(dxdx, dxdy, dydx, dydy)


[10] [8.] [8.] [0.]


## 二阶微分算子支持情况¶

CPU支持算子：Square、Exp、Neg、Mul、MatMul

GPU支持算子：Pow、Log、Square、Exp、Neg、Mul、Div、MatMul、Sin、Cos、Tan、Atanh

Ascend支持算子：Pow、Log、Square、Exp、Neg、Mul、Div、MatMul、Sin、Cos、Tan、Sinh、Cosh、Atanh

## 引用¶

[1] Zhang L, Han J, Wang H, et al. Deep potential molecular dynamics: a scalable model with the accuracy of quantum mechanics[J]. Physical review letters, 2018, 120(14): 143001.

[2] Raissi M, Perdikaris P, Karniadakis G E. Physics informed deep learning (part i): Data-driven solutions of nonlinear partial differential equations[J]. arXiv preprint arXiv:1711.10561, 2017.

[3] Baydin A G, Pearlmutter B A, Radul A A, et al. Automatic differentiation in machine learning: a survey[J]. The Journal of Machine Learning Research, 2017, 18(1): 5595-5637.