- **name** - name prefix (optional)
- **initial_accumulator_value** (float) - the initial value of the moment accumulator.

**Code Example**

.. code-block:: python

    import paddle.fluid as fluid
    import numpy as np

    # ... (network definition and executor setup omitted in this excerpt)

            feed={"inp": np_inp},
            fetch_list=[out.name])

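The ``initial_accumulator_value`` argument described above can also be set explicitly when constructing the optimizer (assuming the ``AdagradOptimizer`` class documented here; the values below are illustrative):

.. code-block:: python

    import paddle.fluid as fluid

    # a non-zero initial accumulator value damps the very first updates
    optimizer = fluid.optimizer.AdagradOptimizer(
        learning_rate=0.2,
        initial_accumulator_value=0.1)
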
.. py:method:: apply_gradients(params_grads)

Appends optimization operators for the given (param, grad) pairs; this is the second step of the minimize procedure.

Parameters:
    - **params_grads** (list) - a list of (param, grad) pairs to optimize

Returns: the list of operators appended to the current Program

Return type: list

**Code Example**

.. code-block:: python

    import paddle.fluid as fluid

    # network() is assumed to be a user-defined function that builds the
    # model and returns the loss Variable
    loss = network()
    optimizer = fluid.optimizer.SGD(learning_rate=0.1)
    params_grads = optimizer.backward(loss)
    # you may append operations for params_grads here
    # ...
    optimizer.apply_gradients(params_grads)

.. py:method:: apply_optimize(loss, startup_program, params_grads)

Appends optimization operators for the given (param, grad) pairs; this is the second step of the minimize procedure.

Parameters:
    - **loss** (Variable) - the loss Variable of the optimization problem
    - **startup_program** (Program) - the startup_program used to initialize the parameters in parameter_list
    - **params_grads** (list) - a list of (param, grad) pairs to optimize

Returns: the list of operators appended to the current Program

Return type: list

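A minimal usage sketch (mirroring the ``apply_gradients`` example above; ``network()`` is assumed to be a user-defined function that builds the model and returns a loss Variable):

.. code-block:: python

    import paddle.fluid as fluid

    loss = network()  # assumed user-defined model returning a loss Variable
    optimizer = fluid.optimizer.SGD(learning_rate=0.1)
    params_grads = optimizer.backward(loss)
    # gradients may be inspected or modified here before they are applied
    optimizer.apply_optimize(loss,
                             startup_program=fluid.default_startup_program(),
                             params_grads=params_grads)
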
.. py:method:: backward(loss, startup_program=None, parameter_list=None, no_grad_set=None, callbacks=None)

Performs automatic differentiation and appends backward operators to the current program; this is the first step of the minimize procedure.

Parameters:
    - **loss** (Variable) - the loss Variable of the optimization problem
    - **startup_program** (Program) - the startup_program used to initialize the parameters in parameter_list
    - **parameter_list** (list) - a list of Variables to update
    - **no_grad_set** (set|None) - a set of Variables for which no gradients should be computed
    - **callbacks** (list|None) - a list of callables invoked when the backward operator for a parameter is appended

Returns: the list of operators appended to the current Program

Return type: list

**Code Example**

See the example under apply_gradients.

.. py:method:: load(stat_dict)

In dygraph mode, loads the optimizer together with its learning-rate decay.

Parameters:
    - **stat_dict** - the dict loaded by the load_persistables method

**Code Example**

.. code-block:: python

    from __future__ import print_function

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    from paddle.fluid.optimizer import SGDOptimizer
    from paddle.fluid.dygraph.nn import FC
    from paddle.fluid.dygraph.base import to_variable

    class MLP(fluid.Layer):
        def __init__(self, name_scope):
            super(MLP, self).__init__(name_scope)

            self._fc1 = FC(self.full_name(), 10)
            self._fc2 = FC(self.full_name(), 10)

        def forward(self, inputs):
            y = self._fc1(inputs)
            y = self._fc2(y)
            return y

    with fluid.dygraph.guard():
        mlp = MLP('mlp')
        # a plain SGD optimizer used for training ...
        optimizer = SGDOptimizer(learning_rate=0.1)
        # ... and one with learning-rate decay whose state is saved and reloaded
        optimizer2 = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True))

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array(
                [x[0].reshape(1, 28, 28) for x in data]).astype('float32')

            y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                128, 1)

            img = to_variable(dy_x_data)
            label = to_variable(y_data)
            label._stop_gradient = True
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            mlp.clear_gradients()
            fluid.dygraph.save_persistables(
                mlp.state_dict(), [optimizer, optimizer2], "save_dir_2")
            if batch_id == 2:
                break

    with fluid.dygraph.guard():
        mlp_load = MLP('mlp')
        optimizer_load2 = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True))
        parameters, optimizers = fluid.dygraph.load_persistables(
            "save_dir_2")
        mlp_load.load_dict(parameters)
        optimizer_load2.load(optimizers)
        # the reloaded decayed learning rate should match the one that was saved
        assert optimizer2._learning_rate.__dict__ == optimizer_load2._learning_rate.__dict__

.. py:method:: minimize(loss, startup_program=None, parameter_list=None, no_grad_set=None, grad_clip=None)

Adds operations to minimize the loss by updating parameter_list. This method combines the functionality of backward() and apply_gradients().

Parameters:
    - **loss** (Variable) - the loss Variable of the optimization problem
    - **startup_program** (Program) - the startup_program used to initialize the parameters in parameter_list
    - **parameter_list** (list) - a list of Variables to update
    - **no_grad_set** (set|None) - a set of Variables for which no gradients should be computed
    - **grad_clip** (GradClipBase|None) - the gradient clipping strategy

Returns: (optimize_ops, params_grads), where optimize_ops is the list of appended operators and params_grads is a list of (param, grad) Variable pairs used for optimization

Return type: tuple
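
A minimal end-to-end sketch of ``minimize`` in a static-graph program (the tiny network, variable names, and learning rate below are illustrative assumptions):

.. code-block:: python

    import paddle.fluid as fluid
    import numpy as np

    # a trivial network: one fc layer reduced to a scalar loss
    inp = fluid.layers.data(name="inp", shape=[2, 2], append_batch_size=False)
    out = fluid.layers.fc(inp, size=3)
    loss = fluid.layers.reduce_sum(out)

    optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.2)
    # minimize() performs backward() and apply_gradients() in one call
    optimize_ops, params_grads = optimizer.minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
    exe.run(fluid.default_main_program(),
            feed={"inp": np_inp},
            fetch_list=[loss.name])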