Skip to content

Commit 03c1098

Browse files
Merge pull request #1065 from tensorlayer/reinforcement-learning
Reinforcement learning examples update
2 parents 76c07a1 + c628d24 commit 03c1098

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+3510
-9222
lines changed

docker/pypi_list.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,7 @@
3232
logger.debug("prerelease: %s" % args.prerelease)
3333
logger.debug("debug: %s" % args.debug)
3434

35-
finder = pip._internal.index.PackageFinder(
36-
[],
37-
['https://pypi.python.org/simple'],
38-
session=requests.Session()
39-
)
35+
finder = pip._internal.index.PackageFinder([], ['https://pypi.python.org/simple'], session=requests.Session())
4036
results = finder.find_all_candidates(args.package)
4137
tmp_versions = [str(p.version) for p in results]
4238

docker/version_prefix.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@
66
parser = argparse.ArgumentParser(description='Determine the version prefix to apply depending on the version name')
77

88
parser.add_argument(
9-
'--version',
10-
type=str,
11-
required=True,
12-
help='The Package Version to be installed in the container'
9+
'--version', type=str, required=True, help='The Package Version to be installed in the container'
1310
)
1411

1512
parser.add_argument('--debug', help='Print debug information', action='store_true')

docs/conf.py

+19-26
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#
2020
import os, sys, datetime
2121
sys.path.insert(0, os.path.abspath("../")) # Important
22-
sys.path.insert(0, os.path.abspath(os.path.join("..", "tensorlayer"))) # Important
22+
sys.path.insert(0, os.path.abspath(os.path.join("..", "tensorlayer"))) # Important
2323

2424
from package_info import __shortversion__
2525
from package_info import __version__
@@ -159,7 +159,6 @@
159159
# If true, `todo` and `todoList` produce output, else they produce nothing.
160160
todo_include_todos = False
161161

162-
163162
# -- Options for HTML output ----------------------------------------------
164163

165164
# The theme to use for HTML and HTML Help pages. See the documentation for
@@ -284,29 +283,28 @@
284283
# -- Options for LaTeX output ---------------------------------------------
285284

286285
latex_elements = {
287-
# The paper size ('letterpaper' or 'a4paper').
288-
#
289-
# 'papersize': 'letterpaper',
286+
# The paper size ('letterpaper' or 'a4paper').
287+
#
288+
# 'papersize': 'letterpaper',
290289

291-
# The font size ('10pt', '11pt' or '12pt').
292-
#
293-
# 'pointsize': '10pt',
290+
# The font size ('10pt', '11pt' or '12pt').
291+
#
292+
# 'pointsize': '10pt',
294293

295-
# Additional stuff for the LaTeX preamble.
296-
#
297-
# 'preamble': '',
294+
# Additional stuff for the LaTeX preamble.
295+
#
296+
# 'preamble': '',
298297

299-
# Latex figure (float) alignment
300-
#
301-
# 'figure_align': 'htbp',
298+
# Latex figure (float) alignment
299+
#
300+
# 'figure_align': 'htbp',
302301
}
303302

304303
# Grouping the document tree into LaTeX files. List of tuples
305304
# (source start file, target name, title,
306305
# author, documentclass [howto, manual, or own class]).
307306
latex_documents = [
308-
(master_doc, 'TensorLayer.tex', 'TensorLayer Documentation',
309-
'TensorLayer contributors', 'manual'),
307+
(master_doc, 'TensorLayer.tex', 'TensorLayer Documentation', 'TensorLayer contributors', 'manual'),
310308
]
311309

312310
# The name of an image file (relative to this directory) to place at the top of
@@ -335,30 +333,26 @@
335333
#
336334
# latex_domain_indices = True
337335

338-
339336
# -- Options for manual page output ---------------------------------------
340337

341338
# One entry per manual page. List of tuples
342339
# (source start file, name, description, authors, manual section).
343-
man_pages = [
344-
(master_doc, 'tensorlayer', 'TensorLayer Documentation',
345-
[author], 1)
346-
]
340+
man_pages = [(master_doc, 'tensorlayer', 'TensorLayer Documentation', [author], 1)]
347341

348342
# If true, show URL addresses after external links.
349343
#
350344
# man_show_urls = False
351345

352-
353346
# -- Options for Texinfo output -------------------------------------------
354347

355348
# Grouping the document tree into Texinfo files. List of tuples
356349
# (source start file, target name, title, author,
357350
# dir menu entry, description, category)
358351
texinfo_documents = [
359-
(master_doc, 'TensorLayer', 'TensorLayer Documentation',
360-
author, 'TensorLayer', 'Deep learning and Reinforcement learning library for Researchers and Engineers.',
361-
'Miscellaneous'),
352+
(
353+
master_doc, 'TensorLayer', 'TensorLayer Documentation', author, 'TensorLayer',
354+
'Deep learning and Reinforcement learning library for Researchers and Engineers.', 'Miscellaneous'
355+
),
362356
]
363357

364358
# Documents to append as an appendix to all manuals.
@@ -377,7 +371,6 @@
377371
#
378372
# texinfo_no_detailmenu = False
379373

380-
381374
# -- Options for Epub output ----------------------------------------------
382375

383376
# Bibliographic Dublin Core info.

examples/data_process/tutorial_fast_affine_transform.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
import multiprocessing
99
import time
1010

11+
import cv2
1112
import numpy as np
1213
import tensorflow as tf
1314

14-
import cv2
1515
import tensorlayer as tl
1616

1717
# tl.logging.set_verbosity(tl.logging.DEBUG)

examples/data_process/tutorial_tfrecord3.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,8 @@ def distort_image(image, thread_id):
231231

232232

233233
def prefetch_input_data(
234-
reader, file_pattern, is_training, batch_size, values_per_shard, input_queue_capacity_factor=16,
235-
num_reader_threads=1, shard_queue_name="filename_queue", value_queue_name="input_queue"
234+
reader, file_pattern, is_training, batch_size, values_per_shard, input_queue_capacity_factor=16,
235+
num_reader_threads=1, shard_queue_name="filename_queue", value_queue_name="input_queue"
236236
):
237237
"""Prefetches string values from disk into an input queue.
238238
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
model/
2+
image/

examples/reinforcement_learning/README.md

+12-7
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
<br/>
2121
-->
2222

23-
This repository contains the implementation of most popular reinforcement learning algorithms with Tensorlayer 2.0, supporting [Tensorflow 2.0](https://www.tensorflow.org/alpha/guide/effective_tf2). We aim to make the reinforcement learning tutorial for each algorithm simple and straight-forward to use, as this would not only benefit new learners of reinforcement learning but also provide convenience for senior researchers to testify their new ideas quickly. In addition to this project, we also released a [RL zoo](https://github.com/tensorlayer/RLzoo) for industrial users.
23+
This repository contains implementations of the most popular reinforcement learning algorithms with TensorLayer 2.0, supporting [TensorFlow 2.0](https://www.tensorflow.org/alpha/guide/effective_tf2). We aim to make the reinforcement learning tutorial for each algorithm simple and straightforward to use, as this would not only benefit new learners of reinforcement learning, but also make it convenient for senior researchers to test their new ideas quickly.
2424

2525
## Prerequisites:
2626

@@ -46,6 +46,8 @@ For each tutorial, open a terminal and run:
4646

4747
The tutorial algorithms follow the same basic structure, as shown in file: [`./tutorial_format.py`](https://github.com/tensorlayer/tensorlayer/blob/reinforcement-learning/examples/reinforcement_learning/tutorial_format.py)
4848

49+
The pretrained models and learning curves for each algorithm are stored [here](https://github.com/tensorlayer/pretrained-models). You can download the models and load the weights into the policies for testing.
50+
4951
## Table of Contents:
5052
### value-based
5153
| Algorithms | Action Space | Tutorial Env | Papers |
@@ -123,18 +125,19 @@ The tutorial algorithms follow the same basic structure, as shown in file: [`./t
123125

124126
```
125127
We implement Double DQN, Dueling DQN and Noisy DQN here.
126-
128+
127129
-The max operator in standard DQN uses the same values both to select and to evaluate an action by:
128-
130+
129131
Q(s_t, a_t) = R_{t+1} + gamma * max_{a}Q_{target}(s_{t+1}, a).
130-
132+
131133
-Double DQN proposes to use the following evaluation to address the overestimation problem of the max operator:
132-
134+
133135
Q(s_t, a_t) = R_{t+1} + gamma * Q_{target}(s_{t+1}, argmax_{a}Q(s_{t+1}, a)).
134-
136+
135137
-Dueling DQN uses a dueling architecture where the value of the state and the advantage of each action are estimated separately.
136-
138+
137139
-Noisy DQN proposes to explore by adding parameter noise.
140+
```
138141

139142

140143
```
@@ -339,3 +342,5 @@ Our env wrapper: `./tutorial_wrappers.py`
339342
- @Tokarev-TT-33 Tianyang Yu @initial-h Hongming Zhang : PG, DDPG, PPO, DPPO, TRPO
340343
- @Officium Yanhua Huang: C51, DQN_variants, prioritized_replay, wrappers.
341344
345+
346+
```

0 commit comments

Comments
 (0)