diff --git a/docs/chapter03_DL-basics/3.10_mlp-pytorch.md b/docs/chapter03_DL-basics/3.10_mlp-pytorch.md
index 5759faed2..9a2540666 100644
--- a/docs/chapter03_DL-basics/3.10_mlp-pytorch.md
+++ b/docs/chapter03_DL-basics/3.10_mlp-pytorch.md
@@ -20,11 +20,11 @@ import d2lzh_pytorch as d2l
 num_inputs, num_outputs, num_hiddens = 784, 10, 256
 
 net = nn.Sequential(
-        d2l.FlattenLayer(),
-        nn.Linear(num_inputs, num_hiddens),
-        nn.ReLU(),
-        nn.Linear(num_hiddens, num_outputs),
-        )
+    d2l.FlattenLayer(),
+    nn.Linear(num_inputs, num_hiddens),
+    nn.ReLU(),
+    nn.Linear(num_hiddens, num_outputs),
+)
 
 for params in net.parameters():
     init.normal_(params, mean=0, std=0.01)
diff --git a/docs/chapter03_DL-basics/3.13_dropout.md b/docs/chapter03_DL-basics/3.13_dropout.md
index a82921e4c..5618fe87a 100644
--- a/docs/chapter03_DL-basics/3.13_dropout.md
+++ b/docs/chapter03_DL-basics/3.13_dropout.md
@@ -49,7 +49,7 @@ def dropout(X, drop_prob):
     # 这种情况下把全部元素都丢弃
     if keep_prob == 0:
         return torch.zeros_like(X)
-    mask = (torch.randn(X.shape) < keep_prob).float()
+    mask = (torch.randn(X.shape).uniform_(0, 1) < keep_prob).float()
 
     return mask * X / keep_prob
 ```
@@ -61,14 +61,39 @@ X = torch.arange(16).view(2, 8)
 dropout(X, 0)
 ```
 
+输出:
+
+```
+tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
+        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
+```
+
+输入:
+
 ``` python
 dropout(X, 0.5)
 ```
 
+输出:
+
+```
+tensor([[ 0.,  0.,  4.,  6.,  0., 10., 12.,  0.],
+        [ 0., 18., 20.,  0.,  0.,  0., 28., 30.]])
+```
+
+输入:
+
 ``` python
 dropout(X, 1.0)
 ```
 
+输出:
+
+```
+tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
+        [0., 0., 0., 0., 0., 0., 0., 0.]])
+```
+
 ### 3.13.2.1 定义模型参数
 
 实验中,我们依然使用3.6节(softmax回归的从零开始实现)中介绍的Fashion-MNIST数据集。我们将定义一个包含两个隐藏层的多层感知机,其中两个隐藏层的输出个数都是256。
@@ -124,7 +149,7 @@ def evaluate_accuracy(data_iter, net):
     return acc_sum / n
 ```
 
-> 注:将上诉`evaluate_accuracy`写回`d2lzh_pytorch`后要重启一下jupyter kernel才会生效。
+> 注:将上述`evaluate_accuracy`写回`d2lzh_pytorch`后要重启一下jupyter kernel才会生效。
 
 ### 3.13.2.3 训练和测试模型
 
@@ -155,15 +180,15 @@ epoch 5, loss 0.0016, train acc 0.849, test acc 0.850
 
 ``` python
 net = nn.Sequential(
-        d2l.FlattenLayer(),
-        nn.Linear(num_inputs, num_hiddens1),
-        nn.ReLU(),
-        nn.Dropout(drop_prob1),
-        nn.Linear(num_hiddens1, num_hiddens2),
-        nn.ReLU(),
-        nn.Dropout(drop_prob2),
-        nn.Linear(num_hiddens2, 10)
-        )
+    d2l.FlattenLayer(),
+    nn.Linear(num_inputs, num_hiddens1),
+    nn.ReLU(),
+    nn.Dropout(drop_prob1),
+    nn.Linear(num_hiddens1, num_hiddens2),
+    nn.ReLU(),
+    nn.Dropout(drop_prob2),
+    nn.Linear(num_hiddens2, 10)
+)
 
 for param in net.parameters():
     nn.init.normal_(param, mean=0, std=0.01)
diff --git a/docs/chapter03_DL-basics/3.15_numerical-stability-and-init.md b/docs/chapter03_DL-basics/3.15_numerical-stability-and-init.md
index 6fd89c375..f3eb2f1db 100644
--- a/docs/chapter03_DL-basics/3.15_numerical-stability-and-init.md
+++ b/docs/chapter03_DL-basics/3.15_numerical-stability-and-init.md
@@ -14,6 +14,11 @@
 
 在神经网络中,通常需要随机初始化模型参数。下面我们来解释这样做的原因。
 
+
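The dropout fix above is worth a sanity check: `torch.randn` samples from N(0, 1), so roughly 16% of its draws exceed 1 and elements were dropped even with `drop_prob = 0`; the in-place `.uniform_(0, 1)` overwrite makes the comparison against `keep_prob` behave as intended (`torch.rand(X.shape)` would be the more direct spelling of the same draw). A minimal sketch verifying the corrected behavior, including the now-deterministic `dropout(X, 0)` output:

``` python
import torch

def dropout(X, drop_prob):
    # Inverted dropout: zero each element with probability drop_prob,
    # then rescale survivors by 1 / keep_prob so expectations match.
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return torch.zeros_like(X)
    # U[0, 1) draws are always < 1, so drop_prob = 0 keeps everything;
    # the old N(0, 1) mask dropped ~16% of elements even in that case.
    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X / keep_prob

X = torch.arange(16, dtype=torch.float).view(2, 8)
assert torch.equal(dropout(X, 0), X)                      # nothing dropped
assert torch.equal(dropout(X, 1.0), torch.zeros_like(X))  # all dropped
print(dropout(X, 0.5))  # ~half the entries zeroed, the rest doubled
```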
diff --git a/docs/chapter03_DL-basics/3.16_kaggle-house-price.md b/docs/chapter03_DL-basics/3.16_kaggle-house-price.md
-
 ## 3.16.6 模型选择
@@ -223,6 +216,19 @@ train_l, valid_l = k_fold(k, train_features, train_labels, num_epochs, lr, weigh
print('%d-fold validation: avg train rmse %f, avg valid rmse %f' % (k, train_l, valid_l))
```
+输出:
+
+```
+fold 0, train rmse 0.170228, valid rmse 0.156995
+fold 1, train rmse 0.162570, valid rmse 0.191748
+fold 2, train rmse 0.164106, valid rmse 0.168666
+fold 3, train rmse 0.168130, valid rmse 0.154564
+fold 4, train rmse 0.163757, valid rmse 0.183091
+5-fold validation: avg train rmse 0.165758, avg valid rmse 0.171013
+```
+
+
+
有时候你会发现一组参数的训练误差可以达到很低,但是在$K$折交叉验证上的误差可能反而较高。这种现象很可能是由过拟合造成的。因此,当训练误差降低时,我们要观察$K$折交叉验证上的误差是否也相应降低。
## 3.16.7 预测并在Kaggle提交结果
@@ -246,7 +252,8 @@ def train_and_pred(train_features, test_features, train_labels, test_data,
设计好模型并调好超参数之后,下一步就是对测试数据集上的房屋样本做价格预测。如果我们得到与交叉验证时差不多的训练误差,那么这个结果很可能是理想的,可以在Kaggle上提交结果。
``` python
-train_and_pred(train_features, test_features, train_labels, test_data, num_epochs, lr, weight_decay, batch_size)
+train_and_pred(train_features, test_features, train_labels, test_data,
+ num_epochs, lr, weight_decay, batch_size)
```
输出:
```
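The per-fold lines above are printed inside the book's `k_fold` loop, which holds out one fold at a time for validation and averages the final-epoch RMSEs. The fold splitting is the crux of that loop; here is a self-contained sketch of the splitting logic (modeled on the chapter's `get_k_fold_data`; the toy data and exact slicing are my assumptions, not a verbatim copy):

``` python
import torch

def get_k_fold_data(k, i, X, y):
    # Split X, y into k contiguous folds; fold i becomes the validation
    # set and the remaining k-1 folds are concatenated for training.
    assert k > 1
    fold_size = X.shape[0] // k
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid

# Toy demo: 10 samples, k = 5, so each validation fold holds 2 samples.
X = torch.arange(20, dtype=torch.float).view(10, 2)
y = torch.arange(10, dtype=torch.float)
X_train, y_train, X_valid, y_valid = get_k_fold_data(5, 1, X, y)
print(X_valid)        # rows 2-3 of X, i.e. fold 1
print(X_train.shape)  # torch.Size([8, 2])
```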
diff --git a/script/prepare_wwwdocs.sh b/script/prepare_wwwdocs.sh
index db59c2c5b..2963b0108 100644
--- a/script/prepare_wwwdocs.sh
+++ b/script/prepare_wwwdocs.sh
@@ -12,14 +12,15 @@ mkdir -p ${docs}
echo '根据项目README.md自动生成目录文件 ......'
cat README.md \
| awk '/^## 目录/ {print "* [前言]()"} \
- /^### / && /\.md)$/ {print "* "substr($0, 5)} \
- /^### / && ! /\.md)$/ {dot=$2; gsub(/\./, "\\.", dot); print "* "dot " " $3;} \
- /^\[/ {print $0} /\.\.\./ {print " * "$0}' \
+              /^### / && /\.md/ {print "* "substr($0, 5)} \
+              /^### / && ! /\.md/ {dot=$2; gsub(/\./, "\\.", dot); print "* "dot " " $3;} \
+ /^\[/ {print $0} \
+ /\.\.\./ {print " * "$0}' \
| sed 's/https:\/\/github.com\/ShusenTang\/Dive-into-DL-PyTorch\/blob\/master\/docs\///g' \
| sed 's/^\[/ \* \[/g' \
> ${docs}/_sidebar.md
-echo '根据项目根目录下README.md以及docs/README.md合并生成项目所需${docs}导航 ......'
+echo "根据项目根目录下README.md以及docs/README.md合并生成项目所需${docs}导航 ......"
sredme=`cat docs/README.md`
cat README.md | awk -v sredme="${sredme}" '!/^### / && !/^\[/ && !/更新/ {print $0} /^## 目录/ {print sredme}' | sed 's/## 目录/## 说明/g' > ${docs}/README.md
@@ -112,7 +113,7 @@ ln -fs ../docs/chapter* .
ln -fs ../img .
cp ../script/docsify.js .
-port_used=`lsof -nP -iTCP -sTCP:LISTEN | grep 3000 | wc -l`
+port_used=`lsof -nP -iTCP -sTCP:LISTEN | grep ':3000' | wc -l`
if [[ ${port_used} -gt 0 ]]; then
echo '【警告】当前3000端口已被占用,请停止进程后再运行此脚本!'
exit 1
@@ -123,5 +124,9 @@ if command -v docsify > /dev/null; then
docsify serve .
else
#echo 'docsify-cli 没有安装,建议使用:npm i docsify-cli -g'
- python -m SimpleHTTPServer 3000
+ if command -v python3 > /dev/null; then
+ python3 -m http.server 3000
+ else
+ python -m SimpleHTTPServer 3000
+ fi
fi
\ No newline at end of file
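A last note on the server fallback: `SimpleHTTPServer` is the Python 2 module name, so probing for `python3` first and using `http.server` keeps the script working on systems where `python` points at Python 3. The module invocation the script uses is equivalent to this small programmatic version (a sketch for reference only, not part of the patch):

``` python
from http.server import HTTPServer, SimpleHTTPRequestHandler

# Serve the current working directory on port 3000, matching the
# script's `python3 -m http.server 3000` fallback.
server = HTTPServer(('', 3000), SimpleHTTPRequestHandler)
print('Serving on http://localhost:3000 (Ctrl-C to stop)')
server.serve_forever()
```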