<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta content="MyGrad is a library that provides drop-in automatic differentiation for NumPy" name="description" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Automatic Differentiation — Python Like You Mean It</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/my_theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script async="async" src="https://www.googletagmanager.com/gtag/js?id=UA-115029372-1"></script>
<script src="../_static/gtag.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
<script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Module 3: Problems" href="../module_3_problems.html" />
<link rel="prev" title="Advanced Indexing" href="AdvancedIndexing.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> Python Like You Mean It
</a>
<div class="version">
1.4
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Table of Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../intro.html">Python Like You Mean It</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_1.html">Module 1: Getting Started with Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_2.html">Module 2: The Essentials of Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_2_problems.html">Module 2: Problems</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../module_3.html">Module 3: The Essentials of NumPy</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="IntroducingTheNDarray.html">Introducing the ND-array</a></li>
<li class="toctree-l2"><a class="reference internal" href="AccessingDataAlongMultipleDimensions.html">Accessing Data Along Multiple Dimensions in an Array</a></li>
<li class="toctree-l2"><a class="reference internal" href="BasicArrayAttributes.html">Basic Array Attributes</a></li>
<li class="toctree-l2"><a class="reference internal" href="FunctionsForCreatingNumpyArrays.html">Functions for Creating NumPy Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="ArrayTraversal.html">Iterating Over Arrays & Array-Traversal Order</a></li>
<li class="toctree-l2"><a class="reference internal" href="VectorizedOperations.html">“Vectorized” Operations: Optimized Computations on NumPy Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="Broadcasting.html">Array Broadcasting</a></li>
<li class="toctree-l2"><a class="reference internal" href="BasicIndexing.html">Introducing Basic and Advanced Indexing</a></li>
<li class="toctree-l2"><a class="reference internal" href="AdvancedIndexing.html">Advanced Indexing</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Automatic Differentiation</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#Introduction-to-MyGrad">Introduction to MyGrad</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#MyGrad-Adds-“Drop-In”-AutoDiff-to-NumPy">MyGrad Adds “Drop-In” AutoDiff to NumPy</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#Vectorized-Auto-Differentiation">Vectorized Auto-Differentiation</a></li>
<li class="toctree-l3"><a class="reference internal" href="#Visualizing-the-Derivative">Visualizing the Derivative</a></li>
<li class="toctree-l3"><a class="reference internal" href="#Seek-and-Derive">Seek and Derive</a></li>
<li class="toctree-l3"><a class="reference internal" href="#Applying-Automatic-Differentiation:-Solving-Optimization-Problems">Applying Automatic Differentiation: Solving Optimization Problems</a></li>
<li class="toctree-l3"><a class="reference internal" href="#Reading-Comprehension-Exercise-Solutions">Reading Comprehension Exercise Solutions</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../module_3_problems.html">Module 3: Problems</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_4.html">Module 4: Object Oriented Programming</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_5.html">Module 5: Odds and Ends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../changes.html">Changelog</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Python Like You Mean It</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> »</li>
<li><a href="../module_3.html">Module 3: The Essentials of NumPy</a> »</li>
<li>Automatic Differentiation</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/Module3_IntroducingNumpy/AutoDiff.md.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<style>
/* CSS for nbsphinx extension */
/* remove conflicting styling from Sphinx themes */
div.nbinput.container div.prompt *,
div.nboutput.container div.prompt *,
div.nbinput.container div.input_area pre,
div.nboutput.container div.output_area pre,
div.nbinput.container div.input_area .highlight,
div.nboutput.container div.output_area .highlight {
border: none;
padding: 0;
margin: 0;
box-shadow: none;
}
div.nbinput.container > div[class*=highlight],
div.nboutput.container > div[class*=highlight] {
margin: 0;
}
div.nbinput.container div.prompt *,
div.nboutput.container div.prompt * {
background: none;
}
div.nboutput.container div.output_area .highlight,
div.nboutput.container div.output_area pre {
background: unset;
}
div.nboutput.container div.output_area div.highlight {
color: unset; /* override Pygments text color */
}
/* avoid gaps between output lines */
div.nboutput.container div[class*=highlight] pre {
line-height: normal;
}
/* input/output containers */
div.nbinput.container,
div.nboutput.container {
display: -webkit-flex;
display: flex;
align-items: flex-start;
margin: 0;
width: 100%;
}
@media (max-width: 540px) {
div.nbinput.container,
div.nboutput.container {
flex-direction: column;
}
}
/* input container */
div.nbinput.container {
padding-top: 5px;
}
/* last container */
div.nblast.container {
padding-bottom: 5px;
}
/* input prompt */
div.nbinput.container div.prompt pre {
color: #307FC1;
}
/* output prompt */
div.nboutput.container div.prompt pre {
color: #BF5B3D;
}
/* all prompts */
div.nbinput.container div.prompt,
div.nboutput.container div.prompt {
width: 4.5ex;
padding-top: 5px;
position: relative;
user-select: none;
}
div.nbinput.container div.prompt > div,
div.nboutput.container div.prompt > div {
position: absolute;
right: 0;
margin-right: 0.3ex;
}
@media (max-width: 540px) {
div.nbinput.container div.prompt,
div.nboutput.container div.prompt {
width: unset;
text-align: left;
padding: 0.4em;
}
div.nboutput.container div.prompt.empty {
padding: 0;
}
div.nbinput.container div.prompt > div,
div.nboutput.container div.prompt > div {
position: unset;
}
}
/* disable scrollbars on prompts */
div.nbinput.container div.prompt pre,
div.nboutput.container div.prompt pre {
overflow: hidden;
}
/* input/output area */
div.nbinput.container div.input_area,
div.nboutput.container div.output_area {
-webkit-flex: 1;
flex: 1;
overflow: auto;
}
@media (max-width: 540px) {
div.nbinput.container div.input_area,
div.nboutput.container div.output_area {
width: 100%;
}
}
/* input area */
div.nbinput.container div.input_area {
border: 1px solid #e0e0e0;
border-radius: 2px;
/*background: #f5f5f5;*/
}
/* override MathJax center alignment in output cells */
div.nboutput.container div[class*=MathJax] {
text-align: left !important;
}
/* override sphinx.ext.imgmath center alignment in output cells */
div.nboutput.container div.math p {
text-align: left;
}
/* standard error */
div.nboutput.container div.output_area.stderr {
background: #fdd;
}
/* ANSI colors */
.ansi-black-fg { color: #3E424D; }
.ansi-black-bg { background-color: #3E424D; }
.ansi-black-intense-fg { color: #282C36; }
.ansi-black-intense-bg { background-color: #282C36; }
.ansi-red-fg { color: #E75C58; }
.ansi-red-bg { background-color: #E75C58; }
.ansi-red-intense-fg { color: #B22B31; }
.ansi-red-intense-bg { background-color: #B22B31; }
.ansi-green-fg { color: #00A250; }
.ansi-green-bg { background-color: #00A250; }
.ansi-green-intense-fg { color: #007427; }
.ansi-green-intense-bg { background-color: #007427; }
.ansi-yellow-fg { color: #DDB62B; }
.ansi-yellow-bg { background-color: #DDB62B; }
.ansi-yellow-intense-fg { color: #B27D12; }
.ansi-yellow-intense-bg { background-color: #B27D12; }
.ansi-blue-fg { color: #208FFB; }
.ansi-blue-bg { background-color: #208FFB; }
.ansi-blue-intense-fg { color: #0065CA; }
.ansi-blue-intense-bg { background-color: #0065CA; }
.ansi-magenta-fg { color: #D160C4; }
.ansi-magenta-bg { background-color: #D160C4; }
.ansi-magenta-intense-fg { color: #A03196; }
.ansi-magenta-intense-bg { background-color: #A03196; }
.ansi-cyan-fg { color: #60C6C8; }
.ansi-cyan-bg { background-color: #60C6C8; }
.ansi-cyan-intense-fg { color: #258F8F; }
.ansi-cyan-intense-bg { background-color: #258F8F; }
.ansi-white-fg { color: #C5C1B4; }
.ansi-white-bg { background-color: #C5C1B4; }
.ansi-white-intense-fg { color: #A1A6B2; }
.ansi-white-intense-bg { background-color: #A1A6B2; }
.ansi-default-inverse-fg { color: #FFFFFF; }
.ansi-default-inverse-bg { background-color: #000000; }
.ansi-bold { font-weight: bold; }
.ansi-underline { text-decoration: underline; }
div.nbinput.container div.input_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight].math,
div.nboutput.container div.output_area.rendered_html,
div.nboutput.container div.output_area > div.output_javascript,
div.nboutput.container div.output_area:not(.rendered_html) > img{
padding: 5px;
margin: 0;
}
/* fix copybtn overflow problem in chromium (needed for 'sphinx_copybutton') */
div.nbinput.container div.input_area > div[class^='highlight'],
div.nboutput.container div.output_area > div[class^='highlight']{
overflow-y: hidden;
}
/* hide copybtn icon on prompts (needed for 'sphinx_copybutton') */
.prompt .copybtn {
display: none;
}
/* Some additional styling taken form the Jupyter notebook CSS */
div.rendered_html table {
border: none;
border-collapse: collapse;
border-spacing: 0;
color: black;
font-size: 12px;
table-layout: fixed;
}
div.rendered_html thead {
border-bottom: 1px solid black;
vertical-align: bottom;
}
div.rendered_html tr,
div.rendered_html th,
div.rendered_html td {
text-align: right;
vertical-align: middle;
padding: 0.5em 0.5em;
line-height: normal;
white-space: normal;
max-width: none;
border: none;
}
div.rendered_html th {
font-weight: bold;
}
div.rendered_html tbody tr:nth-child(odd) {
background: #f5f5f5;
}
div.rendered_html tbody tr:hover {
background: rgba(66, 165, 245, 0.2);
}
/* CSS overrides for sphinx_rtd_theme */
/* 24px margin */
.nbinput.nblast.container,
.nboutput.nblast.container {
margin-bottom: 19px; /* padding has already 5px */
}
/* ... except between code cells! */
.nblast.container + .nbinput.container {
margin-top: -19px;
}
.admonition > p:before {
margin-right: 4px; /* make room for the exclamation icon */
}
/* Fix math alignment, see https://github.com/rtfd/sphinx_rtd_theme/pull/686 */
.math {
text-align: unset;
}
</style>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>A Note to the Reader</strong>:</p>
<p>This section requires some basic familiarity with calculus; the reader will be expected to know what it means to take the derivative of a function, and to have some familiarity with <a class="reference external" href="https://en.wikipedia.org/wiki/Leibniz%27s_notation">Leibniz notation</a> for representing derivatives.</p>
</div>
<div class="section" id="Automatic-Differentiation">
<h1>Automatic Differentiation<a class="headerlink" href="#Automatic-Differentiation" title="Permalink to this headline"></a></h1>
<p>(Full disclosure: I created MyGrad, which we will be discussing here. Like PLYMI, MyGrad is a completely free and open-source educational resource.)</p>
<p>This section is not about the essentials of NumPy; rather, it is about a third-party library, <a class="reference external" href="https://github.com/rsokl/MyGrad">MyGrad</a>, that adds a new capability to NumPy: automatic differentiation, the ability to algorithmically evaluate derivatives of functions.</p>
<p>Automatic differentiation (a.k.a. autodiff) is an important technology for scientific computing and machine learning; it enables us to measure rates of change (or “cause and effect”) through our code via the derivatives of the mathematical functions that our code computes. Autodiff is proving to be so crucial to advancements in STEM computing that it ought to be introduced to audiences early in their numerical computing journeys. This is the motivation for including this section in PLYMI’s NumPy module.</p>
<p>An automatic differentiation library provides its users with a suite of mathematical functions and tools that are specially designed: any mathematical computation that you perform with this library can also be used to compute the <em>derivatives</em> of that result. To help paint a picture of this, consider the following pseudocode where we use an autodiff library to compute <span class="math notranslate nohighlight">\(f(x) = \sqrt{x}\)</span> evaluated at <span class="math notranslate nohighlight">\(x=1\)</span>, as well as the derivative <span class="math notranslate nohighlight">\(\mathrm{d}f/\mathrm{d}x = 1/(2\sqrt{x})\)</span>, also
evaluated at <span class="math notranslate nohighlight">\(x=1\)</span>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># pseudocode illustrating autodiff in action</span>
<span class="o">>>></span> <span class="kn">from</span> <span class="nn">autodiff_libray</span> <span class="kn">import</span> <span class="n">sqrt</span><span class="p">,</span> <span class="n">derivative</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="mf">1.0</span>
<span class="o">>>></span> <span class="n">f</span> <span class="o">=</span> <span class="n">sqrt</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">df_dx</span> <span class="o">=</span> <span class="n">derivative</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">f</span><span class="p">,</span> <span class="n">df_dx</span>
<span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">)</span>
</pre></div>
</div>
<p>See that we did not need to know or derive the fact that <span class="math notranslate nohighlight">\(\mathrm{d}f/\mathrm{d}x = 1/(2\sqrt{x})\)</span> – the autodiff library does this for us! This is what sets <code class="docutils literal notranslate"><span class="pre">autodiff_library.sqrt</span></code> apart from <code class="docutils literal notranslate"><span class="pre">math.sqrt</span></code> from Python’s standard library.</p>
<p>Presently, some of the most popular Python-centric autodiff libraries include <a class="reference external" href="https://pytorch.org/">PyTorch</a>, <a class="reference external" href="https://www.tensorflow.org/">TensorFlow</a>, and <a class="reference external" href="https://jax.readthedocs.io/en/latest/jax.numpy.html">JAX</a>. Among these “industrial-grade” autodiff libraries, JAX strives to provide the most NumPy-like experience. <a class="reference external" href="https://github.com/rsokl/MyGrad">MyGrad</a> takes this one step further and provides true drop-in automatic differentiation to NumPy.</p>
<div class="section" id="Introduction-to-MyGrad">
<h2>Introduction to MyGrad<a class="headerlink" href="#Introduction-to-MyGrad" title="Permalink to this headline"></a></h2>
<p>Install MyGrad into your Python environment. Open your terminal, activate your desired Python environment, and run the following command.</p>
<div class="highlight-shell notranslate"><div class="highlight"><pre><span></span>pip install mygrad
</pre></div>
</div>
<p>Let’s jump right in with a simple example of using MyGrad to evaluate the derivative of a function at a specific point. We’ll take our function to be <span class="math notranslate nohighlight">\(f(x)=x^2\)</span>, and compute its instantaneous slope at <span class="math notranslate nohighlight">\(x=5\)</span>, i.e. <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\big|_{x=5}\)</span>. The derivative of this function is <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}=2x\)</span>, thus <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\big|_{x=5} = 10\)</span>. Let’s reproduce this result via auto-differentiation using MyGrad.</p>
<p>We begin by creating a <code class="docutils literal notranslate"><span class="pre">mygrad.Tensor</span></code>. This is MyGrad’s analog to <a class="reference external" href="https://www.pythonlikeyoumeanit.com/Module3_IntroducingNumpy/IntroducingTheNDarray.html">numpy’s ndarray</a>. <a class="reference external" href="https://mygrad.readthedocs.io/en/latest/tensor.html">MyGrad’s tensor</a> behaves just like NumPy’s array in just about every way that you can think of, e.g. it supports multi-dimensional indexing, reshaping, and vectorized operations with broadcasting semantics, but it is also capable of facilitating automatic
differentiation. This tensor will <em>store the point(s) at which we wish to evaluate our function and its derivative</em>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># `mygrad.Tensor` behaves like `numpy.array` but it supports auto-diff</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">mygrad</span> <span class="k">as</span> <span class="nn">mg</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="mf">5.0</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">x</span>
<span class="n">Tensor</span><span class="p">(</span><span class="mf">5.0</span><span class="p">)</span>
</pre></div>
</div>
<p>We can then pass this tensor directly into NumPy’s mathematical functions. In this example, our function is <span class="math notranslate nohighlight">\(f(x)=x^2\)</span>. We can compute this just as we would with NumPy: either with <code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">**</span> <span class="pre">2</span></code> or with <code class="docutils literal notranslate"><span class="pre">numpy.square(x)</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># evaluating f(5)</span>
<span class="o">>>></span> <span class="n">fx</span> <span class="o">=</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span>
<span class="o">>>></span> <span class="n">fx</span>
<span class="n">Tensor</span><span class="p">(</span><span class="mf">25.0</span><span class="p">)</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">fx</span></code> stores the value of our function – as a <code class="docutils literal notranslate"><span class="pre">Tensor</span></code> – at the given evaluation points, which in this case is <span class="math notranslate nohighlight">\(f(5)=5^2=25\)</span>.</p>
<p>Now we can use MyGrad to evaluate the derivative of <span class="math notranslate nohighlight">\(f(x)\)</span> at <span class="math notranslate nohighlight">\(x=5\)</span>. Invoking <code class="docutils literal notranslate"><span class="pre">fx.backward()</span></code> instructs MyGrad to evaluate the derivative of <code class="docutils literal notranslate"><span class="pre">fx</span></code> <em>for each variable that</em> <code class="docutils literal notranslate"><span class="pre">fx</span></code> <em>depends on</em> – the derivatives of multivariable functions can also be computed. In this case, <span class="math notranslate nohighlight">\(x\)</span> is the only such variable.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># trigger auto-differentiation of `fx` with respect to</span>
<span class="c1"># all of the variables that it depends on</span>
<span class="o">>>></span> <span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
</pre></div>
</div>
<p>The value of <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\big|_{x=5}\)</span> is stored in the attribute <code class="docutils literal notranslate"><span class="pre">x.grad</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># accessing df/dx @ x=5</span>
<span class="o">>>></span> <span class="n">x</span><span class="o">.</span><span class="n">grad</span>
<span class="n">array</span><span class="p">(</span><span class="mf">10.</span><span class="p">)</span>
</pre></div>
</div>
<p>As expected, MyGrad computes the appropriate value for the evaluated derivative: <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\big|_{x=5}=2 \times 5=10\)</span>. Note that all <code class="docutils literal notranslate"><span class="pre">Tensor</span></code> instances have a <code class="docutils literal notranslate"><span class="pre">grad</span></code> attribute, but prior to invoking <code class="docutils literal notranslate"><span class="pre">fx.backward()</span></code>, <code class="docutils literal notranslate"><span class="pre">x.grad</span></code> would have simply returned <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p>
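<p>As a quick sanity check of this behavior (a minimal sketch using only the features shown above), we can confirm that <code class="docutils literal notranslate"><span class="pre">x.grad</span></code> is <code class="docutils literal notranslate"><span class="pre">None</span></code> before <code class="docutils literal notranslate"><span class="pre">backward()</span></code> is invoked and is populated afterwards.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># a minimal sketch: `x.grad` is None until `fx.backward()` is invoked
>>> x = mg.tensor(5.0)
>>> print(x.grad)
None
>>> fx = x ** 2
>>> fx.backward()
>>> x.grad
array(10.)
</pre></div>
</div>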
<p>It is important to reiterate that MyGrad <em>never gives us the actual function</em> <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\)</span>; it only computes the derivative evaluated at a specific input <span class="math notranslate nohighlight">\(x=5\)</span>.</p>
<div class="section" id="MyGrad-Adds-“Drop-In”-AutoDiff-to-NumPy">
<h3>MyGrad Adds “Drop-In” AutoDiff to NumPy<a class="headerlink" href="#MyGrad-Adds-“Drop-In”-AutoDiff-to-NumPy" title="Permalink to this headline"></a></h3>
<p>MyGrad’s functions are intentionally designed to mirror NumPy’s functions almost exactly. In fact, for all of the NumPy functions that MyGrad mirrors, we can pass a tensor to a NumPy function and it will be “coerced” into returning a tensor instead of a NumPy array – thus we can differentiate through NumPy functions!</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># showing off "drop-in" autodiff through NumPy functions</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="mf">3.0</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">square</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="c1"># note that we are using a numpy function here!</span>
<span class="o">>>></span> <span class="n">y</span> <span class="c1"># y is a tensor, not a numpy array</span>
<span class="n">Tensor</span><span class="p">(</span><span class="mf">9.</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">y</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span> <span class="c1"># compute derivatives of y</span>
<span class="o">>>></span> <span class="n">x</span><span class="o">.</span><span class="n">grad</span> <span class="c1"># stores dy/dx @ x=3</span>
<span class="n">array</span><span class="p">(</span><span class="mf">6.</span><span class="p">)</span>
</pre></div>
</div>
<p>How does this work? MyGrad’s tensor is able to <a class="reference external" href="https://numpy.org/neps/nep-0018-array-function-protocol.html">tell NumPy’s function to actually call a MyGrad function</a>. That is, the expression</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">np</span><span class="o">.</span><span class="n">square</span><span class="p">(</span><span class="n">a_mygrad_tensor</span><span class="p">)</span>
</pre></div>
</div>
<p><em>actually</em> calls</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">mg</span><span class="o">.</span><span class="n">square</span><span class="p">(</span><span class="n">a_mygrad_tensor</span><span class="p">)</span>
</pre></div>
</div>
<p>under the hood. Not only is this convenient, but it also means that you can take a complex function that is written in terms of numpy functions and pass a tensor through it so that you can differentiate that function!</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">some_library</span> <span class="kn">import</span> <span class="n">complicated_numpy_function</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="o">...</span><span class="p">)</span>
<span class="n">out_tensor</span> <span class="o">=</span> <span class="n">complicated_numpy_function</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">out_tensor</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span> <span class="c1"># compute d(complicated_numpy_function) / dx !</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="Vectorized-Auto-Differentiation">
<h2>Vectorized Auto-Differentiation<a class="headerlink" href="#Vectorized-Auto-Differentiation" title="Permalink to this headline"></a></h2>
<p>Like NumPy’s array, MyGrad’s tensor supports <a class="reference external" href="https://www.pythonlikeyoumeanit.com/Module3_IntroducingNumpy/VectorizedOperations.html">vectorized operations</a>, allowing us to evaluate the derivative of a function at multiple points simultaneously. Let’s again take the function <span class="math notranslate nohighlight">\(f(x)=x^2\)</span>, which has the derivative <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}=2x\)</span>. Now, instead of passing in a single number to <code class="docutils literal notranslate"><span class="pre">Tensor</span></code>, we can pass in a list of values corresponding to all the points at which we
want to compute the derivative. We can then find the instantaneous slope of our function at these points, just as before. First, we will pass <code class="docutils literal notranslate"><span class="pre">x</span></code> into our function of interest, namely <span class="math notranslate nohighlight">\(f(x)=x^2\)</span>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># using vectorized operations to evaluate a function</span>
<span class="c1"># at multiple locations</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">tensor</span><span class="p">([</span><span class="mf">2.0</span><span class="p">,</span> <span class="o">-</span><span class="mf">4.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">])</span>
<span class="o">>>></span> <span class="n">fx</span> <span class="o">=</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span>
<span class="o">>>></span> <span class="n">fx</span>
<span class="n">Tensor</span><span class="p">([</span> <span class="mf">4.</span><span class="p">,</span> <span class="mf">16.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">9.</span><span class="p">])</span>
</pre></div>
</div>
<p>Here MyGrad vectorizes the operation, performing it element-wise:</p>
<div class="math notranslate nohighlight">
\begin{equation}
f\big([2,\, -4,\, 1,\, 3]\big) = \big[f(2),\, f(-4),\, f(1),\, f(3)\big].
\end{equation}</div><p>We can elegantly exploit this vectorization to find the derivative of <span class="math notranslate nohighlight">\(f(x)\)</span> evaluated at each point in <code class="docutils literal notranslate"><span class="pre">x</span></code> by invoking <code class="docutils literal notranslate"><span class="pre">fx.backward()</span></code>. This will trigger the vectorized computation</p>
<div class="math notranslate nohighlight">
\begin{equation}
\bigg[\frac{\mathrm{d}f}{\mathrm{d}x}\bigg|_{x=2},\: \frac{\mathrm{d}f}{\mathrm{d}x}\bigg|_{x=-4},\: \frac{\mathrm{d}f}{\mathrm{d}x}\bigg|_{x=1},\: \frac{\mathrm{d}f}{\mathrm{d}x}\bigg|_{x=3} \bigg],
\end{equation}</div><p>which will be stored in <code class="docutils literal notranslate"><span class="pre">x.grad</span></code>. It is important to recognize that <code class="docutils literal notranslate"><span class="pre">x.grad[i]</span></code> stores the derivative of <code class="docutils literal notranslate"><span class="pre">fx</span></code> evaluated at <code class="docutils literal notranslate"><span class="pre">x[i]</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Trigger vectorized auto-differentiation</span>
<span class="c1"># Computes the instantaneous slope of</span>
<span class="c1"># f(x) = x ** 2 at 2, 4, 1, and 3</span>
<span class="o">>>></span> <span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="o">>>></span> <span class="n">x</span><span class="o">.</span><span class="n">grad</span> <span class="c1"># df/dx @ x = 2, -4, 1, and 3, respectively</span>
<span class="n">array</span><span class="p">([</span> <span class="mf">4.</span><span class="p">,</span> <span class="o">-</span><span class="mf">8.</span><span class="p">,</span> <span class="mf">2.</span><span class="p">,</span> <span class="mf">6.</span><span class="p">])</span>
</pre></div>
</div>
<p>As expected, MyGrad finds the appropriate value for the derivative evaluated at each respective element in <code class="docutils literal notranslate"><span class="pre">x</span></code>.</p>
<div class="alert alert-info"></div>
<div class="section" id="Visualizing-the-Derivative">
<h2>Visualizing the Derivative<a class="headerlink" href="#Visualizing-the-Derivative" title="Permalink to this headline"></a></h2>
<p>The following code block demonstrates how easy it is to visualize a function’s derivative by using MyGrad. Note MyGrad’s <code class="docutils literal notranslate"><span class="pre">Tensor</span></code> stores a NumPy-array of its data, which can be accessed via the <code class="docutils literal notranslate"><span class="pre">.data</span></code> attribute. Any time a library needs to be passed a NumPy array, you can access this array from a tensor through this attribute.</p>
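<p>For instance, here is a minimal sketch (the particular values are arbitrary) of accessing a tensor’s underlying NumPy array via <code class="docutils literal notranslate"><span class="pre">.data</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># a minimal sketch: a tensor's `.data` attribute is a NumPy array
>>> x = mg.tensor([1.0, 2.0, 3.0])
>>> x.data
array([1., 2., 3.])
>>> isinstance(x.data, np.ndarray)
True
</pre></div>
</div>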
<p>Study the plot displayed below: notice that the derivative is always <span class="math notranslate nohighlight">\(0\)</span> when the function has a horizontal slope, and that the derivative takes on a positive value wherever the parent function has a positive slope.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[1]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">mygrad</span> <span class="k">as</span> <span class="nn">mg</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
<span class="o">%</span><span class="k">matplotlib</span> inline
<span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="mi">2</span> <span class="o">*</span> <span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">cos</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">x</span> <span class="o">/</span> <span class="mi">3</span><span class="p">)</span> <span class="o">*</span> <span class="mi">100</span>
<span class="k">def</span> <span class="nf">plot_func_and_deriv</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">func</span><span class="p">):</span>
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">tensor</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="c1"># compute f(x)</span>
<span class="n">y</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span> <span class="c1"># compute df/dx</span>
<span class="c1"># plot f(x) vs x</span>
<span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="n">y</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"f(x)"</span><span class="p">)</span>
<span class="c1"># plot df/dx vs x</span>
<span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">grad</span><span class="p">,</span> <span class="n">ls</span><span class="o">=</span><span class="s2">"--"</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"df/dx"</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">()</span>
<span class="k">return</span> <span class="n">fig</span><span class="p">,</span> <span class="n">ax</span>
<span class="c1"># We will plot f(x) and df/dx on the domain</span>
<span class="c1"># [0, 10] using 10,000 evenly-spaced points</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">10000</span><span class="p">)</span>
<span class="n">plot_func_and_deriv</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">f</span><span class="p">);</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/Module3_IntroducingNumpy_AutoDiff_14_0.png" src="../_images/Module3_IntroducingNumpy_AutoDiff_14_0.png" />
</div>
</div>
</div>
<div class="section" id="Seek-and-Derive">
<h2>Seek and Derive<a class="headerlink" href="#Seek-and-Derive" title="Permalink to this headline"></a></h2>
<p>Computers equipped with automatic differentiation libraries can make short work of derivatives that are well beyond the reach of mere mortals. Take the pathological function <span class="math">\begin{equation}
f(x)=e^{(\arctan(82x^3+\ln(x)))}\sqrt{25x^{\frac{1}{22930}}+39e^{\frac{2}{x}}-\sin(x)},
\end{equation}</span></p>
<p>the derivative of which would be miserable to do by hand. Thankfully we can have MyGrad compute the derivative at a collection of points for us, just as we did before.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Tensor containing the values x = 1, 2, ..., 10</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">11.0</span><span class="p">)</span>
<span class="c1"># Evaluated function at points x = 1, 2, ..., 10</span>
<span class="o">>>></span> <span class="n">fx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arctan</span><span class="p">(</span><span class="mi">82</span> <span class="o">*</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">3</span> <span class="o">+</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">x</span><span class="p">)))</span>
<span class="o">>>></span> <span class="n">fx</span> <span class="o">*=</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="mi">25</span> <span class="o">*</span> <span class="n">x</span> <span class="o">**</span> <span class="p">(</span><span class="mi">1</span> <span class="o">/</span> <span class="mi">22930</span><span class="p">)</span> <span class="o">+</span> <span class="mi">39</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="mi">2</span> <span class="o">/</span> <span class="n">x</span><span class="p">)</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
<span class="o">>>></span> <span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="o">>>></span> <span class="n">x</span><span class="o">.</span><span class="n">grad</span> <span class="c1"># df/dx evaluated at x = 1, 2, ..., 10</span>
<span class="n">array</span><span class="p">([</span><span class="o">-</span><span class="mf">7.44764313e+01</span><span class="p">,</span> <span class="o">-</span><span class="mf">1.09475963e+01</span><span class="p">,</span> <span class="o">-</span><span class="mf">3.78281290e+00</span><span class="p">,</span> <span class="o">-</span><span class="mf">1.86451297e+00</span><span class="p">,</span>
<span class="o">-</span><span class="mf">1.29207692e+00</span><span class="p">,</span> <span class="o">-</span><span class="mf">1.07197583e+00</span><span class="p">,</span> <span class="o">-</span><span class="mf">7.90459238e-01</span><span class="p">,</span> <span class="o">-</span><span class="mf">3.96212428e-01</span><span class="p">,</span>
<span class="o">-</span><span class="mf">8.16203127e-02</span><span class="p">,</span> <span class="o">-</span><span class="mf">3.17648949e-02</span><span class="p">])</span>
</pre></div>
</div>
<p>Even though it would be a pain to differentiate <span class="math notranslate nohighlight">\(f(x)\)</span> by hand, MyGrad can handle taking the derivative with no problems. To find the derivative of a complex function, we need only chain together the relevant functions and sit back – MyGrad will handle the rest. It accomplishes this feat by dutifully applying the chain rule over and over, using a simple algorithm called “back-propagation”. The authors of MyGrad had to write down the symbolic derivative for each elementary function (e.g.,
<span class="math notranslate nohighlight">\(e^x\)</span>, <span class="math notranslate nohighlight">\(\sqrt{x}\)</span>, <span class="math notranslate nohighlight">\(\arctan(x)\)</span>, etc.), but MyGrad’s code is responsible for systematically carrying out the chain rule to evaluate derivatives of arbitrarily-complex compositions of these functions.</p>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Reading Comprehension: Auto-differentiation</strong>:</p>
<p>Using MyGrad, compute the derivatives of the following functions. Have MyGrad evaluate the derivatives on the interval <span class="math notranslate nohighlight">\([-2,4]\)</span> at <span class="math notranslate nohighlight">\(30\)</span> evenly spaced points using <code class="docutils literal notranslate"><span class="pre">mygrad.linspace</span></code>. Additionally, plot these functions and their derivatives on the same domains, but using more densely-spaced points.</p>
<ul class="simple">
<li><p><span class="math notranslate nohighlight">\(f(x)=\frac{e^x}{e^x+1}\)</span></p></li>
<li><p><span class="math notranslate nohighlight">\(f(x)=e^{-\frac{(x-1)^2}{10}}\)</span></p></li>
<li><p><span class="math notranslate nohighlight">\(f(x)=\frac{\sin(x)}{x}-x^2\)</span></p></li>
<li><p><span class="math notranslate nohighlight">\(f(x)=9\sqrt{1+\frac{x^2}{9}}-9\)</span></p></li>
</ul>
</div>
</div>
<div class="section" id="Applying-Automatic-Differentiation:-Solving-Optimization-Problems">
<h2>Applying Automatic Differentiation: Solving Optimization Problems<a class="headerlink" href="#Applying-Automatic-Differentiation:-Solving-Optimization-Problems" title="Permalink to this headline"></a></h2>
<p>We are now familiar with what automatic differentiation is and what it does, but <em>why</em> is it so useful? One of the “killer applications” of autodiff libraries is that they help us solve challenging numerical optimization problems. These problems often read as: suppose we have some bounded, finite function <span class="math notranslate nohighlight">\(f(x)\)</span>; find the value of <span class="math notranslate nohighlight">\(x\)</span> that <em>minimizes</em> <span class="math notranslate nohighlight">\(f(x)\)</span>. That is, the “optimum” that we want to find is the value <span class="math notranslate nohighlight">\(x_\mathrm{min}\)</span> such that
<span class="math notranslate nohighlight">\(f(x_\mathrm{min}) \leq f(x)\)</span> for all other <span class="math notranslate nohighlight">\(x\)</span>.</p>
<p>How does automatic differentiation help us to solve such a problem? The derivative of a function evaluated at some <span class="math notranslate nohighlight">\(x_o\)</span> tells us the slope of the function – whether it is decreasing or increasing – at <span class="math notranslate nohighlight">\(x_o\)</span>. This is certainly useful information for helping us search for <span class="math notranslate nohighlight">\(x_\mathrm{min}\)</span>: always look in the direction of decreasing slope, until the slope goes to <span class="math notranslate nohighlight">\(0\)</span>.</p>
<p>We start our search for <span class="math notranslate nohighlight">\(x_{\mathrm{min}}\)</span> by picking a random starting value for <span class="math notranslate nohighlight">\(x_o\)</span>, using the autodiff library to compute <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\big|_{x=x_{o}}\)</span>, and then using that information to “step” <span class="math notranslate nohighlight">\(x_o\)</span> in the direction that “descends” <span class="math notranslate nohighlight">\(f(x)\)</span>. We repeat this process until we see that <span class="math notranslate nohighlight">\(\frac{\mathrm{d}f}{\mathrm{d}x}\big|_{x=x_{o}} \approx 0\)</span>. It must be noted that this approach towards finding <span class="math notranslate nohighlight">\(x_\mathrm{min}\)</span> is highly limited;
saddle-points can stop us in our tracks, and we will only be able to find <em>local</em> minima with this strategy. Nonetheless, it is still very useful!</p>
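<p>Written as an equation, each “step” of this search updates <span class="math notranslate nohighlight">\(x_o\)</span> according to the rule below, where <span class="math notranslate nohighlight">\(\delta\)</span> is a small positive scaling factor (called <code class="docutils literal notranslate"><span class="pre">step_scale</span></code> in the code that follows):</p>
<div class="math notranslate nohighlight">
\begin{equation}
x_o \rightarrow x_o - \delta \frac{\mathrm{d}f}{\mathrm{d}x}\bigg|_{x=x_o}
\end{equation}</div>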
<p>Let’s take a simple example. We’ll choose the function <span class="math notranslate nohighlight">\(f(x) = (x-8)^2\)</span> and the starting point <span class="math notranslate nohighlight">\(x=-1.5\)</span>. As we search for <span class="math notranslate nohighlight">\(x_\mathrm{min}\)</span> we don’t want to make our updates to <span class="math notranslate nohighlight">\(x_o\)</span> too big, so we will scale our updates by a factor of <span class="math notranslate nohighlight">\(3/10\)</span> (the value of which is chosen somewhat haphazardly here).</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Performing gradient descent on f(x) = (x - 8) ** 2</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">Tensor</span><span class="p">(</span><span class="o">-</span><span class="mf">1.5</span><span class="p">)</span>
<span class="n">step_scale</span> <span class="o">=</span> <span class="mf">0.3</span>
<span class="n">num_steps</span> <span class="o">=</span> <span class="mi">10</span>
<span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">for</span> <span class="n">step_cnt</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_steps</span><span class="p">):</span>
<span class="n">f</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span> <span class="o">-</span> <span class="mf">8.0</span><span class="p">)</span> <span class="o">**</span> <span class="mi">2</span> <span class="c1"># evaluate f(xo)</span>
<span class="n">f</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span> <span class="c1"># compute df/dx @ xo</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">-</span> <span class="n">step_scale</span> <span class="o">*</span> <span class="n">x</span><span class="o">.</span><span class="n">grad</span> <span class="c1"># update xo in direction opposite of df/dx @ xo</span>
<span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
</pre></div>
</div>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>Tensor(-1.5)
Tensor(4.2)
Tensor(6.48)
Tensor(7.392)
Tensor(7.7568)
Tensor(7.90272)
Tensor(7.961088)
Tensor(7.9844352)
Tensor(7.99377408)
Tensor(7.99750963)
Tensor(7.99900385)
</pre></div>
</div>
<p>Success! Our autodiff-driven optimization algorithm successfully guides us near the minimum <span class="math notranslate nohighlight">\(x_\mathrm{min}=8\)</span>.</p>
<p>This simple algorithm is known as <a class="reference external" href="https://en.wikipedia.org/wiki/Gradient_descent">gradient descent</a> (a gradient is a collection of derivatives for a multi-variable function), and it is a powerful technique for finding local minima in differentiable functions. As we saw in the preceding section, autodiff libraries enable us to search for local optima of <em>very</em> complex functions, and we can often work with functions that depend on <em>hundreds, thousands, or even many millions of
variables</em>. In such cases, we have no hope of simply plotting the function and literally looking for the minimum, nor do we have any chance of writing down the function’s derivative by hand. Fortunately, we have autodiff and gradient descent in our toolkit.</p>
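<p>To illustrate what a <em>gradient</em> looks like in code (a hedged sketch; the function and starting values here are chosen arbitrarily for illustration), the same update rule can be applied to each variable of a two-variable function, <span class="math notranslate nohighlight">\(f(x, y) = (x-1)^2 + (y+2)^2\)</span>, whose minimum lies at <span class="math notranslate nohighlight">\((1, -2)\)</span>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># a sketch: gradient descent on a two-variable function
# f(x, y) = (x - 1) ** 2 + (y + 2) ** 2, whose minimum is at (1, -2)
x = mg.tensor(0.0)
y = mg.tensor(0.0)
step_scale = 0.1

for _ in range(100):
    f = (x - 1.0) ** 2 + (y + 2.0) ** 2  # evaluate f at the current (x, y)
    f.backward()                         # compute df/dx and df/dy
    x = x - step_scale * x.grad          # step each variable "downhill"
    y = y - step_scale * y.grad

print(x, y)  # both should approach the minimum: roughly Tensor(1.) and Tensor(-2.)
</pre></div>
</div>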
<p>For those who have heard of neural networks and deep learning: autodiff libraries, used in conjunction with gradient descent, are how we often “teach” a neural network to perform a task. We use gradient descent to find the optimal parameter values of the neural network; the values are found such that they <em>minimize the average number of mistakes</em> the network makes when performing training tasks.</p>
<p>This section has just scratched the surface of automatic differentiation. Reading about the different algorithms for performing automatic differentiation (<a class="reference external" href="https://en.wikipedia.org/wiki/Automatic_differentiation#The_chain_rule,_forward_and_reverse_accumulation">forward-mode differentiation, back-propagation, and beyond</a>), about computing higher-order derivatives, and about the <a class="reference external" href="https://fluxml.ai/blog/2019/02/07/what-is-differentiable-programming.html">interesting advances in programming languages’ approaches to automatic
differentiation</a> are all fascinating and worthwhile endeavors. If you plan to take a course in differential calculus soon, see if you can incorporate autodiff into your coursework!</p>
</div>
<div class="section" id="Reading-Comprehension-Exercise-Solutions">
<h2>Reading Comprehension Exercise Solutions<a class="headerlink" href="#Reading-Comprehension-Exercise-Solutions" title="Permalink to this headline"></a></h2>
<p><strong>Auto-differentiation: Solution</strong></p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[2]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="k">return</span> <span class="n">mg</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">mg</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">30</span><span class="p">)</span>
<span class="n">fx</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="n">x</span><span class="o">.</span><span class="n">grad</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[2]:
</pre></div>
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
array([0.10499359, 0.12233146, 0.14104602, 0.16067062, 0.18052503,
0.19972311, 0.21721938, 0.2319002 , 0.24271321, 0.2488147 ,
0.24970297, 0.24530344, 0.23598166, 0.22248044, 0.20579899,
0.18704635, 0.167303 , 0.14751557, 0.12843546, 0.11059942,
0.09434168, 0.07982542, 0.0670819 , 0.05604927, 0.04660642,
0.03859972, 0.03186277, 0.02622978, 0.02154407, 0.01766271])
</pre></div></div>
</div>
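<p>As a quick sanity check on this solution: the function <span class="math notranslate nohighlight">\(f(x) = e^x / (e^x + 1)\)</span> is the logistic function, whose derivative is <span class="math notranslate nohighlight">\(f(x)(1 - f(x))\)</span>. The sketch below (which assumes NumPy is available and reuses <code>x.grad</code> from the cell above) compares that formula against the autodiff result.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np

xs = np.linspace(-2, 4, 30)
logistic = np.exp(xs) / (np.exp(xs) + 1)

# known analytic derivative of the logistic function: f'(x) = f(x) * (1 - f(x))
analytic = logistic * (1 - logistic)

# x.grad holds the derivatives computed by MyGrad in the cell above
print(np.allclose(x.grad, analytic))  # expected: True
</pre></div></div>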
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[3]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">plot_func_and_deriv</span><span class="p">(</span><span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1000</span><span class="p">),</span> <span class="n">f</span><span class="p">);</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/Module3_IntroducingNumpy_AutoDiff_23_0.png" src="../_images/Module3_IntroducingNumpy_AutoDiff_23_0.png" />
</div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[4]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="k">return</span> <span class="n">mg</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">x</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">**</span> <span class="mi">2</span> <span class="o">/</span> <span class="mi">10</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">30</span><span class="p">)</span>
<span class="n">fx</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="n">x</span><span class="o">.</span><span class="n">grad</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[4]:
</pre></div>
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
array([ 0.2439418 , 0.25603855, 0.2649823 , 0.27016051, 0.2710269 ,
0.26713309, 0.25815818, 0.24393438, 0.22446663, 0.19994461,
0.1707458 , 0.13742886, 0.10071733, 0.06147402, 0.02066753,
-0.02066753, -0.06147402, -0.10071733, -0.13742886, -0.1707458 ,
-0.19994461, -0.22446663, -0.24393438, -0.25815818, -0.26713309,
-0.2710269 , -0.27016051, -0.2649823 , -0.25603855, -0.2439418 ])
</pre></div></div>
</div>
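<p>A different way to spot-check an autodiff result is a central finite difference. The sketch below (with an arbitrarily chosen small step <code>h</code>) approximates the derivative of this <span class="math notranslate nohighlight">\(f\)</span> numerically and compares it to <code>x.grad</code> from the cell above.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>
import numpy as np

def f_np(x):
    # the same function as above, written with plain NumPy
    return np.exp(-(x - 1) ** 2 / 10)

xs = np.linspace(-2, 4, 30)
h = 1e-6  # an arbitrary small step for the central difference

numeric = (f_np(xs + h) - f_np(xs - h)) / (2 * h)

# compare against the exact derivatives that MyGrad computed above
print(np.allclose(x.grad, numeric))  # expected: True
</pre></div></div>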
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[5]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">plot_func_and_deriv</span><span class="p">(</span><span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1000</span><span class="p">),</span> <span class="n">f</span><span class="p">);</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/Module3_IntroducingNumpy_AutoDiff_25_0.png" src="../_images/Module3_IntroducingNumpy_AutoDiff_25_0.png" />
</div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="k">return</span> <span class="n">mg</span><span class="o">.</span><span class="n">sinc</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">-</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">30</span><span class="p">)</span>
<span class="n">fx</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="n">x</span><span class="o">.</span><span class="n">grad</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
</pre></div>
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
array([ 3.5 , 3.08231909, 2.88185434, 2.87154224, 2.9562879 ,
2.99759486, 2.85459947, 2.42860038, 1.69642019, 0.72117344,
-0.36375522, -1.39291332, -2.21717854, -2.74695958, -2.97654586,
-2.98223402, -2.89552134, -2.86072901, -2.99117993, -3.3379685 ,
-3.88059233, -4.54104218, -5.21509113, -5.80917978, -6.27018042,
-6.59861798, -6.84225739, -7.07396645, -7.36293903, -7.75 ])
</pre></div></div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[7]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">plot_func_and_deriv</span><span class="p">(</span><span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1000</span><span class="p">),</span> <span class="n">f</span><span class="p">);</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/Module3_IntroducingNumpy_AutoDiff_27_0.png" src="../_images/Module3_IntroducingNumpy_AutoDiff_27_0.png" />
</div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="k">return</span> <span class="mi">9</span> <span class="o">*</span> <span class="n">mg</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span> <span class="o">/</span> <span class="mi">9</span><span class="p">)</span> <span class="o">-</span> <span class="mi">9</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">30</span><span class="p">)</span>
<span class="n">fx</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">fx</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
<span class="n">x</span><span class="o">.</span><span class="n">grad</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
</pre></div>
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
array([-1.66410059, -1.53913231, -1.40226235, -1.25319963, -1.09198696,
-0.91909001, -0.73547021, -0.54262408, -0.34257202, -0.13778548,
0.0689473 , 0.27470313, 0.47662691, 0.6721239 , 0.85901208,
1.03561618, 1.20079858, 1.35393517, 1.49485163, 1.62373797,
1.74105718, 1.8474593 , 1.9437076 , 2.03061964, 2.10902314,
2.1797249 , 2.24349055, 2.30103263, 2.35300473, 2.4 ])
</pre></div></div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[9]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">plot_func_and_deriv</span><span class="p">(</span><span class="n">mg</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1000</span><span class="p">),</span> <span class="n">f</span><span class="p">);</span>
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/Module3_IntroducingNumpy_AutoDiff_29_0.png" src="../_images/Module3_IntroducingNumpy_AutoDiff_29_0.png" />
</div>
</div>
</div>
</div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>© Copyright 2021, Ryan Soklaski.</p>
</div>
</footer>
</div>
</div>
</section>
</div>
</body>
</html>