-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathubuntu-install-nvidia.sh
executable file
·227 lines (205 loc) · 6.98 KB
/
ubuntu-install-nvidia.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# Copyright 2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
# Author: Manoj Iyer <[email protected]>
#!/bin/bash
# Script state. Every value starts empty and is filled in later by option
# parsing (download_only, proxy) or by the repository discovery steps.
download_only=""
proxy=""
url=""
nvidia_cuda_url=""
nvidia_ml_url=""
host_arch=""
arch=""
rel=""
nvidia_release=""
latest_cuda_deb=""
latest_ml_deb=""
latest_key=""
# Let getopt check the command line against the options this script accepts.
# The long option list must name every long option, and "proxy:" carries a
# colon because --proxy takes a value; otherwise getopt reports valid
# invocations such as --download-only as unrecognized.
opts=$(getopt -o dhp: --long download-only,help,proxy: -n 'parse-options' -- "$@")
#######################################
# Print the command line help text.
# Arguments: none ($0 is used as the program name)
# Outputs:   writes the usage message to stdout
#######################################
usage() {
  cat <<EOF
Usage: $0 [--download-only] [--proxy <proxy_server:port>]
Options:
-d | --download-only Download latest debs and key only
-h | --help This message
-p | --proxy proxy_server:port
Installs nvidia proprietary drivers. If used from behind a firewall
please provide proxy_server:port information.
EOF
}
#######################################
# Parse the command line options.
# Globals:   download_only (written), proxy (written)
# Arguments: the script's command line arguments
# Outputs:   an error plus the usage text when -p/--proxy has no value
# Returns:   0 when parsing stops; exits 1 for --help or a missing value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -d | --download-only)
        download_only="True"
        shift
        ;;
      -h | --help)
        usage
        exit 1
        ;;
      -p | --proxy)
        if [[ $# -lt 2 ]]; then
          echo "FATAL: $1 requires a proxy_server:port argument" >&2
          usage
          exit 1
        fi
        proxy="$2"
        # Drop both the flag and its value so that the options following
        # the proxy are still examined.
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        # First non-option argument: stop parsing.
        break
        ;;
    esac
  done
}
parse_args "$@"
# Setup proxy eg: squid.internal:3128

#######################################
# Record a proxy in the user's wget configuration, once.
# Globals:   HOME (read)
# Arguments: $1 - proxy in proxy_server:port form
# Outputs:   appends three settings to $HOME/.wgetrc unless an identical
#            http_proxy line is already there (the file is created if
#            it does not exist)
#######################################
append_wgetrc_proxy() {
  local wgetrc="$HOME/.wgetrc"
  # Compare against the full "http_proxy=<proxy>" line. Matching the bare
  # proxy value with -x never succeeds, which made every run append again.
  if grep -Fxq -- "http_proxy=$1" "${wgetrc}" 2>/dev/null; then
    return 0
  fi
  cat <<EOF >> "${wgetrc}"
use_proxy=yes
http_proxy=$1
https_proxy=$1
EOF
}

if [[ -n ${proxy} ]]; then
  # Exported so that child processes started by this script see the proxy.
  export http_proxy="${proxy}"
  export https_proxy="${proxy}"
  append_wgetrc_proxy "${proxy}"
fi
# Get architecture of debs to download.
# dpkg reports the Debian architecture name; it is mapped to the directory
# name used in the repository URLs built later in this script.
host_arch=$(dpkg --print-architecture)
case "${host_arch}" in
  amd64)
    arch="x86_64"
    ;;
  ppc64el)
    arch="${host_arch}"
    ;;
  *)
    echo "FATAL: Unsupported architecture ${host_arch}"
    exit 1
    ;;
esac
# Host Ubuntu release handling.
# The host must run Ubuntu 18.04 or newer.

#######################################
# Report whether the first argument is NOT the lowest of the given versions.
# Arguments: $1 - minimum version; remaining arguments - versions to compare
# Returns:   0 when some other argument sorts below $1 in version order,
#            1 when $1 is the lowest (or ties for lowest)
#######################################
check_version() {
  local lowest
  lowest=$(printf '%s\n' "$@" | sort -V | head -n 1)
  [[ "${lowest}" != "$1" ]]
}
# Keep whatever lsb_release prints after the colon of its release line.
host_ver=$(lsb_release -r | cut -d: -f2)
# ${host_ver} is deliberately unquoted: word splitting discards any
# whitespace that cut leaves around the version. check_version succeeds
# when the host release sorts below 18.04, which is the failure case here.
check_version "18.04" ${host_ver} && {
  echo "FATAL: Unsupported Ubuntu release"
  echo "Host Ubuntu release must be 18.04 or greater"
  exit 1
}
# Check if we can reach Nvidia's cuda repos and ML repos.
# curl runs without -f here, so an HTTP error page still exits 0. Any
# non-zero status is therefore a transport problem (name resolution,
# refused connection, proxy failure, timeout), not just exit code 28,
# and each of them means the repositories cannot be used.
for repos in "cuda" "machine-learning"; do
  if ! curl --connect-timeout 20 -s \
    "http://developer.download.nvidia.com/compute/${repos}/repos/" \
    &>/dev/null; then
    echo "Unable to reach http://developer.download.nvidia.com/"
    echo "If you are behind a fire wall use -p | --proxy "
    usage
    exit 1
  fi
done
# Find the latest Ubuntu cuda repo from Nvidia, and generate corresponding
# ML repo.
# Nvidia's web site has empty directories for newer releases, skip those
# till we find Ubuntu releases for which we have valid debs.
nvidia_base_url="http://developer.download.nvidia.com/compute"
# Strip the HTML tags from the listing, keep the ubuntu entries and take the
# last two of them in reverse order.
nvidia_releases=$(curl --connect-timeout 20 -s \
  "${nvidia_base_url}/cuda/repos/" \
  | awk '{gsub(/<[^>]*>/,""); print }' | grep ubuntu | tail -n2 | tac)
# ${nvidia_releases} is unquoted on purpose: one loop pass per listed entry.
for rel in ${nvidia_releases}; do
  # Remove a trailing "/" from every entry. Stripping it from the whole list
  # only cleans the last entry and leaves "//" in the URLs of the others.
  rel=${rel%/}
  url=$(curl -s "${nvidia_base_url}/cuda/repos/${rel}/${arch}/" \
    | grep cuda-repo-ubuntu)
  if [[ -n "${url}" ]]; then
    nvidia_cuda_url="${nvidia_base_url}/cuda/repos/${rel}/${arch}/"
    # Generate Machine Learning repo URL that corresponds to cuda URL.
    # We want the library to match the driver.
    nvidia_ml_url="${nvidia_base_url}/machine-learning/repos/${rel}/${arch}/"
    break
  fi
done
# Find the latest cuda and ML repo debs for Ubuntu release from Nvidia
# Nvidia retains older debs along with the latest debs in the same directory,
# we want to pick up the latest debs that are available.
# "Latest" here means the last matching line of the directory listing.
if [[ -n ${nvidia_cuda_url} ]]; then
  # Fetch the listing once; the repo deb and the key are both taken from it.
  cuda_index=$(curl -s "${nvidia_cuda_url}")
  latest_cuda_deb=$(printf '%s\n' "${cuda_index}" | grep "cuda-repo-ubuntu" \
    | tail -n1 | awk '{gsub(/<[^>]*>/,""); print }' | tr -d ' ')
  # -F: match the literal ".pub"; as a regex the dot matches any character.
  latest_key=$(printf '%s\n' "${cuda_index}" | grep -F ".pub" | tail -n1 \
    | awk '{gsub(/<[^>]*>/,""); print }' | tr -d ' ')
  # Stop here instead of handing empty file names to wget and dpkg later.
  if [[ -z ${latest_cuda_deb} || -z ${latest_key} ]]; then
    echo "FATAL: No cuda repo deb or key found at ${nvidia_cuda_url}"
    exit 1
  fi
else
  echo "FATAL: No nvidia cuda repository found.. exiting"
  exit 1
fi
if [[ -n ${nvidia_ml_url} ]]; then
  latest_ml_deb=$(curl -s "${nvidia_ml_url}" \
    | grep "nvidia-machine-learning-repo-ubuntu" \
    | tail -n1 | awk '{gsub(/<[^>]*>/,""); print }' | tr -d ' ')
  if [[ -z ${latest_ml_deb} ]]; then
    echo "FATAL: No nvidia ML repo deb found at ${nvidia_ml_url}"
    exit 1
  fi
else
  echo "FATAL: No nvidia ML repository found.. exiting"
  exit 1
fi
#######################################
# Download one file into the current directory unless it is already there.
# Arguments: $1 - base URL ending in "/"; $2 - file name
# Outputs:   an error line naming the failed URL and wget's exit status
# Returns:   0 when the file exists or was downloaded; exits 1 on failure
#######################################
download_if_missing() {
  local base_url=$1
  local file=$2
  local rc
  if [[ -f ${file} ]]; then
    return 0
  fi
  # -c resumes a partially downloaded file instead of starting over.
  wget -c "${base_url}${file}"
  rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "ERROR: wget returned ${rc}: Unable to download ${base_url}${file}"
    exit 1
  fi
}

# Download cuda repo deb and key from Nvidia
for files in ${latest_cuda_deb} ${latest_key}; do
  download_if_missing "${nvidia_cuda_url}" "${files}"
done
# Download the latest ML repo deb from Nvidia.
# The error report now names the ML deb and the exit status of this wget
# call, not the leftovers of the cuda loop above.
download_if_missing "${nvidia_ml_url}" "${latest_ml_deb}"
# In download-only mode the debs and key have been fetched by now, so the
# script finishes here without installing anything.
[[ "${download_only}" != "True" ]] || exit 0
# Install latest repo, install cuda and libcudnn libraries.
# The whole step is skipped when dpkg already lists a cuda-repo-ubuntu
# package.
if ! dpkg -l "*cuda-repo-ubuntu*" &>/dev/null; then
  # A key import failure is only reported; apt update below decides whether
  # the repository is usable.
  if ! sudo apt-key add "${latest_key}"; then
    echo "WARNING: sudo apt-key add ${latest_key} failed."
  fi
  # ${var:+"..."} leaves the ML deb out entirely when its name is empty.
  if ! sudo dpkg -i "${latest_cuda_deb}" ${latest_ml_deb:+"${latest_ml_deb}"}; then
    echo "FATAL: sudo dpkg -i of the Nvidia repo debs failed."
    exit 1
  fi
  if ! sudo apt update; then
    echo "FATAL: sudo apt update failed."
    exit 1
  fi
  # TODO: Need logic to install latest libcudnn
  if ! sudo apt install -y cuda libcudnn7 libcudnn7-dev libnccl2 libnccl-dev; then
    echo "FATAL: sudo apt install of cuda packages failed."
    exit 1
  fi
fi
# Setup systemd Nvidia persistence daemon.
# The unit file is written only when no such file exists yet. The quoted
# heredoc delimiter keeps the unit text literal, and tee runs under sudo
# because the target directory is not writable by a regular user.
persistenced_unit=/lib/systemd/system/nvidia-persistenced.service
if ! [[ -f ${persistenced_unit} ]]; then
  sudo tee -a "${persistenced_unit}" >/dev/null <<'EOF'
[Unit]
Description=NVIDIA Persistence Daemon
Wants=syslog.target
[Service]
Type=forking
PIDFile=/var/run/nvidia-persistenced/nvidia-persistenced.pid
Restart=always
ExecStart=/usr/bin/nvidia-persistenced --verbose
ExecStopPost=/bin/rm -rf /var/run/nvidia-persistenced
TimeoutSec=300
EOF
fi
# Disable CPU hotplug.
# The .bak file that sed -i.bak leaves behind doubles as the marker that this
# step already ran on a previous invocation.
hotadd_rules=/lib/udev/rules.d/40-vm-hotadd.rules
# Single quotes on purpose: the line is stored in .profile unexpanded.
# shellcheck disable=SC2016
cuda_path_line='export PATH=/usr/local/cuda/bin/${PATH:+:${PATH}}'
if [[ ! -f ${hotadd_rules}.bak ]]; then
  if [[ -f ${hotadd_rules} ]]; then
    # Comment out every rule line that starts with SUBSYSTEM=="cpu", then
    # copy the edited rules file into /etc/udev/rules.d/.
    sudo sed -i.bak '/^SUBSYSTEM=="cpu"/s/^\(.*\)$/#\1/' "${hotadd_rules}"
    sudo cp "${hotadd_rules}" /etc/udev/rules.d/
  else
    echo "WARNING: ${hotadd_rules} not found, CPU hotplug rules unchanged"
  fi
  # Without the rules file no .bak marker is ever created, so this branch
  # runs on every invocation; only add the PATH line when it is missing.
  if ! grep -Fxq -- "${cuda_path_line}" "$HOME/.profile" 2>/dev/null; then
    printf '%s\n' "${cuda_path_line}" >> "$HOME/.profile"
  fi
  if sudo update-initramfs -u; then
    echo " "
    echo "Reboot required.."
    echo " "
    # -n 1: accept a single keypress; the answer lands in REPLY.
    read -p "reboot the system now? (y/n):" -n 1 -r
    echo " "
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      echo "*************************************************"
      echo "Your system will now reboot"
      echo "Run nvidia-smi to make sure your GPUs are listed"
      echo "*************************************************"
      sudo reboot
    else
      echo " "
      echo "**********************************************************"
      echo "A reboot is recommended to complete configuration"
      echo "After a reboot run nvidia-smi to make sure GPUs are listed"
      echo "**********************************************************"
      echo " "
    fi
  fi
fi