https://blog.csdn.net/nwpushuai/article/details/79935740
http://www.javashuo.com/article/p-qbalcren-kx.html
https://blog.csdn.net/zhqh100/article/details/77646497
http://www.javashuo.com/article/p-agbtacmx-gd.html
https://blog.csdn.net/huangfei711/article/details/79230446
http://www.javashuo.com/article/p-pxwgrdvj-ex.html
CPU I7-7700,8M,3.6GHZ,4核 內存 DDR4 16G 硬盤 SSD 500G 系統 Ubuntu 16.04 Desktop版(需要用到圖形界面) 顯卡 NVIDIA GeForce GTX1050Ti 4G
1.雙網卡綁定
root@mec03:~# cat /etc/modules # /etc/modules: kernel modules to load at boot time. # # This file contains the names of kernel modules that should be loaded # at boot time, one per line. Lines beginning with "#" are ignored. bonding mode=0 miimon=100 root@mec03:/etc/network# cat /etc/network/interfaces auto bond0 iface bond0 inet static address 172.30.10.249 netmask 255.255.255.0 gateway 172.30.10.254 post-up ifenslave bond0 enp2s0 enp3s0 pre-down ifenslave -d bond0 enp2s0 enp3s0 開機啓動放在rc.local裏面 root@mec03:/etc/network# modprobe bonding 關閉network-manager服務(它會與bonding衝突) root@mec03:/etc/network# systemctl disable network-manager.service
2.設置apt-list源
root@mec03:~# cat /etc/apt/sources.list deb http://mirrors.163.com/ubuntu/ xenial main restricted universe multiverse deb http://mirrors.163.com/ubuntu/ xenial-security main restricted universe multiverse deb http://mirrors.163.com/ubuntu/ xenial-updates main restricted universe multiverse deb http://mirrors.163.com/ubuntu/ xenial-proposed main restricted universe multiverse deb http://mirrors.163.com/ubuntu/ xenial-backports main restricted universe multiverse deb-src http://mirrors.163.com/ubuntu/ xenial main restricted universe multiverse deb-src http://mirrors.163.com/ubuntu/ xenial-security main restricted universe multiverse deb-src http://mirrors.163.com/ubuntu/ xenial-updates main restricted universe multiverse deb-src http://mirrors.163.com/ubuntu/ xenial-proposed main restricted universe multiverse deb-src http://mirrors.163.com/ubuntu/ xenial-backports main restricted universe multiverse
3.默認語言設置
root@mec03:~# cat /etc/default/locale # File generated by update-locale # LANG="zh_CN.UTF-8" # LANGUAGE="zh_CN:zh" LANG="en_US.UTF-8" LANGUAGE="en_US:en"
1.禁用系統默認自帶nvidia驅動
root@mec03:~# lsmod | grep nouveau nouveau 1724416 1 mxm_wmi 16384 1 nouveau wmi 24576 2 mxm_wmi,nouveau i2c_algo_bit 16384 1 nouveau ttm 106496 1 nouveau drm_kms_helper 172032 1 nouveau drm 401408 4 drm_kms_helper,ttm,nouveau video 45056 1 nouveau
2.禁用模塊
root@mec03:~# vim /etc/modprobe.d/blacklist.conf 在文件末尾添加以下幾行: blacklist vga16fb blacklist nouveau blacklist rivafb blacklist rivatv blacklist nvidiafb
3.更新內核
root@mec03:~# update-initramfs -u update-initramfs: Generating /boot/initrd.img-4.15.0-45-generic
4.重啓
root@mec03:~# reboot
5.上傳cudnn_cuda.zip包
root@mec03:~# rz root@mec03:~# ls cudnn_cuda cudnn_cuda.zip root@mec03:~# cd cudnn_cuda/ root@mec03:~/cudnn_cuda# ls cuda_10.0.130.1_linux.run libcudnn7-dev_7.6.3.30-1+cuda10.0_amd64.deb cuda_10.0.130_410.48_linux.run libcudnn7-doc_7.6.3.30-1+cuda10.0_amd64.deb libcudnn7_7.6.3.30-1+cuda10.0_amd64.deb NVIDIA-Linux-x86_64-435.21.run
6.安裝驅動
root@mec03:~/cudnn_cuda# systemctl stop lightdm.service root@mec03:~/cudnn_cuda# sh NVIDIA-Linux-x86_64-435.21.run Verifying archive integrity... OK Uncompressing NVIDIA Accelerated Graphics Driver for Linux-x86_64 435.21........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ root@mec03:~/cudnn_cuda# lsmod | grep nvi nvidia_drm 45056 0 nvidia_modeset 1118208 1 nvidia_drm nvidia 19472384 1 nvidia_modeset drm_kms_helper 172032 1 nvidia_drm drm 401408 3 drm_kms_helper,nvidia_drm ipmi_msghandler 53248 2 ipmi_devintf,nvidia
root@mec03:~/cudnn_cuda# sh cuda_10.0.130_410.48_linux.run Do you accept the previously read EULA? accept/decline/quit: accept Install NVIDIA Accelerated Graphics Driver for Linux-x86_64 410.48? (y)es/(n)o/(q)uit: n Install the CUDA 10.0 Toolkit? (y)es/(n)o/(q)uit: y Enter Toolkit Location [ default is /usr/local/cuda-10.0 ]: Do you want to install a symbolic link at /usr/local/cuda? (y)es/(n)o/(q)uit: y Install the CUDA 10.0 Samples? (y)es/(n)o/(q)uit: y Enter CUDA Samples Location [ default is /root ]: Installing the CUDA Toolkit in /usr/local/cuda-10.0 ... Installing the CUDA Toolkit in /usr/local/cuda-10.0 ... Missing recommended library: libGLU.so Missing recommended library: libX11.so Missing recommended library: libXi.so Missing recommended library: libXmu.so Installing the CUDA Samples in /root ... Copying samples to /root/NVIDIA_CUDA-10.0_Samples now... Finished copying samples. =========== = Summary = =========== Driver: Not Selected Toolkit: Installed in /usr/local/cuda-10.0 Samples: Installed in /root, but missing recommended libraries Please make sure that - PATH includes /usr/local/cuda-10.0/bin - LD_LIBRARY_PATH includes /usr/local/cuda-10.0/lib64, or, add /usr/local/cuda-10.0/lib64 to /etc/ld.so.conf and run ldconfig as root To uninstall the CUDA Toolkit, run the uninstall script in /usr/local/cuda-10.0/bin Please see CUDA_Installation_Guide_Linux.pdf in /usr/local/cuda-10.0/doc/pdf for detailed information on setting up CUDA. ***WARNING: Incomplete installation! This installation did not install the CUDA Driver. A driver of version at least 384.00 is required for CUDA 10.0 functionality to work. 
To install the driver using this installer, run the following command, replacing <CudaInstaller> with the name of this run file: sudo <CudaInstaller>.run -silent -driver Logfile is /tmp/cuda_install_9752.log root@mec03:~/cudnn_cuda# vim /etc/ld.so.conf root@mec03:~/cudnn_cuda# ldconfig root@mec03:~# cat /etc/profile export PATH=/usr/local/cuda-10.0/bin${PATH:+:${PATH}} export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} export CUDA_HOME=/usr/local/cuda root@mec03:~# nvcc --version nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2018 NVIDIA Corporation Built on Sat_Aug_25_21:08:01_CDT_2018 Cuda compilation tools, release 10.0, V10.0.130
root@mec03:~/cudnn_cuda# dpkg -i libcudnn7_7.6.3.30-1+cuda10.0_amd64.deb Selecting previously unselected package libcudnn7. (Reading database ... 184057 files and directories currently installed.) Preparing to unpack libcudnn7_7.6.3.30-1+cuda10.0_amd64.deb ... Unpacking libcudnn7 (7.6.3.30-1+cuda10.0) ... Setting up libcudnn7 (7.6.3.30-1+cuda10.0) ... Processing triggers for libc-bin (2.23-0ubuntu11) ... root@mec03:~/cudnn_cuda# dpkg -i libcudnn7-dev_7.6.3.30-1+cuda10.0_amd64.deb Selecting previously unselected package libcudnn7-dev. (Reading database ... 184063 files and directories currently installed.) Preparing to unpack libcudnn7-dev_7.6.3.30-1+cuda10.0_amd64.deb ... Unpacking libcudnn7-dev (7.6.3.30-1+cuda10.0) ... Setting up libcudnn7-dev (7.6.3.30-1+cuda10.0) ... update-alternatives: using /usr/include/x86_64-linux-gnu/cudnn_v7.h to provide /usr/include/cudnn.h (libcudnn) in auto mode root@mec03:~/cudnn_cuda# dpkg -i libcudnn7-doc_7.6.3.30-1+cuda10.0_amd64.deb Selecting previously unselected package libcudnn7-doc. (Reading database ... 184069 files and directories currently installed.) Preparing to unpack libcudnn7-doc_7.6.3.30-1+cuda10.0_amd64.deb ... Unpacking libcudnn7-doc (7.6.3.30-1+cuda10.0) ... Setting up libcudnn7-doc (7.6.3.30-1+cuda10.0) ... root@mec03:~/cudnn_cuda# cp /usr/include/cudnn.h /usr/local/cuda/include root@mec03:~/cudnn_cuda# cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2 #define CUDNN_MAJOR 7 #define CUDNN_MINOR 6 #define CUDNN_PATCHLEVEL 3 -- #define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL) #include "driver_types.h"
1.安裝python3.6
root@mec03:~# add-apt-repository ppa:jonathonf/python-3.6 A plain backport of *just* Python 3.6. System extensions/Python libraries may or may not work. Don't remove Python 3.5 from your system - it will break. More info: https://launchpad.net/~jonathonf/+archive/ubuntu/python-3.6 Press [ENTER] to continue or ctrl-c to cancel adding it gpg: keyring `/tmp/tmpec5st1dk/secring.gpg' created gpg: keyring `/tmp/tmpec5st1dk/pubring.gpg' created gpg: requesting key F06FC659 from hkp server keyserver.ubuntu.com gpg: /tmp/tmpec5st1dk/trustdb.gpg: trustdb created gpg: key F06FC659: public key "Launchpad PPA for J Fernyhough" imported gpg: Total number processed: 1 gpg: imported: 1 (RSA: 1) OK root@mec03:~# update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.5 1 update-alternatives: using /usr/bin/python3.5 to provide /usr/bin/python3 (python3) in auto mode root@mec03:~# update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 2 update-alternatives: using /usr/bin/python3.6 to provide /usr/bin/python3 (python3) in auto mode root@mec03:~# update-alternatives --install /usr/bin/python python /usr/bin/python2 100 update-alternatives: using /usr/bin/python2 to provide /usr/bin/python (python) in auto mode root@mec03:~# update-alternatives --install /usr/bin/python python /usr/bin/python3 150 update-alternatives: using /usr/bin/python3 to provide /usr/bin/python (python) in auto mode root@mec03:~# python3 Python 3.6.8 (default, May 7 2019, 14:58:50) [GCC 5.4.0 20160609] on linux Type "help", "copyright", "credits" or "license" for more information. >>>
2.安裝pip3
root@mec03:~# apt install python3-pip
3.安裝tensorflow
root@mec03:~# pip3 install tensorflow-gpu==1.13.1 -i https://pypi.tuna.tsinghua.edu.cn/simple Collecting tensorflow-gpu==1.13.1
4.測試gpu
測試python語句
import numpy
import tensorflow as tf
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
print(sess.run(c))
root@mec03:~# python3 Python 3.6.8 (default, May 7 2019, 14:58:50) [GCC 5.4.0 20160609] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import numpy >>> import tensorflow as tf /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_qint8 = np.dtype([("qint8", np.int8, 1)]) /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_quint8 = np.dtype([("quint8", np.uint8, 1)]) /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_qint16 = np.dtype([("qint16", np.int16, 1)]) /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_quint16 = np.dtype([("quint16", np.uint16, 1)]) /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. _np_qint32 = np.dtype([("qint32", np.int32, 1)]) /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'. 
np_resource = np.dtype([("resource", np.ubyte, 1)]) >>> a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') >>> b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') >>> c = tf.matmul(a, b) >>> sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 2019-09-14 12:27:18.309361: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA 2019-09-14 12:27:18.360212: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2019-09-14 12:27:18.360498: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x3bb3a20 executing computations on platform CUDA. Devices: 2019-09-14 12:27:18.360512: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): GeForce GTX 1050 Ti, Compute Capability 6.1 2019-09-14 12:27:18.379184: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3600000000 Hz 2019-09-14 12:27:18.380446: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x3ccb2f0 executing computations on platform Host. 
Devices: 2019-09-14 12:27:18.380503: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): <undefined>, <undefined> 2019-09-14 12:27:18.380792: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties: name: GeForce GTX 1050 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.392 pciBusID: 0000:01:00.0 totalMemory: 3.94GiB freeMemory: 3.66GiB 2019-09-14 12:27:18.380852: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0 2019-09-14 12:27:18.382037: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix: 2019-09-14 12:27:18.382075: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0 2019-09-14 12:27:18.382090: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N 2019-09-14 12:27:18.382242: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3452 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1) Device mapping: /job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device /job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device /job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1 2019-09-14 12:27:18.384493: I tensorflow/core/common_runtime/direct_session.cc:317] Device mapping: /job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device /job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device /job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1 >>> print(sess.run(c)) MatMul: (MatMul): /job:localhost/replica:0/task:0/device:GPU:0 2019-09-14 12:27:20.118473: I tensorflow/core/common_runtime/placer.cc:1059] MatMul: 
(MatMul)/job:localhost/replica:0/task:0/device:GPU:0 a: (Const): /job:localhost/replica:0/task:0/device:GPU:0 2019-09-14 12:27:20.118492: I tensorflow/core/common_runtime/placer.cc:1059] a: (Const)/job:localhost/replica:0/task:0/device:GPU:0 b: (Const): /job:localhost/replica:0/task:0/device:GPU:0 2019-09-14 12:27:20.118502: I tensorflow/core/common_runtime/placer.cc:1059] b: (Const)/job:localhost/replica:0/task:0/device:GPU:0 [[22. 28.] [49. 64.]] >>>
5.查看GPU使用狀況
root@mec03:~# nvidia-smi Fri Sep 6 19:42:42 2019 +-----------------------------------------------------------------------------+ | Processes: GPU Memory | | GPU PID Type Process name Usage | |=============================================================================| | 0 9558 C python3 3865MiB | | 0 12510 G /usr/lib/xorg/Xorg 39MiB | | 0 12608 G gnome-shell 38MiB | +-----------------------------------------------------------------------------+ Fri Sep 6 00:22:27 2019 +-----------------------------------------------------------------------------+ | NVIDIA-SMI 435.21 Driver Version: 435.21 CUDA Version: 10.1 | |-------------------------------+----------------------+----------------------+ | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | |===============================+======================+======================| | 0 GeForce GTX 105... Off | 00000000:01:00.0 On | N/A | | 31% 62C P0 N/A / 80W | 3955MiB / 4038MiB | 97% Default | +-------------------------------+----------------------+----------------------+ +-----------------------------------------------------------------------------+ | Processes: GPU Memory | | GPU PID Type Process name Usage | |=============================================================================| | 0 9558 C python3 3865MiB | | 0 12510 G /usr/lib/xorg/Xorg 39MiB | | 0 12608 G gnome-shell 38MiB | +-----------------------------------------------------------------------------+