CentOS 8
Sponsored Link

TensorFlow : Docker イメージ (GPU) を利用する2020/07/22

 
機械学習ライブラリー, TensorFlow をインストールします。
当例では、TensorFlow 公式の Docker イメージをダウンロードして、コンテナーから TensorFlow を利用します。
Docker イメージは GPU サポート有りのイメージを使用します。
[1]
[2]
TensorFlow Docker (GPU) の root ユーザーでの利用例です。
当例では CUDA 10.1, TensorFlow 2.1.0 を使用します。
TensorFlow 公式のテスト済みのビルド構成は以下から確認可能です。
⇒ https://www.tensorflow.org/install/source?hl=en#gpu_support_2
# CUDA/TensorFlow イメージを Pull

[root@dlp ~]#
podman pull nvidia/cuda:10.1-base

[root@dlp ~]#
podman pull tensorflow/tensorflow:2.1.0-gpu-py3
[root@dlp ~]#
podman images

REPOSITORY                        TAG             IMAGE ID       CREATED        SIZE
docker.io/tensorflow/tensorflow   2.1.0-gpu-py3   e2a4af785bdb   6 months ago   4.13 GB
docker.io/nvidia/cuda             10.1-base       3b55548ae91f   7 months ago   109 MB

# [nvidia-smi] 動作確認

[root@dlp ~]#
podman run -e NVIDIA_VISIBLE_DEVICES=all --rm nvidia/cuda:10.1-base nvidia-smi

Wed Jul 22 09:44:15 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.57       Driver Version: 450.57       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  GeForce GTX 1070    Off  | 00000000:05:00.0 Off |                  N/A |
| 27%   35C    P5    25W / 180W |      0MiB /  8119MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

# TensorFlow 動作確認

[root@dlp ~]#
podman run -e NVIDIA_VISIBLE_DEVICES=all --rm tensorflow/tensorflow:2.1.0-gpu-py3 \
python -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))"

2020-07-22 09:45:53.152100: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer.so.6
2020-07-22 09:45:53.154536: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer_plugin.so.6
2020-07-22 09:45:53.856155: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-07-22 09:45:54.083911: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.084801: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:05:00.0 name: GeForce GTX 1070 computeCapability: 6.1
coreClock: 1.7845GHz coreCount: 15 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 238.66GiB/s
2020-07-22 09:45:54.084859: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-07-22 09:45:54.084938: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-22 09:45:54.088338: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-22 09:45:54.089129: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-22 09:45:54.093141: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-22 09:45:54.094871: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-22 09:45:54.094947: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-22 09:45:54.095108: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.096113: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.096995: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-07-22 09:45:54.108326: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2801595000 Hz
2020-07-22 09:45:54.108642: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4dd7a50 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-07-22 09:45:54.108678: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-07-22 09:45:54.223105: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.223832: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4e4d730 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-07-22 09:45:54.223870: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): GeForce GTX 1070, Compute Capability 6.1
2020-07-22 09:45:54.224197: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.224851: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:05:00.0 name: GeForce GTX 1070 computeCapability: 6.1
coreClock: 1.7845GHz coreCount: 15 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 238.66GiB/s
2020-07-22 09:45:54.224936: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-07-22 09:45:54.224974: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-22 09:45:54.225007: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-22 09:45:54.225087: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-22 09:45:54.225123: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-22 09:45:54.225189: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-22 09:45:54.225217: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-22 09:45:54.225341: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.226035: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.226638: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-07-22 09:45:54.226722: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-07-22 09:45:54.462510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-22 09:45:54.462582: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
2020-07-22 09:45:54.462596: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
2020-07-22 09:45:54.462873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.463627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:45:54.464352: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7589 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:05:00.0, compute capability: 6.1)
tf.Tensor(-1262.7825, shape=(), dtype=float32)
[3] SELinux を有効にしている場合は、ポリシーの変更が必要です。
[root@dlp ~]#
vi my-python.te
# 以下の内容で新規作成

module my-python 1.0;

require {
        type container_t;
        type xserver_misc_device_t;
        type device_t;
        class chr_file { getattr ioctl map open read write };
}

#============= container_t ==============
allow container_t device_t:chr_file map;
allow container_t device_t:chr_file { getattr ioctl open read write };
allow container_t xserver_misc_device_t:chr_file map;

[root@dlp ~]#
checkmodule -m -M -o my-python.mod my-python.te

[root@dlp ~]#
semodule_package --outfile my-python.pp --module my-python.mod

[root@dlp ~]#
semodule -i my-python.pp

[4] 一般ユーザーで実行したい場合は、設定変更が必要です。
[root@dlp ~]#
vi /etc/nvidia-container-runtime/config.toml
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"

[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
# コメント解除して [true] に変更
no-cgroups = true
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false

[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"


# 任意の一般ユーザーでログインして動作確認

[cent@dlp ~]$
podman pull nvidia/cuda:10.1-base

[cent@dlp ~]$
podman pull tensorflow/tensorflow:2.1.0-gpu-py3
[cent@dlp ~]$
podman images

REPOSITORY                        TAG             IMAGE ID       CREATED        SIZE
docker.io/tensorflow/tensorflow   2.1.0-gpu-py3   e2a4af785bdb   6 months ago   4.13 GB
docker.io/nvidia/cuda             10.1-base       3b55548ae91f   7 months ago   109 MB

# [nvidia-smi] 動作確認

[cent@dlp ~]$
podman run --rm --security-opt=label=disable \
--hooks-dir=/usr/share/containers/oci/hooks.d/ \
nvidia/cuda:10.1-base /usr/bin/nvidia-smi

Wed Jul 22 09:53:08 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.57       Driver Version: 450.57       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  GeForce GTX 1070    Off  | 00000000:05:00.0 Off |                  N/A |
| 27%   35C    P5    24W / 180W |      0MiB /  8119MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

# Hello World テストスクリプトで動作確認

[cent@dlp ~]$
podman run -e NVIDIA_VISIBLE_DEVICES=all --rm --security-opt=label=disable \
--hooks-dir=/usr/share/containers/oci/hooks.d/ \
tensorflow/tensorflow:2.1.0-gpu-py3 \
python -c "import tensorflow as tf; hello = tf.constant('Hello, TensorFlow World'); tf.print(hello)"

2020-07-22 09:54:46.412732: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer.so.6
2020-07-22 09:54:46.421174: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer_plugin.so.6
2020-07-22 09:54:47.571541: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-07-22 09:54:47.796659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.797539: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:05:00.0 name: GeForce GTX 1070 computeCapability: 6.1
coreClock: 1.7845GHz coreCount: 15 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 238.66GiB/s
2020-07-22 09:54:47.797598: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-07-22 09:54:47.797704: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-22 09:54:47.806121: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-22 09:54:47.808284: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-22 09:54:47.820860: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-22 09:54:47.824166: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-22 09:54:47.824238: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-22 09:54:47.824500: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.825740: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.826499: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-07-22 09:54:47.840577: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2801595000 Hz
2020-07-22 09:54:47.841146: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5202d00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-07-22 09:54:47.841195: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-07-22 09:54:47.953403: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.954113: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x50af180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-07-22 09:54:47.954164: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): GeForce GTX 1070, Compute Capability 6.1
2020-07-22 09:54:47.954584: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.955311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties:
pciBusID: 0000:05:00.0 name: GeForce GTX 1070 computeCapability: 6.1
coreClock: 1.7845GHz coreCount: 15 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 238.66GiB/s
2020-07-22 09:54:47.955403: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-07-22 09:54:47.955452: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-22 09:54:47.955487: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-22 09:54:47.955513: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-22 09:54:47.955537: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-22 09:54:47.955595: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-22 09:54:47.955618: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-22 09:54:47.955939: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.979910: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:47.980563: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-07-22 09:54:47.982697: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-07-22 09:54:48.308293: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-22 09:54:48.308361: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0
2020-07-22 09:54:48.308382: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N
2020-07-22 09:54:48.309116: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:48.310140: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-22 09:54:48.310843: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 7589 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:05:00.0, compute capability: 6.1)
Hello, TensorFlow World
関連コンテンツ