#!/bin/bash
#
# itgpt-setup.sh - prepare a Debian/Ubuntu host for GPU-enabled Docker workloads.
#
# Usage (as advertised in the project README):
#   curl -SLs https://gitee.com/muaimingjun/qubic-docker/raw/main/itgpt-setup.sh | bash
#
# Steps, in order:
#   1. Detect the OS / distribution and whether an NVIDIA device is present.
#   2. If an NVIDIA device is present and nvidia-smi is missing, install the
#      build prerequisites for the driver (Ubuntu 20.04 branch).
#   3. Install Docker CE and the docker-compose plugin when they are missing.
#   4. Install nvidia-container-toolkit when containers cannot reach the GPU.
#   5. Add the invoking user to the "docker" group.
#   6. Write /etc/docker/daemon.json with the cgroupfs workaround for
#      https://github.com/NVIDIA/nvidia-docker/issues/1730 and restart Docker.
#
# NOTE(review): this file was recovered from a whitespace-collapsed git patch.
# Part of the driver-installation section was lost in that extraction; see the
# NOTE(review) marker inside prepare_ubuntu_2004 before relying on this script
# to install a driver.
#
# All work lives in functions and is started by `main` on the last line, so a
# truncated download piped into bash executes nothing instead of half a script.

set -euxo pipefail

export DEBIAN_FRONTEND=noninteractive

# Image used to probe whether containers can access the GPU.
readonly CUDA_TEST_IMAGE="nvidia/cuda:11.0.3-base-ubuntu18.04"

# Global variables, filled in by detect_platform / detect_nvidia.
# They start empty so that `set -u` cannot trip on hosts where detection
# does not assign them (e.g. a non-Linux kernel).
OS=""
DISTRO=""
VERSION=""
NVIDIA_PRESENT=""

#######################################
# Run apt-get as root in non-interactive mode.
# sudo resets the environment by default, so the exported DEBIAN_FRONTEND
# would not reach apt-get; it is passed explicitly through env(1).
# Arguments: apt-get arguments
#######################################
apt_get() {
  sudo env DEBIAN_FRONTEND=noninteractive apt-get "$@"
}

#######################################
# Best-effort installation of optional packages.
# Packages are installed one at a time so that a single unavailable name
# does not prevent the remaining ones from being installed.
# Arguments: package names
#######################################
apt_install_optional() {
  local pkg
  for pkg in "$@"; do
    apt_get install -y "$pkg" \
      || echo "Skipping optional package that could not be installed: ${pkg}" >&2
  done
}

#######################################
# Detect the kernel and, on Linux, the distribution ID and version.
# Globals: OS, DISTRO, VERSION (written)
# Exits with status 1 on Linux hosts without /etc/os-release.
#######################################
detect_platform() {
  OS="$(uname)"
  case "$OS" in
    "Linux")
      if [[ -f /etc/os-release ]]; then
        # shellcheck disable=SC1091
        . /etc/os-release
        DISTRO="${ID:-}"
        VERSION="${VERSION_ID:-}"
      else
        echo "您的 Linux 发行版不受支持。"
        exit 1
      fi
      ;;
  esac
}

#######################################
# Record the lspci lines that mention NVIDIA (empty when there are none,
# or when lspci itself is unavailable).
# Globals: NVIDIA_PRESENT (written)
#######################################
detect_nvidia() {
  NVIDIA_PRESENT="$(lspci | grep -i nvidia || true)"
}

#######################################
# Ubuntu 20.04: bring the system up to date and install the packages the
# NVIDIA driver / CUDA installation depends on.
#######################################
prepare_ubuntu_2004() {
  # The steps are chained with ';' on purpose: a failing step does not stop
  # the following ones, only the last command decides the exit status.
  sudo env DEBIAN_FRONTEND=noninteractive sh -c \
    'apt-get update; apt-get upgrade -y; apt-get autoremove -y; apt-get autoclean -y'

  apt_get install -y "linux-headers-$(uname -r)"

  # Drop the retired NVIDIA CUDA repository signing key if it is present.
  sudo apt-key del 7fa2af80 || true

  apt_get install -y \
    build-essential cmake gpg unzip pkg-config \
    software-properties-common ubuntu-drivers-common

  apt_install_optional \
    libxmu-dev libxi-dev libglu1-mesa libglu1-mesa-dev \
    libjpeg-dev libpng-dev libtiff-dev \
    libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
    libxvidcore-dev libx264-dev \
    libopenblas-dev libatlas-base-dev liblapack-dev gfortran \
    libhdf5-serial-dev \
    python3-dev python3-tk python-imaging-tk curl cuda-keyring \
    gnupg-agent dirmngr alsa-utils \
    libgtk-3-dev

  apt_get update

  # NOTE(review): the redirection was stripped from the recovered source
  # ("sudo dirmngr /dev/null"); "</dev/null" is the presumed original.
  sudo dirmngr </dev/null

  # NOTE(review): the recovered source is truncated here. The remaining
  # Ubuntu 20.04 steps (presumably the actual driver / CUDA installation) and
  # any further distribution or version branches are not visible and have
  # deliberately not been guessed. Restore them from the original script.
}

#######################################
# Install the NVIDIA driver prerequisites when an NVIDIA device is present
# but nvidia-smi is not on PATH.
# Globals: NVIDIA_PRESENT, DISTRO, VERSION (read)
#######################################
install_nvidia_driver() {
  if [[ -z "$NVIDIA_PRESENT" ]]; then
    echo "在此系统上未检测到 NVIDIA 设备。"
    return 0
  fi

  if command -v nvidia-smi &>/dev/null; then
    echo "CUDA 驱动程序已作为 nvidia-smi 安装。"
    return 0
  fi

  case "$DISTRO" in
    "ubuntu")
      case "$VERSION" in
        "20.04")
          prepare_ubuntu_2004
          ;;
      esac
      ;;
  esac
}

#######################################
# Install Docker CE from Docker's apt repository unless docker is on PATH.
#######################################
install_docker() {
  if command -v docker &>/dev/null; then
    echo "Docker is already installed."
    return 0
  fi
  echo "未安装 Docker。正在进行安装..."

  local keyring=/etc/apt/keyrings/docker.gpg
  local arch codename

  # Install the Docker CE keyring (replacing any stale copy).
  apt_get update
  apt_get install -y ca-certificates curl gnupg
  sudo install -m 0755 -d /etc/apt/keyrings
  sudo rm -f -- "$keyring"
  curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
    | sudo gpg --dearmor -o "$keyring"
  sudo chmod a+r "$keyring"

  # Add the Docker CE repository to the apt sources and install.
  arch="$(dpkg --print-architecture)"
  codename="$(. /etc/os-release; echo "$VERSION_CODENAME")"
  echo "deb [arch=${arch} signed-by=${keyring}] https://download.docker.com/linux/ubuntu ${codename} stable" \
    | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null
  apt_get update
  apt_get install -y docker-ce
}

#######################################
# Install the compose plugin and expose it as `docker-compose` unless a
# docker-compose command already exists.
#######################################
install_docker_compose() {
  if command -v docker-compose &>/dev/null; then
    echo "Docker-compose is already installed."
    return 0
  fi
  echo "Docker-compose is not installed. Proceeding with installations..."

  apt_get install -y docker-compose-plugin
  # -f: replace a stale or dangling link instead of aborting under `set -e`.
  sudo ln -sfv /usr/libexec/docker/cli-plugins/docker-compose /usr/bin/docker-compose
  docker-compose --version
}

#######################################
# Run nvidia-smi inside a throw-away CUDA container.
# Returns: the exit status of `docker run`.
#######################################
gpu_container_works() {
  sudo docker run --rm --gpus all "$CUDA_TEST_IMAGE" nvidia-smi
}

#######################################
# Make sure containers can use the GPU, installing nvidia-container-toolkit
# when the probe container fails, then pin the NVIDIA packages.
# Globals: NVIDIA_PRESENT (read)
#######################################
enable_nvidia_docker() {
  if [[ -z "$NVIDIA_PRESENT" ]]; then
    return 0
  fi

  if gpu_container_works &>/dev/null; then
    echo "nvidia-docker is enabled and working."
  else
    echo "nvidia-docker does not seem to be enabled. Proceeding with installations..."
    local distribution repo_list
    distribution="$(. /etc/os-release; echo "${ID}${VERSION_ID}")"

    curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -

    # Fetch the list first so an HTTP error cannot leave an error page in
    # /etc/apt/sources.list.d.
    repo_list="$(curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list")"
    printf '%s\n' "$repo_list" \
      | sudo tee /etc/apt/sources.list.d/nvidia-docker.list >/dev/null

    apt_get update
    apt_get install -y nvidia-container-toolkit
    sudo systemctl restart docker
    gpu_container_works
  fi

  # Quoted so the patterns reach apt-mark instead of being expanded by the
  # shell against files in the current directory.
  sudo apt-mark hold 'nvidia*' 'libnvidia*'
}

#######################################
# Create the docker group (if needed) and add the invoking user to it.
#######################################
add_user_to_docker_group() {
  local user="${USER:-$(id -un)}"

  sudo groupadd -f docker
  sudo usermod -aG docker "$user" || true

  # `newgrp` is intentionally not called here: it starts a new shell, which
  # would block the script (or consume the rest of it when piped into bash).
  echo "User '${user}' was added to the 'docker' group. Log out and back in (or run 'newgrp docker') for it to take effect."
}

#######################################
# Apply the workaround for NVIDIA/nvidia-docker issue 1730.
#
# Summary of the issue: when the host performs a daemon-reload, containers
# using systemd may lose access to the NVIDIA GPUs. To check whether a host is
# affected, run `sudo systemctl daemon-reload` and then `nvidia-smi` inside a
# container.
#
# Workaround: switch Docker to the cgroupfs cgroup driver in daemon.json and
# restart Docker.
#######################################
configure_docker_cgroup_workaround() {
  echo "根据 https://github.com/NVIDIA/nvidia-docker/issues/1730 应用NVIDIA Docker问题的解决方法"

  local config=/etc/docker/daemon.json

  sudo install -d /etc/docker
  # Keep a one-time backup; the file is replaced wholesale below.
  if [[ -f "$config" && ! -e "${config}.bak" ]]; then
    sudo cp -a -- "$config" "${config}.bak"
  fi

  sudo tee "$config" >/dev/null <<'EOF'
{
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    },
    "exec-opts": ["native.cgroupdriver=cgroupfs"]
}
EOF

  # Restart Docker to apply the change.
  sudo systemctl restart docker
  echo "已应用解决方法。Docker 已配置为使用“cgroupfs”作为 cgroup 驱动程序。"
}

main() {
  # Best effort: mark cloud-init with the "install" selection state.
  sudo dpkg --set-selections <<< "cloud-init install" || true

  detect_platform
  detect_nvidia
  install_nvidia_driver
  install_docker
  install_docker_compose
  enable_nvidia_docker
  add_user_to_docker_group
  configure_docker_cgroup_workaround
}

main "$@"