From 90fc99f486bbf920f67ba69ae9f3c6dfc2c51aa1 Mon Sep 17 00:00:00 2001 From: raziel2001au <7069692+raziel2001au@users.noreply.github.com> Date: Sat, 27 Dec 2025 01:32:26 +1000 Subject: [PATCH] Separate dependencies for DGX OS devices (#610) --- dgx_instructions.md | 22 +++++++++++----------- dgx_requirements.txt | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/dgx_instructions.md b/dgx_instructions.md index f9798382..06198aa8 100644 --- a/dgx_instructions.md +++ b/dgx_instructions.md @@ -12,39 +12,39 @@ This guide will assume you have a fresh installation of DGX OS, and will guide y Install the latest version of miniconda: ``` -$ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh -$ chmod u+x Miniconda3-latest-Linux-aarch64.sh -$ ./Miniconda3-latest-Linux-aarch64.sh +wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh +chmod u+x Miniconda3-latest-Linux-aarch64.sh +./Miniconda3-latest-Linux-aarch64.sh ``` Restart your bash or ssh session. If miniconda was installed successfully, it will automatically load the 'base' environment by default. If you want to disable this behaviour, run: ``` -$ conda config --set auto_activate_base false +conda config --set auto_activate_base false ``` Now you can create a Python 3.11 environment for ai-toolkit: ``` -$ conda create --name ai-toolkit python=3.11 +conda create --name ai-toolkit python=3.11 ``` Then activate the environment with: ``` -$ conda activate ai-toolkit +conda activate ai-toolkit ``` **2) Install PyTorch** ``` -$ pip3 install torch==2.9.1 torchvision==0.24.1 torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cu130 +pip3 install torch==2.9.1 torchvision==0.24.1 torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cu130 ``` **3) Install the remaining requirements (dgx_requirements.txt)** ``` -$ pip3 install -r dgx_requirements.txt +pip3 install -r dgx_requirements.txt ``` ### Running the UI on DGX OS: @@ -66,8 +66,8 @@ export PATH=“/opt/node-v24.11.1-linux-arm64/bin:$PATH” Change to the ui directory, then build and run the UI: ``` -$ cd ui -$ npm run build_and_start +cd ui +npm run build_and_start ``` If all went well, you’ll be able to access the UI on port 8675 and start training. @@ -78,7 +78,7 @@ If all went well, you’ll be able to access the UI on port 8675 and start train If you’re not getting any output when starting a training job from the UI, it’s probably crashing before the process started, the best way to debug these issues is to run the python training script directly (which is normally started by the UI). To do this, set up a training job in the UI, go to the advanced config screen, copy and paste the configuration into a file like train.yaml, then run the training script like this with the conda virtual environment active: ``` -$ python run.py path/to/train.yaml +python run.py path/to/train.yaml ```
\ No newline at end of file diff --git a/dgx_requirements.txt b/dgx_requirements.txt index b97cc586..41338a10 100644 --- a/dgx_requirements.txt +++ b/dgx_requirements.txt @@ -10,5 +10,42 @@ pywavelets==1.9.0 contourpy==1.3.3 opencv_python_headless==4.11.0.86 -# we include the base requirements.txt for the remaining dependencies: --r requirements.txt \ No newline at end of file +# additional dependencies matching requirements.txt: +torchao==0.10.0 +safetensors +git+https://github.com/huggingface/diffusers@6bf668c4d217ebc96065e673d8a257fd79950d34 +transformers==4.57.3 +lycoris-lora==1.8.3 +flatten_json +pyyaml +oyaml +tensorboard +kornia +invisible-watermark +einops +accelerate +toml +albumentations==1.4.15 +albucore==0.0.16 +pydantic +omegaconf +k-diffusion +open_clip_torch +timm +prodigyopt +controlnet_aux==0.0.10 +python-dotenv +bitsandbytes +hf_transfer +lpips +pytorch_fid +optimum-quanto==0.2.4 +sentencepiece +huggingface_hub +peft +gradio +python-slugify +opencv-python +pytorch-wavelets==1.3.0 +matplotlib==3.10.1 +setuptools==69.5.1 \ No newline at end of file