diff --git a/frameworks/tensorrt-llm/1.2.0/Dockerfile b/frameworks/tensorrt-llm/1.2.0/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..2e5b77de0ca82098975888b8c94ed39fde3c310a
--- /dev/null
+++ b/frameworks/tensorrt-llm/1.2.0/Dockerfile
@@ -0,0 +1,59 @@
+# syntax=docker/dockerfile:1
+
+FROM opencloudos/opencloudos9-cuda-devel:12.8
+
+LABEL maintainer="OpenCloudOS AI Community"
+LABEL org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container"
+LABEL org.opencontainers.image.description="TensorRT-LLM 1.2.0 (GPU) on OpenCloudOS 9"
+
+# Install system dependencies (cache mount keeps dnf metadata out of the layer)
+RUN --mount=type=cache,target=/var/cache/dnf \
+    dnf install -y \
+    python3.12 \
+    python3.12-devel \
+    openmpi \
+    openmpi-devel \
+    && dnf clean all
+
+# Set environment variables
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV PATH=/usr/lib64/openmpi/bin:$PATH
+ENV LD_LIBRARY_PATH=/usr/lib64:/usr/lib64/cuda-compat-12-8:/usr/lib64/openmpi/lib:/usr/lib/python3.12/site-packages/nvidia/nvshmem/lib:/usr/lib64/cuda:/usr/lib64/cuda/lib64:/usr/local/cuda/lib64
+
+# Configure pip: parallel downloads + prefer pre-built wheels
+ENV PIP_CONCURRENT_DOWNLOADS=5
+
+# Install pip for python3.12, then install tensorrt-llm from NVIDIA PyPI
+RUN python3.12 -m ensurepip --upgrade \
+    && python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Retry up to 3 times (large wheels; transient network failures are common),
+# then verify the package is actually importable via `pip show`.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    for i in 1 2 3; do \
+    python3.12 -m pip install --prefer-binary \
+    nvidia-cublas \
+    torch \
+    tensorrt_llm==1.2.0 \
+    --extra-index-url https://pypi.nvidia.com/ \
+    && break || (echo "pip install attempt $i failed, retrying in 10s..." \
+    && sleep 10); \
+    done; \
+    python3.12 -m pip show tensorrt_llm > /dev/null || { echo "ERROR: tensorrt_llm not installed after 3 attempts"; exit 1; }
+
+# Register nvidia pip package libraries (CUDA 13 runtime libs from pip)
+RUN touch /etc/ld.so.conf.d/nvidia-pip.conf \
+    && for dir in $(find / -path "*/nvidia/*/lib" -type d 2>/dev/null); do \
+    echo "$dir" >> /etc/ld.so.conf.d/nvidia-pip.conf; \
+    done \
+    && cat /etc/ld.so.conf.d/nvidia-pip.conf \
+    && ldconfig
+
+# Fix: OpenCloudOS Python reports version "3.12.12+" which breaks packaging.version.parse
+RUN DATASETS_CFG="$(python3.12 -c 'import importlib.util as u; s=u.find_spec("datasets"); print(s.submodule_search_locations[0]+"/config.py") if s else ""')" \
+    && if [ -n "$DATASETS_CFG" ]; then \
+    sed -i "s/platform.python_version()/platform.python_version().split('+')[0]/" "$DATASETS_CFG"; \
+    else \
+    echo "datasets not installed, skipping version fix"; \
+    fi
+
+RUN echo $(date +"%Y-%m-%dT%H:%M:%S%z") > /opencloudos_build_date.txt
+CMD ["python3.12"]
diff --git a/frameworks/tensorrt-llm/1.2.0/README.md b/frameworks/tensorrt-llm/1.2.0/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cd13a639bc3d3095e72c4cdbbe21474360143711
--- /dev/null
+++ b/frameworks/tensorrt-llm/1.2.0/README.md
@@ -0,0 +1,54 @@
+# TensorRT-LLM 1.2.0 on OpenCloudOS 9
+
+## 基本信息
+- **框架版本**:1.2.0
+- **基础镜像**:opencloudos/opencloudos9-cuda-devel:12.8
+- **Python 版本**:3.12
+- **CUDA 版本**:12.8(运行时依赖 CUDA 13.x 库由 pip 自动安装)
+
+## 构建
+
+```bash
+docker build -t oc9-tensorrtllm:1.2.0 .
+```
+
+## 使用示例
+
+### 验证安装
+
+```bash
+docker run --rm --gpus all oc9-tensorrtllm:1.2.0 \
+  python3.12 -c "import tensorrt_llm; print(tensorrt_llm.__version__)"
+```
+
+### 启动推理服务
+
+```bash
+# 1. 下载模型权重
+sudo mkdir -p /models
+sudo git clone https://www.modelscope.cn/Qwen/Qwen2.5-7B-Instruct.git /models/Qwen2.5-7B-Instruct
+
+# 2. 启动容器
+docker run -itd --name tensorrt_serving \
+  --gpus all \
+  -p 8000:8000 \
+  -v /models/Qwen2.5-7B-Instruct:/models/Qwen2.5-7B-Instruct \
+  oc9-tensorrtllm:1.2.0 tail -f /dev/null
+
+# 3. 容器内启动推理服务
+docker exec -it tensorrt_serving bash -c \
+  "trtllm-serve /models/Qwen2.5-7B-Instruct --host 0.0.0.0 --port 8000"
+
+# 4. 访问服务
+curl http://localhost:8000/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "model": "Qwen2.5-7B-Instruct",
+    "messages": [{"role":"user","content":"你好"}],
+    "max_tokens": 128
+  }'
+```
+
+## 已知问题
+- 构建时间较长(需从 NVIDIA PyPI 下载 tensorrt_llm、torch 及 CUDA 运行时等大体积 wheel 包)
+- 需要 NVIDIA Driver >= 535,建议使用较新的 GPU 驱动以兼容 CUDA 13.x 运行时
diff --git a/frameworks/tensorrt-llm/1.2.0/build.conf b/frameworks/tensorrt-llm/1.2.0/build.conf
new file mode 100644
index 0000000000000000000000000000000000000000..ec5c4e30984b9557342c883ac7d47b1a5dbbcc01
--- /dev/null
+++ b/frameworks/tensorrt-llm/1.2.0/build.conf
@@ -0,0 +1,4 @@
+# TensorRT-LLM 1.2.0 on OpenCloudOS 9 (GPU)
+IMAGE_NAME=oc9-tensorrtllm
+IMAGE_TAG=1.2.0
+GPU_TEST=true
diff --git a/frameworks/tensorrt-llm/1.2.0/test.sh b/frameworks/tensorrt-llm/1.2.0/test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a4ee1389f93ea79cbd547c90638ac9bdd36335c4
--- /dev/null
+++ b/frameworks/tensorrt-llm/1.2.0/test.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+set -e
+
+IMAGE="${1:?ERROR: 缺少镜像参数。用法: bash test.sh <镜像名>}"
+
+echo "=== TensorRT-LLM 1.2.0 镜像功能测试 ==="
+
+# 1. Verify CUDA environment
+echo -n "检查 CUDA 环境... "
+docker run --rm --gpus all "$IMAGE" python3.12 -c "
+import torch
+assert torch.cuda.is_available(), 'CUDA not available'
+print(f'GPU: {torch.cuda.get_device_name(0)}')
+" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; }
+
+# 2. Verify tensorrt_llm import
+echo -n "检查 tensorrt_llm import... "
+docker run --rm --gpus all "$IMAGE" python3.12 -c "
+import tensorrt_llm
+print(f'tensorrt_llm version: {tensorrt_llm.__version__}')
+assert tensorrt_llm.__version__ == '1.2.0', f'version mismatch: {tensorrt_llm.__version__}'
+" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; }
+
+# 3. Verify trtllm-serve command
+echo -n "检查 trtllm-serve 命令... "
+docker run --rm --gpus all "$IMAGE" python3.12 -c "
+import subprocess, sys
+result = subprocess.run(['trtllm-serve', '--help'], capture_output=True, text=True)
+if result.returncode == 0:
+    print('trtllm-serve 命令可用')
+else:
+    print(f'trtllm-serve 异常: {result.stderr[:200]}')
+    sys.exit(1)
+" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; }
+
+# 4. Verify TensorRT version
+echo -n "检查 TensorRT 版本... "
+docker run --rm --gpus all "$IMAGE" python3.12 -c "
+import tensorrt as trt
+print(f'TensorRT version: {trt.__version__}')
+" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; }
+
+# 5. Verify key dependencies
+echo -n "检查关键依赖... "
+docker run --rm --gpus all "$IMAGE" python3.12 -c "
+import torch, transformers, numpy as np
+print(f'PyTorch: {torch.__version__}')
+print(f'Transformers: {transformers.__version__}')
+print(f'NumPy: {np.__version__}')
+" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; }
+
+echo "=== 所有测试通过 ==="
diff --git a/frameworks/tensorrt-llm/1.2.0/test_result.png b/frameworks/tensorrt-llm/1.2.0/test_result.png
new file mode 100644
index 0000000000000000000000000000000000000000..f8e5e6bde61b6566ae639765fc8c913ff3aeeb4a
Binary files /dev/null and b/frameworks/tensorrt-llm/1.2.0/test_result.png differ