# syntax = edrevo/dockerfile-plus
# ^^^ this line enables the INCLUDE+ directive

# Base image: NVIDIA's NGC PyTorch container, release tag 23.10-py3.
FROM nvcr.io/nvidia/pytorch:23.10-py3

# Shared build fragments, spliced in by the dockerfile-plus INCLUDE+ directive.
# NOTE(review): their contents are not visible from this file; later steps may
# rely on what they set up (e.g. the RUN shell) -- confirm before reordering.
INCLUDE+ cuda-settings.dockerfile
INCLUDE+ common.dockerfile

# Framework selector for NVIDIA Transformer Engine (NVTE).
# NOTE(review): presumably read when Transformer Engine is built/installed --
# confirm whether it is still needed while that install is commented out.
ENV NVTE_FRAMEWORK="pytorch"

# Python dependencies for vLLM, as listed in vllm-requirements.txt.
COPY vllm-requirements.txt /tmp/requirements.txt
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN pip install --no-cache-dir -r /tmp/requirements.txt

# The base image ships its own transformer_engine; remove it, because leaving
# it installed causes an error when importing vLLM (LLAVA models).
RUN pip uninstall --yes transformer_engine

# Disabled: installing xformers from source seems to crash the Docker build (unconfirmed).
# RUN pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers

# Disabled: the installs below take far too long to build.
# RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
# RUN pip install typing_extensions==4.5.0
# RUN pip install -U flash-attn

# RUN pip install torch==2.1.0 nvidia-cuda-runtime
# The unversioned libcudart.so seems to be missing from the pip-installed CUDA
# runtime, so point it at the versioned libcudart.so.12.
# Both paths are written out in full instead of using `{.12,}` brace expansion:
# brace expansion is a bash feature, and under a POSIX /bin/sh the braces stay
# literal, so `ln` would create a wrongly named link instead of failing.
RUN ln -s \
      /usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib/libcudart.so.12 \
      /usr/local/lib/python3.10/dist-packages/nvidia/cuda_runtime/lib/libcudart.so

# perf tool: install the linux-tools package matching the kernel reported by
# `uname -r`.
# NOTE: during `docker build`, `uname -r` reports the *build host's* kernel, so
# a linux-tools package for that exact kernel must exist in the configured
# apt repositories, and the image is tied to hosts running that kernel.
# `apt-get update` runs in the same layer so the package index is present and
# current; the lists are removed afterwards to keep the layer small.
RUN apt-get update \
 && apt-get install -y --no-install-recommends "linux-tools-$(uname -r)" \
 && rm -rf /var/lib/apt/lists/*

# Install yarn globally with npm. nvm.sh is sourced first, presumably so that
# the nvm-managed node/npm are available in this RUN step.
# `source` is a bash builtin, not POSIX sh, so the command is run explicitly
# under bash instead of relying on whichever shell RUN defaults to.
RUN bash -c 'source /usr/local/nvm/nvm.sh && npm install -g yarn'
