#
# Copyright 2022 DMetaSoul
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Build arguments declared before the first FROM so they can be expanded in FROM lines.
# SPARK_RELEASE selects the spark-<value>-install stage that the release stage is based on.
ARG SPARK_RELEASE=tarball
# METASPORE_RELEASE selects the metaspore_<value>_install stage the Spark stages are based on.
ARG METASPORE_RELEASE=http
# INSTALL_SPARK_CLOUD is referenced by the mvn command in the spark-pyspark-install stage.
ARG INSTALL_SPARK_CLOUD=false

# Base stage: Ubuntu 20.04 (focal) with APT pointed at the Huawei Cloud mirror.
# The later stages in this file are built FROM this stage, directly or indirectly.
FROM ubuntu:20.04 AS env

# Set as ENV, so the value also persists into stages and containers derived from this one.
ENV DEBIAN_FRONTEND=noninteractive
# Overwrite /etc/apt/sources.list with the mirror entries in a single layer.
# printf emits one argument per line, in the order listed.
RUN printf '%s\n' \
        "deb http://repo.huaweicloud.com/ubuntu/ focal main restricted" \
        "deb http://repo.huaweicloud.com/ubuntu/ focal-updates main restricted" \
        "deb http://repo.huaweicloud.com/ubuntu/ focal universe" \
        "deb http://repo.huaweicloud.com/ubuntu/ focal-updates universe" \
        "deb http://repo.huaweicloud.com/ubuntu/ focal multiverse" \
        "deb http://repo.huaweicloud.com/ubuntu/ focal-updates multiverse" \
        "deb http://repo.huaweicloud.com/ubuntu/ focal-backports main restricted universe multiverse" \
        "deb http://repo.huaweicloud.com/ubuntu focal-security main restricted" \
        "deb http://repo.huaweicloud.com/ubuntu focal-security universe" \
        "deb http://repo.huaweicloud.com/ubuntu focal-security multiverse" \
        > /etc/apt/sources.list
# Refresh the package index from the mirror and apply pending upgrades. The index is left
# in place because later RUN instructions call apt-get install without their own update.
RUN apt-get update && apt-get upgrade -y && apt-get clean
# Install locale support, then uncomment the en_US.UTF-8 entry in /etc/locale.gen and generate it.
RUN apt-get install -y locales \
    && apt-get clean
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
    && locale-gen
# Default locale and time zone for everything built on this stage.
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US.UTF-8 \
    LC_CTYPE=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
    TZ=Asia/Shanghai
# Link /etc/localtime to the Asia/Shanghai zone file, then install tzdata.
RUN ln -svf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && apt-get install -y tzdata \
    && apt-get clean
# Python 3 toolchain, a headless Java 11 runtime and general command-line utilities.
RUN apt-get install -y \
        python3 \
        python3-venv \
        python3-pip \
        python-is-python3 \
        ca-certificates \
        openjdk-11-jre-headless \
        curl \
        wget \
        git \
        libgomp1 \
        cron \
        zip \
    && apt-get clean
# With JAVA_HOME=/usr, $JAVA_HOME/bin resolves to /usr/bin.
ENV JAVA_HOME=/usr

# Register /usr/bin/python3 as an alternative for /usr/bin/python with priority 30.
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 30
# Use the Tsinghua PyPI mirror as pip's primary index.
RUN python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple/
# Additional indexes pip may fall back to. The key must be in the "global" section
# (not "globa") for pip to apply it to its commands.
RUN python -m pip config set global.extra-index-url "https://pypi.tuna.tsinghua.edu.cn/simple/ https://pypi.org/simple/"

# Install development headers and runtime utilities in two apt transactions, clean the
# package cache, and append a pam_wheel rule to the PAM configuration for su.
RUN apt-get install -y \
        pkg-config \
        uuid-dev \
        libpulse-dev \
    && apt-get install -y \
        tini \
        libpam-modules \
        krb5-user \
        libnss3 \
        procps \
    && apt-get clean \
    && echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su

# Upgrade the packaging tools, then install the numeric and ONNX Python stack.
# torch is resolved against the PyTorch CPU wheel index instead of the configured default.
# The pip cache is purged in the same layer so it is not stored in the image.
RUN python -m pip install --upgrade pip setuptools wheel \
    && python -m pip install numpy \
    && python -m pip install torch --index-url https://download.pytorch.org/whl/cpu \
    && python -m pip install onnx onnxmltools onnxruntime tabulate \
    && python -m pip cache purge \
    && echo OK: python

# MetaSpore installed with pip from the requirement given in METASPORE_WHEEL.
# This stage is selected when METASPORE_RELEASE is "http".
FROM env AS metaspore_http_install
# Left unquoted in the RUN below, so the value is word-split by the shell before pip sees it.
ARG METASPORE_WHEEL="metaspore==1.2.0"
RUN python -m pip install ${METASPORE_WHEEL} \
    && pip cache purge

# MetaSpore installed from a wheel copied out of "metaspore_build".
# This stage is selected when METASPORE_RELEASE is "build".
# NOTE(review): no stage named metaspore_build is defined in the visible part of this file;
# presumably it is provided as an image or an earlier stage elsewhere - confirm.
FROM env AS metaspore_build_install
COPY --from=metaspore_build /opt/metaspore-build-release/metaspore-*.whl .
# Bring over libstdc++ 6.0.29 from the build source and repoint the libstdc++.so.6 symlink at it.
COPY --from=metaspore_build /lib/x86_64-linux-gnu/libstdc++.so.6.0.29 /lib/x86_64-linux-gnu
RUN ln -svf libstdc++.so.6.0.29 /lib/x86_64-linux-gnu/libstdc++.so.6
# Install the copied wheel, then drop both the pip cache and the wheel file in the same layer.
RUN python -m pip install metaspore-*.whl \
    && pip cache purge \
    && rm -f metaspore-*.whl

# Spark installed by unpacking a release tarball into /opt/spark.
# This stage is selected when SPARK_RELEASE is "tarball".
FROM metaspore_${METASPORE_RELEASE}_install AS spark-tarball-install
ARG SPARK_FILE="https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/spark/spark-3.3.2-bin-dmetasoul.tgz"
# ARG SYNAPSEML_VERSION="0.10.1"
# Download the archive, extract it without its top-level directory, and delete the
# archive in the same layer.
RUN mkdir -p /opt/spark \
    && wget ${SPARK_FILE} \
    && tar xf $(basename ${SPARK_FILE}) -C /opt/spark --strip-components 1 \
    && rm -f $(basename ${SPARK_FILE})
ENV SPARK_HOME=/opt/spark \
    SPARK_CONF_DIR=/opt/spark/conf
# Kept as separate ENV instructions because they expand SPARK_HOME set above.
ENV PATH=$SPARK_HOME/bin:$PATH
# ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/jars/synapseml_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-cognitive_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-core_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-deep-learning_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-lightgbm_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-opencv_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-vw_2.12-${SYNAPSEML_VERSION}.jar:$PYTHONPATH
ENV PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH

# Spark installed as the pyspark Python package, with extra jars copied in through Maven.
# This stage is selected when SPARK_RELEASE is "pyspark".
FROM metaspore_${METASPORE_RELEASE}_install AS spark-pyspark-install
ARG SPARK_FILE="https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/spark/pyspark-3.3.2.tar.gz"
ARG SYNAPSEML_VERSION="0.10.1"
RUN python -m pip install ${SPARK_FILE} && pip cache purge
# Path of the pyspark package under the Python 3.8 dist-packages directory.
ENV SPARK_HOME=/usr/local/lib/python3.8/dist-packages/pyspark
RUN mkdir -p /opt/spark/conf
ENV SPARK_CONF_DIR=/opt/spark/conf
ENV PATH=$SPARK_HOME/bin:$PATH
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/jars/synapseml_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-cognitive_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-core_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-deep-learning_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-lightgbm_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-opencv_2.12-${SYNAPSEML_VERSION}.jar:$SPARK_HOME/jars/synapseml-vw_2.12-${SYNAPSEML_VERSION}.jar:$PYTHONPATH
# With more than one source, the destination is written with a trailing slash so it is
# unambiguously a directory.
COPY docker/ubuntu20.04/release-copy-deps-pom.xml docker/ubuntu20.04/maven-proxy-settings.xml ./
ARG MAVEN_OPTS="-Xmx4g -XX:ReservedCodeCacheSize=1g"
# An ARG declared before the first FROM is not visible inside a stage until it is
# re-declared there; without this line ${INSTALL_SPARK_CLOUD} expands to an empty string below.
ARG INSTALL_SPARK_CLOUD
ENV MAVEN_OPTS=${MAVEN_OPTS}
# Maven is not installed by the stages defined in this file that this one builds on, so it
# is installed here, used to copy the runtime dependencies into $SPARK_HOME/jars, and purged
# again in the same layer together with the local repository and the two copied XML files.
RUN apt-get update \
    && apt-get install -y --no-install-recommends maven \
    && mvn -f release-copy-deps-pom.xml -s maven-proxy-settings.xml org.apache.maven.plugins:maven-dependency-plugin:2.10:copy-dependencies -DincludeScope=runtime -Dsynapseml.version=$SYNAPSEML_VERSION -DskipTests -Dmaven.test.skip=true -Dactivate.spark-hadoop-cloud=${INSTALL_SPARK_CLOUD} -DoutputDirectory=${SPARK_HOME}/jars \
    && rm -rf ~/.m2 maven-proxy-settings.xml release-copy-deps-pom.xml \
    && apt-get purge -y --auto-remove maven \
    && apt-get clean
# Reset MAVEN_OPTS to an empty value so the build-time JVM options do not carry over.
ENV MAVEN_OPTS=

# Final stage: built on whichever spark-<SPARK_RELEASE>-install stage was selected.
FROM spark-${SPARK_RELEASE}-install AS release
# Download the LakeSoul v2.5.3 jar for Spark 3.3 into Spark's jars directory.
# NOTE(review): the local file name says 2.5.1 while the artifact is 2.5.3; presumably this
# deliberately overwrites an already-present jar of that name - confirm before renaming.
RUN wget \
        -O $SPARK_HOME/jars/lakesoul-spark-2.5.1-spark-3.3.jar \
        https://github.com/lakesoul-io/LakeSoul/releases/download/v2.5.3/lakesoul-spark-3.3-2.5.3.jar
