Skip to content

Commit 55049c5

Browse files
Creating K8S only image
1 parent 7c29c66 commit 55049c5

File tree

2 files changed

+165
-0
lines changed

2 files changed

+165
-0
lines changed

docker_templates/Dockerfile.k8sOnly

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Minimal Spark-on-Kubernetes image: JVM base plus Spark jars/bin/sbin/examples
# unpacked from a locally built distribution tarball.
#
# NOTE: the build script passes --build-arg java_image_tag=..., but this file
# previously hard-coded the base tag, so that argument was silently ignored.
# Declaring the ARG before FROM makes the base tag overridable while keeping
# the same default.
ARG java_image_tag=17-jammy
FROM eclipse-temurin:${java_image_tag}

# UID for the unprivileged 'spark' user (185 matches the upstream images).
ARG spark_uid=185
# Filename (relative to the build context) of the Spark distribution tarball.
ARG SPARK_TGZ_FILENAME

RUN groupadd --system --gid=${spark_uid} spark && \
    useradd --system --uid=${spark_uid} --gid=spark spark

# Runtime dependencies and the /opt/spark layout, owned by the spark user.
# The pam_wheel line restricts 'su' inside the container to wheel members.
RUN set -ex; \
    apt-get update; \
    apt-get install -y bash gnupg2 gosu krb5-user libc6 libnss-wrapper libnss3 libpam-modules net-tools procps tini; \
    mkdir -p /opt/spark; \
    mkdir /opt/spark/python; \
    mkdir -p /opt/spark/examples; \
    mkdir -p /opt/spark/work-dir; \
    chmod g+w /opt/spark/work-dir; \
    touch /opt/spark/RELEASE; \
    chown -R spark:spark /opt/spark; \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
    rm -rf /var/lib/apt/lists/*

COPY ${SPARK_TGZ_FILENAME} /tmp/spark.tgz

# Unpack the distribution into /opt/spark, link a version-independent
# spark-examples.jar name, and drop the scratch directory.
RUN set -ex; \
    export SPARK_TMP="$(mktemp -d)"; \
    cd "$SPARK_TMP"; \
    tar -xf /tmp/spark.tgz --strip-components=1; \
    chown -R spark:spark .; \
    mv jars /opt/spark/; \
    mv RELEASE /opt/spark/; \
    mv bin /opt/spark/; \
    mv sbin /opt/spark/; \
    mv examples /opt/spark/; \
    ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \
    mv python/pyspark /opt/spark/python/pyspark/; \
    mv python/lib /opt/spark/python/lib/; \
    cd ..; \
    rm -rf "$SPARK_TMP";

# Standard K8s entrypoint fetched by the build script from apache/spark-docker.
COPY entrypoint.sh /opt/
RUN chmod +x /opt/entrypoint.sh

ENV SPARK_HOME=/opt/spark

WORKDIR /opt/spark/work-dir

# Run as the unprivileged spark user.
USER spark

ENTRYPOINT [ "/opt/entrypoint.sh" ]

makeSparkK8sOnlyImage.sh

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env bash

#==============================================================================
# Setup
#==============================================================================
# Strict mode:
#   -E  propagate the ERR trap into functions and subshells (without it the
#       handle_error trap below never fires for failures inside functions)
#   -e  exit on any unhandled non-zero exit status
#   -u  treat expansion of unset variables as an error
#   -o pipefail  a pipeline fails if any stage fails
#   -x  trace each command for debugging
set -Eeuo pipefail
set -x
trap 'handle_error' ERR
# Error handler wired to the ERR trap: switch off exit-on-error and xtrace so
# the message prints cleanly, report the failure, and abort with status 1.
handle_error() {
  set +e
  set +x
  printf '%s\n' "An error occurred. Cleaning up and exiting..."
  exit 1
}
# Print a message framed by horizontal rules. xtrace is suspended while
# printing so the frame is not interleaved with trace output, then re-enabled.
banner() {
  set +x
  local rule="+----------------------------------------------------"
  printf '%s\n' "$rule" "| $1" "$rule"
  set -x
}
#==============================================================================
# Configuration Variables (SET THESE!)
#==============================================================================
# Required inputs — fail fast with a usage hint when missing.
: "${VERSION:?ERROR: VERSION is not set (e.g., 3.5.5)}"
: "${IMG_NAME:?ERROR: IMG_NAME is not set (e.g., my-spark-image)}"
: "${DOCKER_IMG_ROOT:?ERROR: DOCKER_IMG_ROOT is not set (e.g., docker.io/myuser/...)}"

# Your Spark source checkout. Previously hard-coded to one developer's home
# directory; now overridable via the PROJ_HOME env var and defaulting to the
# current directory (the body requires you to run from $PROJ_HOME anyway).
PROJ_HOME="${PROJ_HOME:-$PWD}"

# Repository to fetch supporting files from apache/spark-docker (for entrypoint.sh etc.)
SPARK_DOCKER_ORG="https://github.com/apache/spark-docker"
SPARK_DOCKER_SUBDIR="master/${VERSION}/scala2.12-java17-ubuntu" # Common subdirectory for Dockerfile and entrypoint

# Location of your Dockerfile template (create this file as described above)
DOCKERFILE_TEMPLATE_PATH="${PROJ_HOME}/docker_templates/Dockerfile.k8sOnly"

# Temporary directory for Docker build context
DOCKER_BUILD_DIR="${PROJ_HOME}/docker_build_temp"
#==============================================================================
# Body
#==============================================================================
banner "Starting Spark Custom Image Build for $VERSION"

# make-distribution.sh and the relative paths below assume the repo root.
if [[ "$(pwd)" != "$PROJ_HOME" ]]; then
  echo "Error: You must run this script from $PROJ_HOME" >&2
  exit 1
fi

# Refuse to build custom images straight off master.
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
if [[ "$CURRENT_BRANCH" == "master" ]]; then
  echo "Error: Cannot run this script on the 'master' branch. Please switch to a different branch." >&2
  exit 1
fi

# 1. Build Spark distribution LOCALLY
banner "Building custom Spark distribution..."
./dev/make-distribution.sh --name "$IMG_NAME-$VERSION" \
  --tgz \
  -Pkubernetes

SPARK_DIST_TGZ="$PROJ_HOME/spark-$VERSION-bin-$IMG_NAME-$VERSION.tgz"
SPARK_TGZ_FILENAME=$(basename "$SPARK_DIST_TGZ") # Get just the filename

# Fail early with a clear message if the distribution build did not produce
# the tarball where we expect it (a silent mismatch would only fail at cp).
if [[ ! -f "$SPARK_DIST_TGZ" ]]; then
  echo "Error: expected distribution tarball not found: $SPARK_DIST_TGZ" >&2
  exit 1
fi

banner "Setting up temporary Docker build directory: $DOCKER_BUILD_DIR"
rm -rf "$DOCKER_BUILD_DIR" # Clean up previous build dir
mkdir -p "$DOCKER_BUILD_DIR"

# 2. Copy your pre-modified Dockerfile template to the build directory
banner "Copying pre-modified Dockerfile template..."
cp "$DOCKERFILE_TEMPLATE_PATH" "${DOCKER_BUILD_DIR}/Dockerfile"

# 3. Fetch necessary supporting files (like entrypoint.sh) from apache/spark-docker
# -f makes curl exit non-zero on HTTP errors; without it a 404 page would be
# saved as entrypoint.sh and the failure would only surface at container runtime.
banner "Fetching official Spark entrypoint.sh and other supporting files..."
curl -fsSL "${SPARK_DOCKER_ORG}/raw/${SPARK_DOCKER_SUBDIR}/entrypoint.sh" -o "${DOCKER_BUILD_DIR}/entrypoint.sh"

# 4. Copy your custom-built TGZ into the Docker build context
banner "Copying custom Spark TGZ into Docker build context..."
cp "$SPARK_DIST_TGZ" "$DOCKER_BUILD_DIR/"

# 5. Build the multi-arch Docker image and push it directly to the registry.
banner "Building Docker image..."
docker buildx build --platform linux/amd64,linux/arm64 \
  --build-arg spark_uid=185 \
  --build-arg java_image_tag=17-jammy \
  --build-arg SPARK_TGZ_FILENAME="$SPARK_TGZ_FILENAME" \
  -t "$DOCKER_IMG_ROOT/$IMG_NAME:$VERSION" \
  --push "$DOCKER_BUILD_DIR" # Set the build context to your new directory

#==============================================================================
# Cleanup
#==============================================================================
banner "Cleaning up temporary Docker build directory..."
rm -rf "$DOCKER_BUILD_DIR" # Clean up the temporary directory

set +e +x
banner 'Script completed successfully'

0 commit comments

Comments
 (0)