mirror of
https://github.com/bitnami/minideb.git
synced 2026-06-04 10:13:55 +08:00
Make the image build reproducible
Change the mkimage script so that the produced image is
reproducible. This involves:
- Removing the ldconfig aux-cache, as it changes on every build.
- Setting the mtimes of the files to a specific date so that the
resulting tar file will have the same contents.
- Removing the `strings` guard around the unsafe-io tweak, as it seems
to be non-deterministic: it sometimes failed to add the tweak
for the same file. We don't care about releases older than
jessie, so the guard is not needed.
- Importing the image by constructing a docker image with
a specific timestamp and doing `docker load`.
Also change the buildall script to build each image twice and
confirm that the same tarball is produced and that, as a result,
the layers in the imported images match.
Add a dockerdiff script that checks that two images are equivalent,
and tries to show the differences if not. This is useful when the
build script reports differences, as it can point to what the
differences are.
This commit is contained in:
committed by
James Westby
parent
6befeedf99
commit
44030c910b
@@ -1,7 +1,37 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Build a minideb image for each supported dist
|
||||
#
|
||||
# First we build the image as a tarball, then we import it and tag it.
|
||||
#
|
||||
# However we aim to allow our images to be reproduced. That means
|
||||
# we need more control over the import process. We also build and import
|
||||
# each image twice to confirm that our images are still reproducible.
|
||||
#
|
||||
# To reproduce an image you have to:
|
||||
#
|
||||
# - Produce exactly the same base tarball. `mkimage` will take care of that
|
||||
# for the same package inputs.
|
||||
# - Import the image with the same config (`CMD` etc.)
|
||||
# - Have the same creation date on the image.
|
||||
#
|
||||
# That last requirement leads us to some extra work to re-use timestamps.
|
||||
#
|
||||
# The steps are:
|
||||
#
|
||||
# 1. Pull image from Dockerhub and save creation date and image_id
|
||||
# 2. Build image locally and import it, setting creation date to the pulled one
|
||||
# 3. Build the image again and import it, also setting creation date to the pulled one
|
||||
# 4. Compare the built image ids. Error if they are not the same (Docker thinks images are different, thanks to checksum)
|
||||
# 5. Compare built image id with pulled image id. Both will have same creation date but may differ in checksum so ids may be different
|
||||
# - If the image is the same as the pulled one then nothing changed in this build
|
||||
# - If the image differs from the pulled one then:
|
||||
# - Re-import the locally built image with the current timestamp so it will be shown as a new image
|
||||
# - Tag the built image with the target tag, ready to push.
|
||||
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
DISTS="jessie
|
||||
unstable
|
||||
@@ -13,19 +43,55 @@ GCR_BASENAME=gcr.io/bitnami-containers/minideb
|
||||
|
||||
mkdir -p build
|
||||
|
||||
# Write a diagnostic message to stderr.
#
# Arguments: the words of the message; they are joined with the first
#            character of IFS (a single space by default).
# Outputs:   exactly one line on stderr; nothing is written to stdout.
log() {
  # printf rather than echo: echo would interpret a leading "-n" or "-e"
  # argument as an option instead of printing it.
  printf '%s\n' "$*" >&2
}
|
||||
|
||||
for DIST in $DISTS; do
|
||||
[ -f debootstrap/$DIST ] || (echo "buildall: Unknown distribution: $DIST" && exit 1)
|
||||
echo "============================================"
|
||||
echo "Building $BASENAME:$DIST"
|
||||
echo "============================================"
|
||||
current_ts="$(date -u +%Y-%m-%dT%H:%M:%S.%NZ)"
|
||||
if docker pull $BASENAME:$DIST > /dev/null; then
|
||||
target_ts="$(docker inspect $BASENAME:$DIST | jq --raw-output ".[0].Created")"
|
||||
pulled_image_id="$(docker inspect $BASENAME:$DIST | jq --raw-output ".[0].Id")"
|
||||
else
|
||||
target_ts="$current_ts"
|
||||
pulled_image_id=
|
||||
fi
|
||||
log "============================================"
|
||||
log "Building $BASENAME:$DIST"
|
||||
log "============================================"
|
||||
./mkimage build/$DIST.tar $DIST
|
||||
IMPORTED=$(docker import --change "CMD /bin/bash" build/$DIST.tar)
|
||||
echo "============================================"
|
||||
echo "Running tests for $BASENAME:$DIST"
|
||||
echo "============================================"
|
||||
./test $IMPORTED
|
||||
docker tag $IMPORTED $BASENAME:$DIST
|
||||
docker tag $IMPORTED $GCR_BASENAME:$DIST
|
||||
built_image_id=$(./import build/$DIST.tar "$target_ts")
|
||||
log "============================================"
|
||||
log "Running tests for $BASENAME:$DIST"
|
||||
log "============================================"
|
||||
./test $built_image_id
|
||||
log "============================================"
|
||||
log "Rebuilding $BASENAME:$DIST to test reproducibility"
|
||||
log "============================================"
|
||||
./mkimage build/${DIST}-repro.tar $DIST
|
||||
repro_image_id=$(./import build/${DIST}-repro.tar "$target_ts")
|
||||
if [ "$repro_image_id" != "$built_image_id" ]; then
|
||||
log "$BASENAME:$DIST differs after a rebuild. Examine $built_image_id and $repro_image_id"
|
||||
log "to find the differences and fix the build to be reproducible again."
|
||||
log "Running \`./dockerdiff $built_image_id $repro_image_id\` might be useful."
|
||||
exit 1
|
||||
fi
|
||||
rm build/${DIST}-repro.tar
|
||||
if [ -n "$pulled_image_id" ]; then
|
||||
if [ "$built_image_id" != "$pulled_image_id" ]; then
|
||||
log "Image changed $built_image_id (new) != $pulled_image_id (old)"
|
||||
# Re-import with the current timestamp so that the image shows
|
||||
# as new
|
||||
built_image_id="$(./import build/$DIST.tar "$current_ts")"
|
||||
else
|
||||
log "Image didn't change"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
docker tag $built_image_id $BASENAME:$DIST
|
||||
docker tag $built_image_id $GCR_BASENAME:$DIST
|
||||
log "Tagged $built_image_id as $BASENAME:$DIST $GCR_BASENAME:$DIST"
|
||||
done
|
||||
docker tag $BASENAME:$LATEST $BASENAME:latest
|
||||
docker tag $GCR_BASENAME:$LATEST $GCR_BASENAME:latest
|
||||
|
||||
Executable
+46
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Compare two docker images, reporting what changed.
|
||||
# The script will exit 1 if there are differences between the images
|
||||
# other than their tags.
|
||||
#
|
||||
# It will also try and show what the differences are, comparing
|
||||
# - the image config
|
||||
# - the installed dpkg packages
|
||||
# - changed file metadata
|
||||
# - changed file checksums
|
||||
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
IMAGE1=$1
|
||||
IMAGE2=$2
|
||||
|
||||
# Print the first `docker inspect` record for an image, with the RepoTags
# and RepoDigests fields deleted.
#
# Arguments: $1 - image name or id
# Outputs:   the filtered JSON object on stdout
inspect() {
  # "$1" is quoted so the image reference reaches docker as a single
  # argument, without word splitting or glob expansion.
  docker inspect "$1" | jq '.[0]|del(.RepoTags,.RepoDigests)'
}
|
||||
|
||||
# List the dpkg packages installed in an image by running `dpkg -l` in a
# throw-away container.
#
# Arguments: $1 - image name or id
# Outputs:   the `dpkg -l` listing on stdout
dpkgl() {
  # Quoted so the image reference is passed to docker as one argument.
  docker run --rm "$1" dpkg -l
}
|
||||
|
||||
# Print an `ls -ld` entry for every path in an image's root filesystem,
# in sorted order, by running find inside a throw-away container.
#
# The walk starts at / and stays on that filesystem (-xdev); the paths
# /proc and /sys themselves are left out of the listing.
#
# Arguments: $1 - image name or id
# Outputs:   one `ls -ld` line per path on stdout
lslr() {
  # Quoted so the image reference is passed to docker as one argument.
  docker run --rm "$1" bash -c 'find / -xdev -not -path /proc -a -not -path /sys -print0 | sort -z | xargs -0 ls -ld'
}
|
||||
|
||||
# Print the md5sum of every regular file in an image's root filesystem,
# in sorted path order, by running find inside a throw-away container.
#
# The walk starts at / and stays on that filesystem (-xdev). /etc/hosts
# and /etc/hostname are skipped.
# NOTE(review): presumably because docker generates those two files per
# container, so they would always differ -- confirm.
#
# Arguments: $1 - image name or id
# Outputs:   one `md5sum` line per file on stdout
md5() {
  # Quoted so the image reference is passed to docker as one argument.
  docker run --rm "$1" bash -c 'find / -xdev -not -path /etc/hosts -a -not -path /etc/hostname -a -type f -print0 | sort -z | xargs -0 md5sum'
}
|
||||
|
||||
# Run one of the reporting helpers against both images and show a unified
# diff of the two outputs, labelled with the image names.
#
# Globals:   IMAGE1, IMAGE2 (read) - the two images being compared
# Arguments: $1 - name of a function or command that takes an image as its
#                 only argument and prints a report on stdout
# Outputs:   the unified diff on stdout (empty when the reports match)
# Returns:   the exit status of diff: 0 if the reports are identical,
#            1 if they differ, 2 on trouble
_diff() {
  local cmd=$1
  # Everything is quoted so that an image reference is never word-split,
  # neither as a --label value nor as the helper's argument.
  diff -u --label "$IMAGE1" --label "$IMAGE2" \
    <("$cmd" "$IMAGE1") <("$cmd" "$IMAGE2")
}
|
||||
|
||||
# The image config comparison decides the outcome: when the configs match
# the script ends successfully without further output.
if ! _diff inspect; then
  # The configs differ. Show every other report as well to help locate the
  # change; these comparisons are informational only, so their status is
  # ignored.
  for report in dpkgl lslr md5; do
    _diff "$report" || true
  done
  exit 1
fi
|
||||
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Import a tarball as a docker image, specifying the desired image
|
||||
# creation date.
|
||||
|
||||
# This is useful as there's no other way to manipulate the creation
|
||||
# date, and the date is part of the calculation of the image id.
|
||||
# This means that the only way to reproduce an image is to specify
|
||||
# the same timestamp.
|
||||
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
CONF_TEMPLATE='{"architecture":"amd64","comment":"from Bitnami with love","config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":null,"Cmd":["/bin/bash"],"Image":"","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null},"container_config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":null,"Cmd":null,"Image":"","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null},"created":"%TIMESTAMP%","docker_version":"1.13.0","history":[{"created":"%TIMESTAMP%","comment":"from Bitnami with love"}],"os":"linux","rootfs":{"type":"layers","diff_ids":["sha256:%LAYERSUM%"]}}'
|
||||
MANIFEST_TEMPLATE='[{"Config":"%CONF_SHA%.json","RepoTags":null,"Layers":["%LAYERSUM%/layer.tar"]}]'
|
||||
|
||||
SOURCE=${1:?Specify the tarball to import}
|
||||
TIMESTAMP=${2:?Specify the timestamp to use}
|
||||
|
||||
# Load $SOURCE into docker as a single-layer image whose creation date is
# $TIMESTAMP, and print the resulting image id.
#
# A `docker load` archive is assembled in a temporary directory: the
# tarball becomes <layersum>/layer.tar, the image config is rendered from
# CONF_TEMPLATE, and manifest.json is rendered from MANIFEST_TEMPLATE.
#
# Globals:   SOURCE, TIMESTAMP, CONF_TEMPLATE, MANIFEST_TEMPLATE (all read)
# Outputs:   the image id ("sha256:<hex>") on stdout
# Exits:     with status 1 when docker reports an id other than the sha256
#            of the rendered config; the temporary directory is kept in
#            that case so it can be inspected.
import() {
  # Declarations are kept apart from the assignments: `local x="$(cmd)"`
  # returns the status of `local`, which would hide a failing command
  # from `set -e`.
  local tdir layersum conf conf_sha manifest id

  tdir="$(mktemp -d)"
  # All paths are quoted so that a tarball path containing whitespace works.
  layersum="$(sha256sum -- "$SOURCE" | awk '{print $1}')"
  mkdir "$tdir/$layersum"
  cp -- "$SOURCE" "$tdir/$layersum/layer.tar"
  printf '%s' '1.0' > "$tdir/$layersum/VERSION"

  # The image id checked below is the sha256 of the config, so the config
  # is written without a trailing newline, exactly as it was hashed.
  conf="$(printf '%s' "$CONF_TEMPLATE" \
    | sed -e "s/%TIMESTAMP%/$TIMESTAMP/g" -e "s/%LAYERSUM%/$layersum/g")"
  conf_sha="$(printf '%s' "$conf" | sha256sum | awk '{print $1}')"
  printf '%s' "$conf" > "$tdir/${conf_sha}.json"

  manifest="$(printf '%s' "$MANIFEST_TEMPLATE" \
    | sed -e "s/%CONF_SHA%/$conf_sha/g" -e "s/%LAYERSUM%/$layersum/g")"
  printf '%s' "$manifest" > "$tdir/manifest.json"

  tar cf "$tdir/import.tar" -C "$tdir" manifest.json "${conf_sha}.json" "$layersum"

  # The id is taken from the fourth whitespace-separated field of what
  # `docker load` prints.
  id="$(docker load -i "$tdir/import.tar" | awk '{print $4}')"
  if [ "$id" != "sha256:$conf_sha" ]; then
    echo "Failed to load $id correctly, expected id to be $conf_sha, source in $tdir" >&2
    exit 1
  fi

  rm -r "$tdir"
  printf '%s\n' "$id"
}
|
||||
|
||||
import
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
ROOT=$(cd $(dirname $0) && pwd)
|
||||
|
||||
@@ -40,10 +41,15 @@ fi
|
||||
chroot "$rootfsDir" apt-get update
|
||||
chroot "$rootfsDir" apt-get upgrade -y -o Dpkg::Options::="--force-confdef"
|
||||
|
||||
chroot "rootfsDir" dpkg -l | tee "$TARGET.manifest"
|
||||
chroot "$rootfsDir" dpkg -l | tee "$TARGET.manifest"
|
||||
|
||||
echo "Applying docker-specific tweaks"
|
||||
# These are copied from the docker contrib/mkimage/debootstrap script.
|
||||
# Modifications:
|
||||
# - remove `strings` check for applying the --force-unsafe-io tweak.
|
||||
# This was sometimes wrongly detected as not applying, and we aren't
|
||||
# interested in building versions that this guard would apply to,
|
||||
# so simply apply the tweak unconditionally.
|
||||
|
||||
# get path to "chroot" in our current PATH
|
||||
chrootPath="$(type -P chroot)"
|
||||
@@ -82,18 +88,15 @@ chmod +x "$rootfsDir/usr/sbin/policy-rc.d"
|
||||
# don't even have kernels installed
|
||||
rm -f "$rootfsDir/etc/apt/apt.conf.d/01autoremove-kernels"
|
||||
|
||||
# Ubuntu 10.04 sucks... :)
|
||||
if strings "$rootfsDir/usr/bin/dpkg" | grep -q unsafe-io; then
|
||||
# force dpkg not to call sync() after package extraction (speeding up installs)
|
||||
echo >&2 "+ echo force-unsafe-io > '$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup'"
|
||||
cat > "$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup" <<-'EOF'
|
||||
# For most Docker users, package installs happen during "docker build", which
|
||||
# doesn't survive power loss and gets restarted clean afterwards anyhow, so
|
||||
# this minor tweak gives us a nice speedup (much nicer on spinning disks,
|
||||
# obviously).
|
||||
force-unsafe-io
|
||||
EOF
|
||||
fi
|
||||
# force dpkg not to call sync() after package extraction (speeding up installs)
|
||||
echo >&2 "+ echo force-unsafe-io > '$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup'"
|
||||
cat > "$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup" <<-'EOF'
|
||||
# For most Docker users, package installs happen during "docker build", which
|
||||
# doesn't survive power loss and gets restarted clean afterwards anyhow, so
|
||||
# this minor tweak gives us a nice speedup (much nicer on spinning disks,
|
||||
# obviously).
|
||||
force-unsafe-io
|
||||
EOF
|
||||
|
||||
if [ -d "$rootfsDir/etc/apt/apt.conf.d" ]; then
|
||||
# _keep_ us lean by effectively running "apt-get clean" after every install
|
||||
@@ -188,12 +191,24 @@ EOF
|
||||
chmod 0755 "$rootfsDir/usr/sbin/install_packages"
|
||||
|
||||
|
||||
# Capture the most recent date that a package in the image was changed.
|
||||
# We don't care about the particular date, or which package it comes from,
|
||||
# we just need a date that isn't very far in the past.
|
||||
BUILD_DATE="$(find $rootfsDir/usr/share/doc -name changelog.Debian.gz -exec dpkg-parsechangelog -SDate -l'{}' \; | xargs -l -i date --date="{}" +%s | sort -n | tail -n 1)"
|
||||
echo "Trimming down"
|
||||
for DIR in $DIRS_TO_TRIM; do
|
||||
rm -r "$rootfsDir/$DIR"/*
|
||||
done
|
||||
# Remove the aux-cache as it isn't reproducible. It doesn't seem to
|
||||
# cause any problems to remove it.
|
||||
rm "$rootfsDir/var/cache/ldconfig/aux-cache"
|
||||
find "$rootfsDir/usr/share/doc" -mindepth 1 -not -name copyright -not -type d -delete
|
||||
find "$rootfsDir/usr/share/doc" -mindepth 1 -type d -empty -delete
|
||||
# Set the mtime on all files to be no newer than $BUILD_DATE.
|
||||
# This is required to have the same metadata on files so that the
|
||||
# same tarball is produced. We assume that it is not important
|
||||
# that any file have a newer mtime than this.
|
||||
find "$rootfsDir" -depth -newermt "@$BUILD_DATE" -print0 | xargs -0r touch --no-dereference --date="@$BUILD_DATE"
|
||||
echo "Total size"
|
||||
du -skh "$rootfsDir"
|
||||
echo "Package sizes"
|
||||
|
||||
Reference in New Issue
Block a user