diff --git a/buildall b/buildall index adb08b8..f10b437 100755 --- a/buildall +++ b/buildall @@ -1,7 +1,37 @@ #!/bin/bash +# Build a minideb image for each supported dist +# +# First we build the image as a tarball, then we import it and tag it. +# +# However we aim to allow our images to be reproduced. That means +# we need more control over the import process. We also build and import +# each image twice to confirm that our images are still reproducible. +# +# To reproduce an image you have to: +# +# - Produce exactly the same base tarball. `mkimage` will take care of that +# for the same package inputs. +# - Import the image with the same config (`CMD` etc.) +# - Have the same creation date on the image. +# +# That last requirement leads us to some extra work to re-use timestamps. +# +# The steps are: +# +# 1. Pull image from Dockerhub and save creation date and image_id +# 2. Build image locally and import it, setting creation date to the pulled one +# 3. Build the image again and import it, also setting creation date to the pulled one +# 4. Compare the built image ids. Error if they are not the same (Docker thinks images are different, thanks to checksum) +# 5. Compare built image id with pulled image id. Both will have same creation date but may differ in checksum so ids may be different +# - If the image is the same as the pulled one then nothing changed in this build +# - If the image differs from the pulled one then: +# - Re-import the locally built image with the current timestamp so it will be shown as a new image +# - Tag the built image with the target tag, ready to push. 
+
 set -e
 set -u
+set -o pipefail
 
 DISTS="jessie
 unstable
@@ -13,19 +43,60 @@ GCR_BASENAME=gcr.io/bitnami-containers/minideb
 
 mkdir -p build
 
+# Print a message on stderr.
+log() {
+    echo "$@" >&2
+}
+
 for DIST in $DISTS; do
     [ -f debootstrap/$DIST ] || (echo "buildall: Unknown distribution: $DIST" && exit 1)
-    echo "============================================"
-    echo "Building $BASENAME:$DIST"
-    echo "============================================"
+    # Timestamp used when there is no published image, or when the image changed.
+    current_ts="$(date -u +%Y-%m-%dT%H:%M:%S.%NZ)"
+    # Re-use the creation date of the published image, if there is one: the
+    # date is part of the image id, so it has to match for ids to be comparable.
+    if docker pull "$BASENAME:$DIST" > /dev/null; then
+        target_ts="$(docker inspect "$BASENAME:$DIST" | jq --raw-output ".[0].Created")"
+        pulled_image_id="$(docker inspect "$BASENAME:$DIST" | jq --raw-output ".[0].Id")"
+    else
+        target_ts="$current_ts"
+        pulled_image_id=
+    fi
+    log "============================================"
+    log "Building $BASENAME:$DIST"
+    log "============================================"
     ./mkimage build/$DIST.tar $DIST
-    IMPORTED=$(docker import --change "CMD /bin/bash" build/$DIST.tar)
-    echo "============================================"
-    echo "Running tests for $BASENAME:$DIST"
-    echo "============================================"
-    ./test $IMPORTED
-    docker tag $IMPORTED $BASENAME:$DIST
-    docker tag $IMPORTED $GCR_BASENAME:$DIST
+    built_image_id="$(./import "build/$DIST.tar" "$target_ts")"
+    log "============================================"
+    log "Running tests for $BASENAME:$DIST"
+    log "============================================"
+    ./test "$built_image_id"
+    log "============================================"
+    log "Rebuilding $BASENAME:$DIST to test reproducibility"
+    log "============================================"
+    ./mkimage "build/${DIST}-repro.tar" "$DIST"
+    repro_image_id="$(./import "build/${DIST}-repro.tar" "$target_ts")"
+    if [ "$repro_image_id" != "$built_image_id" ]; then
+        log "$BASENAME:$DIST differs after a rebuild. Examine $built_image_id and $repro_image_id"
+        log "to find the differences and fix the build to be reproducible again."
+        log "Running \`./dockerdiff $built_image_id $repro_image_id\` might be useful."
+        exit 1
+    fi
+    rm "build/${DIST}-repro.tar"
+    if [ -n "$pulled_image_id" ]; then
+        if [ "$built_image_id" != "$pulled_image_id" ]; then
+            log "Image changed $built_image_id (new) != $pulled_image_id (old)"
+            # Re-import with the current timestamp so that the image shows
+            # as new
+            built_image_id="$(./import "build/$DIST.tar" "$current_ts")"
+        else
+            log "Image didn't change"
+            # Fall through and tag anyway (a no-op for $BASENAME:$DIST): the
+            # `docker tag $GCR_BASENAME:$LATEST ...` after the loop needs that tag.
+        fi
+    fi
+    docker tag "$built_image_id" "$BASENAME:$DIST"
+    docker tag "$built_image_id" "$GCR_BASENAME:$DIST"
+    log "Tagged $built_image_id as $BASENAME:$DIST $GCR_BASENAME:$DIST"
 done
 docker tag $BASENAME:$LATEST $BASENAME:latest
 docker tag $GCR_BASENAME:$LATEST $GCR_BASENAME:latest
diff --git a/dockerdiff b/dockerdiff
new file mode 100755
index 0000000..0e62464
--- /dev/null
+++ b/dockerdiff
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Compare two docker images, reporting what changed.
+# The script will exit 1 if there are differences between the images
+# other than their tags.
+#
+# It will also try to show what the differences are, comparing
+# - the image config
+# - the installed dpkg packages
+# - changed file metadata
+# - changed file checksums
+
+set -e
+set -u
+set -o pipefail
+
+IMAGE1=${1:?Specify the first image to compare}
+IMAGE2=${2:?Specify the second image to compare}
+
+# Print the config of image $1, minus its tags and repo digests.
+inspect() {
+    docker inspect "$1" | jq ".[0]|del(.RepoTags,.RepoDigests)"
+}
+
+# List the dpkg packages installed in image $1.
+dpkgl() {
+    docker run --rm "$1" dpkg -l
+}
+
+# Print `ls -ld` metadata for the paths on the root filesystem of image $1.
+lslr() {
+    docker run --rm "$1" bash -c 'find / -xdev -not -path /proc -a -not -path /sys -print0 | sort -z | xargs -0 ls -ld'
+}
+
+# Print the md5sum of the regular files on the root filesystem of image $1.
+# /etc/hosts and /etc/hostname are skipped (presumably because docker
+# generates them per container -- confirm).
+md5() {
+    docker run --rm "$1" bash -c 'find / -xdev -not -path /etc/hosts -a -not -path /etc/hostname -a -type f -print0 | sort -z | xargs -0 md5sum'
+}
+
+# Run function $1 against both images and show a unified diff of its output.
+# Returns the exit status of `diff`.
+_diff() {
+    local cmd=$1
+    diff -u --label "$IMAGE1" --label "$IMAGE2" <("$cmd" "$IMAGE1") <("$cmd" "$IMAGE2")
+}
+
+# The config comparison decides the result; the remaining diffs only add
+# detail, so their exit status is ignored.
+if ! _diff inspect; then
+    _diff dpkgl || true
+    _diff lslr || true
+    _diff md5 || true
+    exit 1
+fi
diff --git a/import b/import
new file mode 100755
index 0000000..57580ea
--- /dev/null
+++ b/import
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Import a tarball as a docker image, specifying the desired image
+# creation date.
+
+# This is useful as there's no other way to manipulate the creation
+# date, and the date is part of the calculation of the image id.
+# This means that the only way to reproduce an image is to specify
+# the same timestamp.
+
+set -e
+set -u
+set -o pipefail
+
+CONF_TEMPLATE='{"architecture":"amd64","comment":"from Bitnami with love","config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":null,"Cmd":["/bin/bash"],"Image":"","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null},"container_config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":null,"Cmd":null,"Image":"","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null},"created":"%TIMESTAMP%","docker_version":"1.13.0","history":[{"created":"%TIMESTAMP%","comment":"from Bitnami with love"}],"os":"linux","rootfs":{"type":"layers","diff_ids":["sha256:%LAYERSUM%"]}}'
+MANIFEST_TEMPLATE='[{"Config":"%CONF_SHA%.json","RepoTags":null,"Layers":["%LAYERSUM%/layer.tar"]}]'
+
+SOURCE=${1:?Specify the tarball to import}
+TIMESTAMP=${2:?Specify the timestamp to use}
+
+# Wrap $SOURCE as the only layer of an image whose creation date is
+# $TIMESTAMP, load it into docker and print the resulting image id on stdout.
+import() {
+    # Declare first, assign later: `local x="$(cmd)"` returns the status of
+    # `local`, which would hide a failing command from `set -e`.
+    local TDIR LAYERSUM CONF CONF_SHA MANIFEST ID
+    TDIR="$(mktemp -d)"
+    LAYERSUM="$(sha256sum "$SOURCE" | awk '{print $1}')"
+    mkdir "$TDIR/$LAYERSUM"
+    cp "$SOURCE" "$TDIR/$LAYERSUM/layer.tar"
+    echo -n '1.0' > "$TDIR/$LAYERSUM/VERSION"
+    CONF="$(echo -n "$CONF_TEMPLATE" | sed -e "s/%TIMESTAMP%/$TIMESTAMP/g" -e "s/%LAYERSUM%/$LAYERSUM/g")"
+    CONF_SHA="$(echo -n "$CONF" | sha256sum | awk '{print $1}')"
+    echo -n "$CONF" > "$TDIR/${CONF_SHA}.json"
+    MANIFEST="$(echo -n "$MANIFEST_TEMPLATE" | sed -e "s/%CONF_SHA%/$CONF_SHA/g" -e "s/%LAYERSUM%/$LAYERSUM/g")"
+    echo -n "$MANIFEST" > "$TDIR/manifest.json"
+    tar cf "$TDIR/import.tar" -C "$TDIR" manifest.json "${CONF_SHA}.json" "$LAYERSUM"
+    # Tolerate a failed load here so that the check below reports it,
+    # together with the location of the working directory.
+    ID="$(docker load -i "$TDIR/import.tar" | awk '{print $4}')" || ID=
+    if [ "$ID" != "sha256:$CONF_SHA" ]; then
+        echo "Failed to load $ID correctly, expected id to be $CONF_SHA, source in $TDIR" >&2
+        exit 1
+    fi
+    rm -r "$TDIR"
+    echo "$ID"
+}
+
+import
diff --git a/mkimage b/mkimage
index 339ae85..89be54f 100755
--- a/mkimage
+++ b/mkimage
@@ -1,6 +1,7 @@
 #!/bin/bash
 set -e
 set -u
+set -o pipefail
 
 ROOT=$(cd $(dirname $0) && pwd)
 
@@ -40,10 +41,15 @@ fi
 
 chroot "$rootfsDir" apt-get update
 chroot "$rootfsDir" apt-get upgrade -y -o Dpkg::Options::="--force-confdef"
-chroot "rootfsDir" dpkg -l | tee "$TARGET.manifest"
+chroot "$rootfsDir" dpkg -l | tee "$TARGET.manifest"
 
 echo "Applying docker-specific tweaks"
 # These are copied from the docker contrib/mkimage/debootstrap script.
+# Modifications:
+#  - remove `strings` check for applying the --force-unsafe-io tweak.
+#    This was sometimes wrongly detected as not applying, and we aren't
+#    interested in building versions that this guard would apply to,
+#    so simply apply the tweak unconditionally.
 
 # get path to "chroot" in our current PATH
 chrootPath="$(type -P chroot)"
@@ -82,18 +88,15 @@ chmod +x "$rootfsDir/usr/sbin/policy-rc.d"
 # don't even have kernels installed
 rm -f "$rootfsDir/etc/apt/apt.conf.d/01autoremove-kernels"
 
-# Ubuntu 10.04 sucks... :)
-if strings "$rootfsDir/usr/bin/dpkg" | grep -q unsafe-io; then
-	# force dpkg not to call sync() after package extraction (speeding up installs)
-	echo >&2 "+ echo force-unsafe-io > '$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup'"
-	cat > "$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup" <<-'EOF'
-	# For most Docker users, package installs happen during "docker build", which
-	# doesn't survive power loss and gets restarted clean afterwards anyhow, so
-	# this minor tweak gives us a nice speedup (much nicer on spinning disks,
-	# obviously).
-	force-unsafe-io
-	EOF
-fi
+# force dpkg not to call sync() after package extraction (speeding up installs)
+echo >&2 "+ echo force-unsafe-io > '$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup'"
+cat > "$rootfsDir/etc/dpkg/dpkg.cfg.d/docker-apt-speedup" <<-'EOF'
+# For most Docker users, package installs happen during "docker build", which
+# doesn't survive power loss and gets restarted clean afterwards anyhow, so
+# this minor tweak gives us a nice speedup (much nicer on spinning disks,
+# obviously).
+force-unsafe-io
+EOF
 
 if [ -d "$rootfsDir/etc/apt/apt.conf.d" ]; then
 	# _keep_ us lean by effectively running "apt-get clean" after every install
@@ -188,12 +191,24 @@ EOF
 
 chmod 0755 "$rootfsDir/usr/sbin/install_packages"
 
+# Capture the most recent date that a package in the image was changed.
+# We don't care about the particular date, or which package it comes from,
+# we just need a date that isn't very far in the past.
+BUILD_DATE="$(find "$rootfsDir/usr/share/doc" -name changelog.Debian.gz -exec dpkg-parsechangelog -SDate -l'{}' \; | xargs -I '{}' date --date='{}' +%s | sort -n | tail -n 1)"
 echo "Trimming down"
 for DIR in $DIRS_TO_TRIM; do
     rm -r "$rootfsDir/$DIR"/*
 done
+# Remove the aux-cache as it isn't reproducible. It doesn't seem to
+# cause any problems to remove it.
+rm -f "$rootfsDir/var/cache/ldconfig/aux-cache" # -f: a missing cache must not abort the build under `set -e`
 find "$rootfsDir/usr/share/doc" -mindepth 1 -not -name copyright -not -type d -delete
 find "$rootfsDir/usr/share/doc" -mindepth 1 -type d -empty -delete
+# Clamp the mtime of every file that is newer than $BUILD_DATE back to
+# $BUILD_DATE. This is required to have the same metadata on files so that
+# the same tarball is produced. We assume that it is not important
+# that any file have a newer mtime than this.
+find "$rootfsDir" -depth -newermt "@$BUILD_DATE" -print0 | xargs -0r touch --no-dereference --date="@$BUILD_DATE"
 echo "Total size"
 du -skh "$rootfsDir"
 echo "Package sizes"