Skip to content

Commit

Permalink
simplifying docker build (#17)
Browse files Browse the repository at this point in the history
* simplifying docker build
* demo on pythonanywhere.com
* upgrade to duckdb 0.10.0 to fix cve
* fix cve
HIGH CVE-2024-24762 --   pkg:pypi/fastapi@0.101.0
HIGH CVE-2024-26130 -- pkg:pypi/cryptography@42.0.1
HIGH CVE-2024-24762 --  pkg:pypi/starlette@0.27.0

* fix cve
HIGH CVE-2024-24762 -- pkg:pypi/starlette@0.35.1

* fix cve
CRITICAL CVE-2024-22682 --pkg:pypi/duckdb@0.9.0

* upgrade duckdb
CVE-2024-22682--pkg:pypi/duckdb@0.9.0

* o-updated readme
o-configuring secrets no longer required due to presence of secretmanager in duckdb

* build arm64

---------

Co-authored-by: vishnu chanderraju <vishnu.ch@cuezen.com>
  • Loading branch information
jaihind213 and vishnu chanderraju committed Mar 11, 2024
1 parent d936f6c commit 9d3022f
Show file tree
Hide file tree
Showing 13 changed files with 229 additions and 255 deletions.
42 changes: 18 additions & 24 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,20 @@ on:

env:
IMAGE_NAME: jaihind213/radio-duck
PYTHON_VERSION: "3.10"
PLATFORM: "linux/amd64,linux/arm64"

permissions:
contents: read
pull-requests: write

jobs:
# Job that installs QEMU emulation through docker/setup-qemu-action.
# NOTE(review): this is a separate job from `build`. Each job on a
# GitHub-hosted runner starts in a fresh environment, so this setup presumably
# does not carry over to the `build` job that does the multi-platform image
# build -- confirm, and consider moving this step into `build`.
qemu:
runs-on: ubuntu-latest
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
build:

runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -56,32 +62,19 @@ jobs:
- name: Docker build image
run: |
echo ${{ secrets.DHUB }} | docker login --username ${{ secrets.DHUB_USER }} --password-stdin
export PUSH_LATEST_TAG=yes
DOCKER_VERSION=`cat pyproject.toml |grep version |grep -v python|grep -v target | awk '{print $3}' |sed -e "s/\"//g"`
export DOCKERFILE_VERSION=`sha256sum Dockerfile | cut -c 1-8`
DUCKDB_VERSION=`cat pyproject.toml |grep 'duckdb =' |grep -v target | awk '{print $3}' |sed -e "s/\"//g"`
export PUSH_REPO=local
bash -x buildDocker.sh $DOCKER_VERSION $DUCKDB_VERSION
#why we issue build twice ?-> bug-> https://github.com/docker/buildx/issues/59, one is build and one is to load to local registry
#export PUSH_REPO=remote
#sh buildDocker.sh $DOCKER_VERSION $DUCKDB_VERSION
PROJECT_VERSION=`cat pyproject.toml |grep 'version =' |grep -v target | awk '{print $3}' |sed -e "s/\"//g"`
export PLATFORM="linux/amd64" #for docker scout, use one platform, as multiple platform not allowed with --load
bash buildDocker.sh $PROJECT_VERSION $DUCKDB_VERSION local no
IMAGE_VERSION=`cat /tmp/version`
DOCKER_DUCKX_IMAGE_VERSION=`cat /tmp/duckdbx_version`
echo "IMAGE_VERSION is $IMAGE_VERSION"
echo "IMAGE_VERSION=$IMAGE_VERSION" >> $GITHUB_ENV
echo "DOCKER_DUCKX_IMAGE_VERSION=$DOCKER_DUCKX_IMAGE_VERSION" >> $GITHUB_ENV
echo "-------------------------------"
docker pull $IMAGE_NAME:$IMAGE_VERSION
#IMAGE_TAG=${IMAGE_NAME}:${DOCKER_VERSION}
#echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV
#echo "IMAGE_TAG is $IMAGE_TAG"
#docker buildx build --platform=linux/x86_64,linux/amd64 -t $IMAGE_TAG -t $IMAGE_NAME:latest .
#docker buildx build --output type=docker -t $IMAGE_TAG -t $IMAGE_NAME:latest .
#why we issue build twice ?-> bug-> https://github.com/docker/buildx/issues/59, one is build and one is to load to local registry
# todo: linux/arm64 takes a long time
echo "PYTHON_VERSION=$PYTHON_VERSION" >> $GITHUB_ENV
echo "DUCKDB_VERSION=$DUCKDB_VERSION" >> $GITHUB_ENV
#docker pull --platform ${{ env.PLATFORM }} $IMAGE_NAME:$IMAGE_VERSION
- name: Docker Scout
id: docker-scout
if: ${{ github.event_name == 'pull_request' }}
Expand All @@ -108,10 +101,11 @@ jobs:
uses: docker/build-push-action@v2
with:
context: .
platforms: linux/x86_64,linux/amd64 # todo: linux/arm64 takes a long time
platforms: ${{ env.PLATFORM }} # todo: linux/arm64 takes a long time
push: true
build-args: DOCKER_DUCKX_IMAGE_VERSION=${{ env.DOCKER_DUCKX_IMAGE_VERSION}}
build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION}}
tags: ${{ env.IMAGE_NAME }}:latest,${{ env.IMAGE_NAME }}:${{ env.IMAGE_VERSION }}
labels: ${{ steps.meta.outputs.labels }},${{ env.DUCKDB_VERSION }}
- name: Docker Hub Description
if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'workflow_dispatch'
uses: peter-evans/dockerhub-description@v3
Expand Down
41 changes: 30 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,46 @@
# Parent image for the radio-duck container (Python is installed explicitly below)
ARG DOCKER_DUCKX_IMAGE_VERSION
FROM jaihind213/duckdbx:${DOCKER_DUCKX_IMAGE_VERSION}
FROM ubuntu:22.04
LABEL maintainer="jaihind213@gmail.com"

USER 0
RUN mkdir -p /radio-duck
WORKDIR /radio-duck
ARG PYTHON_VERSION=3.10
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

RUN useradd -m -d /home/radio-duck radio-duck
WORKDIR /home/radio-duck
RUN mkdir "pond"

RUN apt update && apt autoclean && apt install -y software-properties-common && apt update && add-apt-repository ppa:apt-fast/stable -y && add-apt-repository ppa:deadsnakes/ppa && apt update && apt install -y apt-fast
RUN apt-fast -y install linux-libc-dev libssl-dev
#RUN apt-fast -y install gcc git g++ libblis64-3-pthread
RUN apt-fast -y install libblis64-3-pthread

RUN apt-fast update && apt-fast install -y python${PYTHON_VERSION}
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1

#for psutil need to do python3.10-dev u need it later when u do '' pip3 install -r requirements-dev.txt''
RUN apt-fast -y install python${PYTHON_VERSION}-dev
RUN apt-fast -y install curl && curl -sS https://bootstrap.pypa.io/get-pip.py | python3 && apt-fast -y remove curl

# Copy the application sources, requirements.txt and default.ini into the container
COPY . /radio-duck/
COPY default.ini /radio-duck/default.ini
COPY *.py /home/radio-duck
COPY requirements.txt /home/radio-duck
COPY default.ini /home/radio-duck/default.ini
# Install any needed packages specified in requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

RUN --mount=type=secret,id=duck_sekrets
#RUN rm -f /etc/ssl/certs/ca-bundle.crt && apt update && apt install --reinstall ca-certificates && update-ca-certificates
RUN echo "base docker image version: $DOCKER_DUCKX_IMAGE_VERSION" >> base_docker_image_version

RUN mkdir "/quack"
RUN echo "import duckdb; duckdb.query('install httpfs; load httpfs; install azure; load azure;');" >> /quack/test.py && echo "" >> /quack/test.py
RUN cd /quack && python3 test.py && cd -

#RUN echo http://dl-cdn.alpinelinux.org/alpine/edge/main >> /etc/apk/repositories
#RUN apk update && apk add --no-cache zlib-dev==1.3-r2 #->for docker CRITICAL CVE-2023-45853
# Make port 8000 available to the world outside this container
EXPOSE 8000

# Run server.py when the container launches
CMD ["python3", "/radio-duck/server.py"]
CMD ["python3", "/home/radio-duck/server.py"]

VOLUME /radio-duck/pond
VOLUME /home/radio-duck/pond
50 changes: 13 additions & 37 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,49 +85,25 @@ Then access http://localhost:8000/docs and play with the api.

### Configure RadioDuck with your secret credentials to access Azure blob storage / Aws s3

Create a file with your secrets
```text
#for duckdb specific secrets, your key should have duck_ prefix.
duck_s3_access_key_id=<your_aws_access_key_value>
duck_s3_secret_access_key=<your_aws_secret_key_value>
duck_azure_storage_connection_string=DefaultEndpointsProtocol=https;AccountName=<account_name>;AccountKey=<account_key>;EndpointSuffix=core.windows.net
```
now provide that secret file as volume
```
docker run -p 8000:8000 \
-v <path_to_data_dir>:/radio-duck/pond \
-v <path_to_secrets_file>:/run/secrets/duck_sekrets \
-v <path_to_my_config.ini>:/radio-duck/pond/my_config.ini \
-t jaihind213/radio-duck:latest python3 /radio-duck/server.py /radio-duck/pond/my_config.ini
#For macM1 --> use --platform linux/amd64 also.
Credentials are configured through DuckDB's built-in secrets manager; refer to https://duckdb.org/docs/configuration/secrets_manager.html

### Build Docker image
```bash
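bash buildDocker.sh <proj_version> <duckdb_version> [local|remote] [yes|no]
#3rd argument: 'remote' pushes to the registry, anything else loads the image locally; 4th argument: 'yes' also tags the image as latest
#bash buildDocker.sh 0.1.2 0.9.0 local no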
```

Then access http://localhost:8000/docs
#### Docker Image versioning

Sample payloads:
```json
{
"schema": "main",
"sql": "set session s3_region='ap-southeast-1';SELECT * FROM read_parquet('s3://bucket/sample/*.parquet');"
}
```
The docker image version is in the following format:
```text
d<DUCKDB_VERSION>-v<PROJECT_VERSION>-<DOCKERFILE_VERSION>
ex: d0.10.0-v0.1.2-c1d9c346
or
```json
{

"schema": "main",
"sql": "SELECT count(*) FROM 'azure://container/data/part-00000-3213aaeb-1a41-421f-9e1e-a4290dccf509-c000.snappy.parquet';"
}
This specifies that radio duck is running with duckdb=0.10.0, the project version being 0.1.2.
These versions are derived from pyproject.toml
```

### Build Docker image
```bash
echo "Build duckdb Db from source on ubuntu with extensions"
sh build_ubuntu_duckdbx.sh
sh buildDocker.sh <version> <duckb_version>
# sh buildDocker.sh 0.1.1-test 0.9.0
```
## Try me with Apache Superset
```
#run radio-duck
Expand Down
10 changes: 10 additions & 0 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,15 @@ def run_sql(
) from oom


@router.get(
    "/demo_disclaimer", tags=["_notice"], response_class=PlainTextResponse
)
def disclaimer():
    """
    Return the plain-text disclaimer for the public demo instance.

    The demo is hosted on https://www.pythonanywhere.com/; users are kindly
    requested to read the disclaimer by trying out this api.
    """
    # The message is split across short literals only to respect the line
    # length limit; the pieces concatenate into a single response string.
    return (
        "Disclaimer: if you are accessing radio-duck hosted on "
        "https://radioduck.pythonanywhere.com/, please note it is intended "
        "for demonstration purposes only. Do not store any sensitive "
        "information, passwords, or secrets on this server. The owner of "
        "this server is not liable for any data, including secrets, stored "
        "on the server that may be leaked. Use this service responsibly & "
        "at your own risk."
    )


def _deserialize(schema, rows):
    """Delegate to ``deser.jsonify_rows``, passing the schema as a tuple."""
    columns = tuple(schema)
    return deser.jsonify_rows(columns, rows)
46 changes: 26 additions & 20 deletions buildDocker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

PROJECT_VERSION=$1
DUCKDB_VERSION=$2
PUSH_REPO=$3
PUSH_LATEST_TAG=$4

if [ "$PROJECT_VERSION" == "" ];then
echo "PROJECT_VERSION not set as 1st argument. bash buildDocker.sh <version> <duckdb_version>"
exit 2
Expand All @@ -10,44 +13,47 @@ if [ "$DUCKDB_VERSION" == "" ];then
echo "DUCKDB_VERSION not set as 2nd argument. bash buildDocker.sh <project_version> <duckdb_version>"
exit 2
fi
# Fall back to defaults for every build setting that the caller left unset
# or empty in the environment.
PYTHON_VERSION="${PYTHON_VERSION:-3.10}"
DOCKERFILE_VERSION="${DOCKERFILE_VERSION:-0.1}"
PLATFORM="${PLATFORM:-linux/amd64}"
#####################
export IMAGE_NAME=jaihind213/radio-duck
export DOCKER_DUCKX_IMAGE_VERSION=$DUCKDB_VERSION-0.3
IMAGE_VERSION=${PROJECT_VERSION}-${DOCKER_DUCKX_IMAGE_VERSION}
PLATFORM=linux/x86_64,linux/amd64
export IMAGE_VERSION="d${DUCKDB_VERSION}-v${PROJECT_VERSION}-$DOCKERFILE_VERSION"

PUSH_REPO=remote
if [ "$PUSH_REPO" == "local" ];then
DOCKER_ARGS="--load" #or --output=type=docker both are same
else
#push to remote
if [ "$PUSH_REPO" == "remote" ];then
DOCKER_ARGS="--output=type=registry"
else
DOCKER_ARGS="--load" #or --output=type=docker both are same
fi

PUSH_LATEST_TAG="no"
TAGS="-t $IMAGE_NAME:$IMAGE_VERSION"
# Optionally add the floating "latest" tag next to the versioned tag.
# Shell assignments must not have spaces around '=': `TAGS = "..."` would try
# to execute a command named TAGS instead of assigning the variable.
if [ "$PUSH_LATEST_TAG" == "yes" ];then
  echo "setting latest tag..."
  LATEST_TAG="-t $IMAGE_NAME:latest"
  TAGS="$TAGS -t $IMAGE_NAME:latest"
fi

cd "$(dirname "$0")"
echo "exporting poetry deps to requirements.txt..."
#easy to build docker image with python and install with pip. hence converting to requirements.txt :)
poetry export --without-hashes --format=requirements.txt > requirements.txt

echo "building docker image... with version $IMAGE_NAME:$IMAGE_VERSION"
echo "base duckdbX version: $DOCKER_DUCKX_IMAGE_VERSION ..."
echo "PUSH_REPO FLAG: $PUSH_REPO"
echo "TAGS: $TAGS"
echo "PUSH_LATEST_TAG FLAG: $PUSH_LATEST_TAG"
sleep 10
echo "PYTHON_VERSION FLAG: $PYTHON_VERSION"

sleep 5
export DOCKER_BUILDKIT=1
docker buildx build $DOCKER_ARGS --platform $PLATFORM --build-arg DOCKER_DUCKX_IMAGE_VERSION=${DOCKER_DUCKX_IMAGE_VERSION} -t $IMAGE_NAME:$IMAGE_VERSION ${LATEST_TAG} .
docker buildx create --use
docker buildx build $DOCKER_ARGS --build-arg PYTHON_VERSION=${PYTHON_VERSION} --platform $PLATFORM $TAGS .

#for Macs-m1
#docker buildx build --platform linux/x86_64 -t $IMAGE_NAME:$PROJECT_VERSION -t $IMAGE_NAME:latest .
touch /tmp/version
cat /dev/null > /tmp/version
echo $IMAGE_VERSION > /tmp/version

touch /tmp/duckdbx_version
cat /dev/null > /tmp/duckdbx_version
echo $DOCKER_DUCKX_IMAGE_VERSION > /tmp/duckdbx_version
echo $IMAGE_VERSION > /tmp/version
32 changes: 0 additions & 32 deletions build_ubuntu_duckdbx.sh

This file was deleted.

31 changes: 0 additions & 31 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import configparser
import logging
import os

# Module-level parser instance. ConfigParser is used instead of the deprecated
# SafeConfigParser alias, which was removed in Python 3.12.
app_config = configparser.ConfigParser()

Expand All @@ -14,35 +12,6 @@ def get_config():
return app_config


# https://spacelift.io/blog/docker-secrets
def configure_sekrets(connection):
    """
    Apply secrets ("sekrets") from the configured secrets file, if it exists.

    The file path comes from the ``[duck] sekret_file`` config option and
    falls back to ``/run/secrets/duck_sekrets``. Every usable line has the
    form ``key=value``:

    * keys starting with ``duck_`` are applied on ``connection`` by executing
      ``set global <key without the prefix>='<value>';``
    * every other key is exported into ``os.environ``.

    Comment lines (starting with ``#``), blank lines and lines without ``=``
    are skipped. When the file does not exist only a warning is logged.

    :param connection: object exposing ``execute(sql)`` (presumably a duckdb
        connection -- confirm against the caller)
    :return: None
    """
    sekret_file = get_config().get(
        "duck", "sekret_file", fallback="/run/secrets/duck_sekrets"
    )

    if not os.path.isfile(sekret_file):
        logging.warning("no secrets file found!")
        return

    logging.info("configuring secrets")
    with open(sekret_file, "r") as file:
        for line in file:
            entry = line.strip()
            # Skip comments, blank lines and malformed entries; unpacking a
            # line without "=" would otherwise raise ValueError.
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            k, v = entry.split("=", 1)
            # duckdb specific access keys / secret keys carry the duck_ prefix
            if k.startswith("duck_"):
                parameter = k[len("duck_") :]  # noqa: E203
                # Double embedded single quotes so the value cannot
                # terminate the SQL string literal early.
                escaped = v.replace("'", "''")
                query = "set global {}='{}';".format(parameter, escaped)
                connection.execute(query)
            else:
                os.environ[k] = v


# https://stackoverflow.com/questions/55179786/is-it-possible-to-provide-secret-to-docker-run
# https://spacelift.io/blog/docker-secrets
# https://towardsdatascience.com/secure-your-docker-images-with-docker-secrets-f2b92ec398a0
File renamed without changes.
File renamed without changes.

0 comments on commit 9d3022f

Please sign in to comment.