1. Install Docker
https://my.oschina.net/ilovetao/blog/3034502
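The linked post covers the details; as a minimal sketch (assuming a CentOS 7 host, other distributions differ), Docker CE can be installed from the official repository:

    # Install Docker CE from the official yum repository (sketch; assumes CentOS 7)
    yum install -y yum-utils
    yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
    yum install -y docker-ce
    systemctl start docker && systemctl enable docker
    docker --version   # verify the installation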
2. Install Airflow with Docker. Reference:
https://github.com/puckel/docker-airflow
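If you want the Compose files and the entrypoint script locally, a sketch of fetching the project and the prebuilt image (the 1.10.2 tag matches the compose file used later):

    git clone https://github.com/puckel/docker-airflow.git
    cd docker-airflow
    docker pull puckel/docker-airflow:1.10.2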
Run with SequentialExecutor:
docker run -d -p 8080:8080 puckel/docker-airflow webserver
Run with LocalExecutor:
docker-compose -f docker-compose-LocalExecutor.yml up -d
Run with CeleryExecutor:
docker-compose -f docker-compose-CeleryExecutor.yml up -d
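Whichever executor you pick, you can check that the containers came up and that the web UI answers on port 8080 (Flower on 5555 in the Celery setup). A quick sanity check:

    # Confirm the containers are running and the web UI is reachable
    docker-compose -f docker-compose-CeleryExecutor.yml ps
    curl -I http://localhost:8080        # Airflow web UI
    curl -I http://localhost:5555        # Flower (CeleryExecutor only)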
airflow.cfg configuration:
In the Docker setup, configuration is supplied through environment variables instead of editing airflow.cfg.
For example, the airflow.cfg option sql_alchemy_conn becomes the environment variable AIRFLOW__CORE__SQL_ALCHEMY_CONN in Docker.
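The pattern is always AIRFLOW__&lt;SECTION&gt;__&lt;KEY&gt; (double underscores around the upper-cased section name). A minimal sketch of overriding the metadata DB connection this way, assuming the MySQL instance used later in entrypoint.sh is reachable:

    # airflow.cfg:  [core] sql_alchemy_conn = ...
    # Docker env:   AIRFLOW__CORE__SQL_ALCHEMY_CONN=...
    docker run -d -p 8080:8080 \
      -e AIRFLOW__CORE__SQL_ALCHEMY_CONN="mysql://airflow:123@10.6.107.66/airflowdb2" \
      puckel/docker-airflow:1.10.2 webserver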
Airflow Python package dependencies:
Place $(pwd)/requirements.txt in the same directory as the Dockerfile or the yml file.
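entrypoint.sh pip-installs this file at container start when it is mounted at /requirements.txt, so any extra dependencies your DAGs need go here (note that the mysql:// URI used later expects the MySQLdb driver). A sketch of creating it; the listed packages are only hypothetical examples, not requirements of this setup:

    # Hypothetical requirements.txt next to the yml file; adjust to your DAGs
    cat > requirements.txt <<'EOF'
    pymysql
    requests
    EOF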
I use the CeleryExecutor mode:
Share the files into the containers directly through volumes:
modify docker-compose-CeleryExecutor.yml and add volume mounts for requirements.txt, entrypoint.sh, and airflow.cfg to the webserver, scheduler, and worker services.
Finally, these are my final modified files:
docker-compose.yml
version: '2.1'
services:
    redis:
        image: 'redis:3.2.7'
        # command: redis-server --requirepass redispass

    postgres:
        image: postgres:9.6
        environment:
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        # Uncomment these lines to persist data on the local filesystem.
        #     - PGDATA=/var/lib/postgresql/data/pgdata
        # volumes:
        #     - ./pgdata:/var/lib/postgresql/data/pgdata

    webserver:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - postgres
            - redis
        environment:
            - LOAD_EX=n
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            #- REDIS_PASSWORD=redispass
        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./requirements.txt:/requirements.txt
            - ./entrypoint.sh:/entrypoint.sh
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        ports:
            - "8080:8080"
        command:
            - webserver
        healthcheck:
            test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
            interval: 30s
            timeout: 30s
            retries: 3

    flower:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - redis
        environment:
            - EXECUTOR=Celery
            #- REDIS_PASSWORD=redispass
        ports:
            - "5555:5555"
        command: flower

    scheduler:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - webserver
        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./requirements.txt:/requirements.txt
            - ./entrypoint.sh:/entrypoint.sh
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        environment:
            - LOAD_EX=n
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            #- REDIS_PASSWORD=redispass
        command: scheduler

    worker:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - scheduler
        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./requirements.txt:/requirements.txt
            - ./entrypoint.sh:/entrypoint.sh
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        environment:
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            #- REDIS_PASSWORD=redispass
        command: worker
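With the file above saved as docker-compose.yml, starting the stack and scaling workers looks like this (a sketch; the scale syntax assumes the docker-compose 1.x CLI that goes with the version '2.1' format):

    docker-compose up -d                 # start redis, postgres, webserver, flower, scheduler, worker
    docker-compose ps                    # check container state and the webserver healthcheck
    docker-compose scale worker=2        # run additional Celery workers
    docker-compose logs -f webserver     # follow webserver logs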
entrypoint.sh
#!/usr/bin/env bash

TRY_LOOP="20"

: "${REDIS_HOST:="redis"}"
: "${REDIS_PORT:="6379"}"
: "${REDIS_PASSWORD:=""}"

: "${POSTGRES_HOST:="postgres"}"
: "${POSTGRES_PORT:="5432"}"
: "${POSTGRES_USER:="airflow"}"
: "${POSTGRES_PASSWORD:="airflow"}"
: "${POSTGRES_DB:="airflow"}"

# Defaults and back-compat
: "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
: "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"

export \
  AIRFLOW__CELERY__BROKER_URL \
  AIRFLOW__CELERY__RESULT_BACKEND \
  AIRFLOW__CORE__EXECUTOR \
  AIRFLOW__CORE__FERNET_KEY \
  AIRFLOW__CORE__LOAD_EXAMPLES \
  AIRFLOW__CORE__SQL_ALCHEMY_CONN

# Load DAG examples (default: Yes)
if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]]
then
  AIRFLOW__CORE__LOAD_EXAMPLES=False
fi

# Install custom python packages if requirements.txt is present
if [ -e "/requirements.txt" ]; then
  $(which pip) install --user -r /requirements.txt
fi

if [ -n "$REDIS_PASSWORD" ]; then
  REDIS_PREFIX=:${REDIS_PASSWORD}@
else
  REDIS_PREFIX=
fi

wait_for_port() {
  local name="$1" host="$2" port="$3"
  local j=0
  while ! nc -z "$host" "$port" >/dev/null 2>&1 < /dev/null; do
    j=$((j+1))
    if [ $j -ge $TRY_LOOP ]; then
      echo >&2 "$(date) - $host:$port still not reachable, giving up"
      exit 1
    fi
    echo "$(date) - waiting for $name... $j/$TRY_LOOP"
    sleep 5
  done
}

if [ "$AIRFLOW__CORE__EXECUTOR" != "SequentialExecutor" ]; then
  # Override: point the metadata DB and Celery result backend at an external MySQL instance
  AIRFLOW__CORE__SQL_ALCHEMY_CONN="mysql://airflow:123@10.6.107.66/airflowdb2"
  AIRFLOW__CELERY__RESULT_BACKEND="db+mysql://airflow:123@10.6.107.66/airflowdb2"
  wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT"
fi

if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
  AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1"
  wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
fi

case "$1" in
  webserver)
    airflow initdb
    if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ]; then
      # With the "Local" executor it should all run in one container.
      airflow scheduler &
    fi
    exec airflow webserver
    ;;
  worker|scheduler)
    # To give the webserver time to run initdb.
    sleep 10
    exec airflow "$@"
    ;;
  flower)
    sleep 10
    exec airflow "$@"
    ;;
  version)
    exec airflow "$@"
    ;;
  *)
    # The command is something like bash, not an airflow subcommand. Just run it in the right environment.
    exec "$@"
    ;;
esac
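The script must be executable on the host for the bind mount to work, and once the stack is up you can exec into the webserver container to confirm that initdb ran against the MySQL backend (a sketch; list_dags is the Airflow 1.10 CLI form):

    chmod +x entrypoint.sh
    docker-compose exec webserver airflow list_dags     # DAGs parsed from ./dags
    docker-compose exec webserver env | grep AIRFLOW__   # confirm the exported overrides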