From 83135236cdd5ac7fbbc03bf5b3f8e85ba38984ad Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sat, 23 Mar 2024 19:35:46 +0100 Subject: [PATCH] Turn optional-dependencies in pyproject.toml into dynamic property While currently hatchling and pip nicely support dynamic replacement of the dependencies even if they are statically defined, this is not proper according to PEP 621. When a property of the project is set to be dynamic, it cannot also contain static values. It's either static or dynamic. This is not a problem for wheel packages when installed, by any standard tool, because the wheel package has all the metadata added to wheel (and does not contain pyproject.toml) but in various cases (such as installing airflow via a GitHub URL or from sdist), it can make a difference - depending whether the tool installing airflow will use directly pyproject.toml for optimization, or whether it will run build hooks to prepare the dependencies. This change makes all optional dependencies dynamic - rather than bake them in the pyproject.toml, we mark them as dynamic, so that any tool that uses pyproject.toml or sdist PKG-INFO will know that it has to run build hooks to get the actual optional dependencies. There are a few consequences of that: * our pyproject.toml will not contain automatically generated part - which is actually good, as it caused some confusion * all dynamic optional dependencies of ours are either present in hatch_build.py or calculated there - this is a bit new but sounds reasonable - and those dynamic dependencies are not really updated often, so this is not an issue to maintain them there * the pre-commits that manage the optional dependencies got a lot simpler now - a lot of code has been removed. 
--- .dockerignore | 1 - .pre-commit-config.yaml | 18 +- Dockerfile | 11 +- Dockerfile.ci | 12 +- INSTALL | 140 +- airflow_pre_installed_providers.txt | 2 +- clients/python/pyproject.toml | 2 +- contributing-docs/07_local_virtualenv.rst | 51 +- contributing-docs/08_static_code_checks.rst | 4 +- .../12_airflow_dependencies_and_extras.rst | 87 +- dev/breeze/README.md | 2 +- dev/breeze/doc/02_customizing.rst | 2 +- dev/breeze/doc/ci/04_selective_checks.md | 139 +- .../doc/images/output_static-checks.svg | 140 +- .../doc/images/output_static-checks.txt | 2 +- dev/breeze/pyproject.toml | 2 +- .../src/airflow_breeze/pre_commit_ids.py | 2 +- .../src/airflow_breeze/utils/packages.py | 3 + .../airflow_breeze/utils/selective_checks.py | 54 +- dev/breeze/tests/test_selective_checks.py | 49 +- docker_tests/requirements.txt | 1 + docker_tests/test_prod_image.py | 10 +- docs/apache-airflow/extra-packages-ref.rst | 2 +- .../installation/installing-from-sources.rst | 6 + hatch_build.py | 716 +++++++++- pyproject.toml | 1217 ++--------------- .../ci/pre_commit/common_precommit_utils.py | 27 +- .../pre_commit_check_extra_packages_ref.py | 19 +- .../pre_commit_check_order_hatch_build.py | 54 + .../pre_commit_check_order_pyproject_toml.py | 104 -- .../ci/pre_commit/pre_commit_insert_extras.py | 86 +- .../pre_commit_sort_installed_providers.py | 2 - .../pre_commit_update_build_dependencies.py | 29 +- ...re_commit_update_providers_dependencies.py | 164 --- ...ll_airflow_dependencies_from_branch_tip.sh | 11 +- 35 files changed, 1331 insertions(+), 1840 deletions(-) create mode 100755 scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py delete mode 100755 scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py diff --git a/.dockerignore b/.dockerignore index 75d9be8960a38..dba7378a3b778 100644 --- a/.dockerignore +++ b/.dockerignore @@ -54,7 +54,6 @@ !Dockerfile !hatch_build.py !prod_image_installed_providers.txt -!airflow_pre_installed_providers.txt # This folder is for you 
if you want to add any packages to the docker context when you build your own # docker image. most of other files and any new folder you add will be excluded by default diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4fe8be8dc0ddb..e657f871271f1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -432,26 +432,26 @@ repos: additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml', 'tomli'] - id: check-extra-packages-references name: Checks setup extra packages - description: Checks if all the extras defined in pyproject.toml are listed in extra-packages-ref.rst file + description: Checks if all the extras defined in hatch_build.py are listed in extra-packages-ref.rst file language: python - files: ^docs/apache-airflow/extra-packages-ref\.rst$|^pyproject.toml + files: ^docs/apache-airflow/extra-packages-ref\.rst$|^hatch_build.py pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py - additional_dependencies: ['rich>=12.4.4', 'tomli', 'tabulate'] - - id: check-pyproject-toml-order - name: Check order of dependencies in pyproject.toml + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4', 'tabulate'] + - id: check-hatch-build-order + name: Check order of dependencies in hatch_build.py language: python - files: ^pyproject\.toml$ + files: ^hatch_build.py$ pass_filenames: false - entry: ./scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py - additional_dependencies: ['rich>=12.4.4'] + entry: ./scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4'] - id: update-extras name: Update extras in documentation entry: ./scripts/ci/pre_commit/pre_commit_insert_extras.py language: python files: ^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$|^Dockerfile.* pass_filenames: false - additional_dependencies: ['rich>=12.4.4', 'tomli'] + 
additional_dependencies: ['rich>=12.4.4', 'hatchling==1.22.4'] - id: check-extras-order name: Check order of extras in Dockerfile entry: ./scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py diff --git a/Dockerfile b/Dockerfile index 10fc939093f65..1f72714477b30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -455,13 +455,17 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi + local TEMP_AIRFLOW_DIR + TEMP_AIRFLOW_DIR=$(mktemp -d) # Install latest set of dependencies - without constraints. This is to download a "base" set of # dependencies that we can cache and reuse when installing airflow using constraints and latest # pyproject.toml in the next step (when we install regular airflow). set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "apache-airflow[${AIRFLOW_EXTRAS}] @ https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" + curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ + tar xvz -C "${TEMP_AIRFLOW_DIR}" --strip 1 + # Make sure editable dependencies are calculated when devel-ci dependencies are installed + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" set +x common::install_packaging_tools set -x @@ -477,6 +481,7 @@ function install_airflow_dependencies_from_branch_tip() { set +x ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow set -x + rm -rvf "${TEMP_AIRFLOW_DIR}" # If you want to make sure dependency is removed from cache in your PR when you removed it from # pyproject.toml - please add your dependency here as a list of strings # for example: diff --git a/Dockerfile.ci b/Dockerfile.ci index d52e8909468c8..03c5bbb4e7363 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -402,13 +402,17 @@ function 
install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi + local TEMP_AIRFLOW_DIR + TEMP_AIRFLOW_DIR=$(mktemp -d) # Install latest set of dependencies - without constraints. This is to download a "base" set of # dependencies that we can cache and reuse when installing airflow using constraints and latest # pyproject.toml in the next step (when we install regular airflow). set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "apache-airflow[${AIRFLOW_EXTRAS}] @ https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" + curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ + tar xvz -C "${TEMP_AIRFLOW_DIR}" --strip 1 + # Make sure editable dependencies are calculated when devel-ci dependencies are installed + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" set +x common::install_packaging_tools set -x @@ -424,6 +428,7 @@ function install_airflow_dependencies_from_branch_tip() { set +x ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow set -x + rm -rvf "${TEMP_AIRFLOW_DIR}" # If you want to make sure dependency is removed from cache in your PR when you removed it from # pyproject.toml - please add your dependency here as a list of strings # for example: @@ -1309,7 +1314,6 @@ COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ COPY generated/* ${AIRFLOW_SOURCES}/generated/ COPY constraints/* ${AIRFLOW_SOURCES}/constraints/ COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE -COPY airflow_pre_installed_providers.txt ${AIRFLOW_SOURCES}/ COPY hatch_build.py ${AIRFLOW_SOURCES}/ COPY --from=scripts install_airflow.sh /scripts/docker/ diff --git a/INSTALL b/INSTALL index a1e2034b69937..38434d9192d40 100644 --- a/INSTALL +++ b/INSTALL @@ -1,6 +1,7 @@ -# INSTALL / BUILD instructions for Apache 
Airflow +INSTALL / BUILD instructions for Apache Airflow -## Basic installation of Airflow from sources and development environment setup +Basic installation of Airflow from sources and development environment setup +============================================================================ This is a generic installation method that requires minimum starndard tools to develop airflow and test it in local virtual environment (using standard CPyhon installation and `pip`). @@ -23,7 +24,18 @@ MacOS (Mojave/Catalina) you might need to to install XCode command line tools an brew install sqlite mysql postgresql -## Downloading and installing Airflow from sources +The `pip` is one of the build packaging front-ends that might be used to install Airflow. It's the one +that we recommend (see below) for reproducible installation of specific versions of Airflow. + +As of version 2.8 Airflow follows PEP 517/518 and uses `pyproject.toml` file to define build dependencies +and build process and it requires relatively modern versions of packaging tools to get airflow built from +local sources or sdist packages, as PEP 517 compliant build hooks are used to determine dynamic build +dependencies. In case of `pip` it means that at least version 22.1.0 is needed (released at the beginning of +2022) to build or install Airflow from sources. This does not affect the ability of installing Airflow from +released wheel packages. + +Downloading and installing Airflow from sources +----------------------------------------------- While you can get Airflow sources in various ways (including cloning https://github.com/apache/airflow/), the canonical way to download it is to fetch the tarball published at https://downloads.apache.org where you can @@ -95,7 +107,8 @@ Airflow project contains some pre-defined virtualenv definitions in ``pyproject. easily used by hatch to create your local venvs. 
This is not necessary for you to develop and test Airflow, but it is a convenient way to manage your local Python versions and virtualenvs. -## Installing Hatch +Installing Hatch +---------------- You can install hat using various other ways (including Gui installers). @@ -128,19 +141,21 @@ You can see the list of available envs with: This is what it shows currently: -┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ -┃ Name ┃ Type ┃ Features ┃ Description ┃ -┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -│ default │ virtual │ devel │ Default environment with Python 3.8 for maximum compatibility │ -├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ -│ airflow-38 │ virtual │ │ Environment with Python 3.8. No devel installed. │ -├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ -│ airflow-39 │ virtual │ │ Environment with Python 3.9. No devel installed. │ -├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ -│ airflow-310 │ virtual │ │ Environment with Python 3.10. No devel installed. │ -├─────────────┼─────────┼──────────┼───────────────────────────────────────────────────────────────┤ -│ airflow-311 │ virtual │ │ Environment with Python 3.11. No devel installed │ -└─────────────┴─────────┴──────────┴───────────────────────────────────────────────────────────────┘ +┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ Name ┃ Type ┃ Description ┃ +┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ default │ virtual │ Default environment with Python 3.8 for maximum compatibility │ +├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-38 │ virtual │ Environment with Python 3.8. 
No devel installed. │ +├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-39 │ virtual │ Environment with Python 3.9. No devel installed. │ +├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-310 │ virtual │ Environment with Python 3.10. No devel installed. │ +├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-311 │ virtual │ Environment with Python 3.11. No devel installed │ +├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-312 │ virtual │ Environment with Python 3.11. No devel installed │ +└─────────────┴─────────┴───────────────────────────────────────────────────────────────┘ The default env (if you have not used one explicitly) is `default` and it is a Python 3.8 virtualenv for maximum compatibility with `devel` extra installed - this devel extra contains the minimum set @@ -229,7 +244,8 @@ and install to latest supported ones by pure airflow core. pip install -e ".[devel]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt" -## All airflow extras +Airflow extras +============== Airflow has a number of extras that you can install to get additional dependencies. They sometimes install providers, sometimes enable other features where packages are not installed by default. @@ -239,36 +255,69 @@ https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html The list of available extras is below. -Regular extras that are available for users in the Airflow package. +Core extras +----------- + +Those extras are available as regular core airflow extras - they install optional features of Airflow. 
+ +# START CORE EXTRAS HERE + +aiobotocore, apache-atlas, apache-webhdfs, async, cgroups, deprecated-api, github-enterprise, +google-auth, graphviz, kerberos, ldap, leveldb, otel, pandas, password, pydantic, rabbitmq, s3fs, +saml, sentry, statsd, uv, virtualenv + +# END CORE EXTRAS HERE -# START REGULAR EXTRAS HERE +Provider extras +--------------- -aiobotocore, airbyte, alibaba, all, all-core, all-dbs, amazon, apache-atlas, apache-beam, apache- -cassandra, apache-drill, apache-druid, apache-flink, apache-hdfs, apache-hive, apache-impala, -apache-kafka, apache-kylin, apache-livy, apache-pig, apache-pinot, apache-spark, apache-webhdfs, -apprise, arangodb, asana, async, atlas, atlassian-jira, aws, azure, cassandra, celery, cgroups, -cloudant, cncf-kubernetes, cohere, common-io, common-sql, crypto, databricks, datadog, dbt-cloud, -deprecated-api, dingding, discord, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, -gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashicorp, hdfs, hive, -http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure, -microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai, -openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, -pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, teradata, trino, uv, vertica, virtualenv, -weaviate, webhdfs, winrm, yandex, zendesk +Those extras are available as regular Airflow extras, they install provider packages in standard builds +or dependencies that are necessary to enable the feature in editable build. -# END REGULAR EXTRAS HERE +# START PROVIDER EXTRAS HERE -Devel extras - used to install development-related tools. Only available during editable install. 
+airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink, +apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, +apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant, +cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord, +docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap, +influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, pagerduty, +papermill, pgvector, pinecone, postgres, presto, qdrant, redis, salesforce, samba, segment, +sendgrid, sftp, singularity, slack, smtp, snowflake, sqlite, ssh, tableau, tabular, telegram, +teradata, trino, vertica, weaviate, yandex, zendesk + +# END PROVIDER EXTRAS HERE + +Devel extras +------------ + +The `devel` extras are not available in the released packages. They are only available when you install +Airflow from sources in `editable` installation - i.e. one that you are usually using to contribute to +Airflow. They provide tools such as `pytest` and `mypy` for general purpose development and testing. # START DEVEL EXTRAS HERE -devel, devel-all, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel- -hadoop, devel-mypy, devel-sentry, devel-static-checks, devel-tests +devel, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-hadoop, +devel-mypy, devel-sentry, devel-static-checks, devel-tests # END DEVEL EXTRAS HERE +Bundle extras +------------- + +Those extras are bundles dynamically generated from other extras. + +# START BUNDLE EXTRAS HERE + +all, all-core, all-dbs, devel-all, devel-ci + +# END BUNDLE EXTRAS HERE + + +Doc extras +---------- + Doc extras - used to install dependencies that are needed to build documentation. 
Only available during editable install. @@ -278,7 +327,20 @@ doc, doc-gen # END DOC EXTRAS HERE -## Compiling front end assets +Deprecated extras +----------------- + +The `deprecated` extras are deprecated extras from Airflow 1 that will be removed in future versions. + +# START DEPRECATED EXTRAS HERE + +atlas, aws, azure, cassandra, crypto, druid, gcp, gcp-api, hdfs, hive, kubernetes, mssql, pinot, s3, +spark, webhdfs, winrm + +# END DEPRECATED EXTRAS HERE + +Compiling front end assets +-------------------------- Sometimes you can see that front-end assets are missing and website looks broken. This is because you need to compile front-end assets. This is done automatically when you create a virtualenv diff --git a/airflow_pre_installed_providers.txt b/airflow_pre_installed_providers.txt index 2d38abfce1196..8ea073266ae62 100644 --- a/airflow_pre_installed_providers.txt +++ b/airflow_pre_installed_providers.txt @@ -1,7 +1,7 @@ # List of all the providers that are pre-installed when you run `pip install apache-airflow` without extras common.io common.sql -fab>=1.0.2dev0 +fab>=1.0.2dev1 ftp http imap diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 59022ef9aaa1a..cda4de256d09f 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["hatchling"] +requires = ["hatchling==1.22.4"] build-backend = "hatchling.build" [project] diff --git a/contributing-docs/07_local_virtualenv.rst b/contributing-docs/07_local_virtualenv.rst index a7a984e47ccaa..373b6d76aaf5a 100644 --- a/contributing-docs/07_local_virtualenv.rst +++ b/contributing-docs/07_local_virtualenv.rst @@ -51,6 +51,16 @@ of required packages. - MacOs with ARM architectures require graphviz for venv setup, refer `here `_ to install graphviz - The helm chart tests need helm to be installed as a pre requisite. Refer `here `_ to install and setup helm +.. 
note:: + + As of version 2.8 Airflow follows PEP 517/518 and uses ``pyproject.toml`` file to define build dependencies + and build process and it requires relatively modern versions of packaging tools to get airflow built from + local sources or ``sdist`` packages, as PEP 517 compliant build hooks are used to determine dynamic build + dependencies. In case of ``pip`` it means that at least version 22.1.0 is needed (released at the beginning of + 2022) to build or install Airflow from sources. This does not affect the ability of installing Airflow from + released wheel packages. + + Installing Airflow .................. @@ -173,26 +183,31 @@ You can see the list of available envs with: This is what it shows currently: -+-------------+---------+----------+---------------------------------------------------------------+ -| Name | Type | Features | Description | -+=============+=========+==========+===============================================================+ -| default | virtual | devel | Default environment with Python 3.8 for maximum compatibility | -+-------------+---------+----------+---------------------------------------------------------------+ -| airflow-38 | virtual | devel | Environment with Python 3.8 | -+-------------+---------+----------+---------------------------------------------------------------+ -| airflow-39 | virtual | devel | Environment with Python 3.9 | -+-------------+---------+----------+---------------------------------------------------------------+ -| airflow-310 | virtual | devel | Environment with Python 3.10 | -+-------------+---------+----------+---------------------------------------------------------------+ -| airflow-311 | virtual | devel | Environment with Python 3.11 | -+-------------+---------+----------+---------------------------------------------------------------+ -| airflow-312 | virtual | devel | Environment with Python 3.12 | 
-+-------------+---------+----------+---------------------------------------------------------------+ ++-------------+---------+---------------------------------------------------------------+ +| Name | Type | Description | ++=============+=========+===============================================================+ +| default | virtual | Default environment with Python 3.8 for maximum compatibility | ++-------------+---------+---------------------------------------------------------------+ +| airflow-38 | virtual | Environment with Python 3.8. No devel installed. | ++-------------+---------+---------------------------------------------------------------+ +| airflow-39 | virtual | Environment with Python 3.9. No devel installed. | ++-------------+---------+---------------------------------------------------------------+ +| airflow-310 | virtual | Environment with Python 3.10. No devel installed. | ++-------------+---------+---------------------------------------------------------------+ +| airflow-311 | virtual | Environment with Python 3.11. No devel installed | ++-------------+---------+---------------------------------------------------------------+ +| airflow-312 | virtual | Environment with Python 3.12. No devel installed | ++-------------+---------+---------------------------------------------------------------+ The default env (if you have not used one explicitly) is ``default`` and it is a Python 3.8 -virtualenv for maximum compatibility with ``devel`` extra installed - this devel extra contains the minimum set -of dependencies and tools that should be used during unit testing of core Airflow and running all ``airflow`` -CLI commands - without support for providers or databases. +virtualenv for maximum compatibility. You can install devel set of dependencies with it +by running: + +.. code:: bash + + pip install -e ".[devel]" + +After entering the environment. 
The other environments are just bare-bones Python virtualenvs with Airflow core requirements only, without any extras installed and without any tools. They are much faster to create than the default diff --git a/contributing-docs/08_static_code_checks.rst b/contributing-docs/08_static_code_checks.rst index a947c7512f291..18222fd601c8d 100644 --- a/contributing-docs/08_static_code_checks.rst +++ b/contributing-docs/08_static_code_checks.rst @@ -170,6 +170,8 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-google-re2-as-dependency | Check google-re2 is declared as dependency when needed | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| check-hatch-build-order | Check order of dependencies in hatch_build.py | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-hooks-apply | Check if all hooks apply to the repository | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-incorrect-use-of-LoggingMixin | Make sure LoggingMixin is not used alone | | @@ -208,8 +210,6 @@ require Breeze Docker image to be built locally. 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-pydevd-left-in-code | Check for pydevd debug statements accidentally left | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| check-pyproject-toml-order | Check order of dependencies in pyproject.toml | | -+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-revision-heads-map | Check that the REVISION_HEADS_MAP is up-to-date | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-safe-filter-usage-in-html | Don't use safe in templates | | diff --git a/contributing-docs/12_airflow_dependencies_and_extras.rst b/contributing-docs/12_airflow_dependencies_and_extras.rst index bf16efd91de12..91328a24abb39 100644 --- a/contributing-docs/12_airflow_dependencies_and_extras.rst +++ b/contributing-docs/12_airflow_dependencies_and_extras.rst @@ -156,23 +156,68 @@ the documentation. This is the full list of these extras: + +Core extras +........... + +Those extras are available as regular core airflow extras - they install optional features of Airflow. + + .. START CORE EXTRAS HERE + +aiobotocore, apache-atlas, apache-webhdfs, async, cgroups, deprecated-api, github-enterprise, +google-auth, graphviz, kerberos, ldap, leveldb, otel, pandas, password, pydantic, rabbitmq, s3fs, +saml, sentry, statsd, uv, virtualenv + + .. END CORE EXTRAS HERE + +Provider extras +............... + +Those extras are available as regular Airflow extras, they install provider packages in standard builds +or dependencies that are necessary to enable the feature in editable build. + + .. 
START PROVIDER EXTRAS HERE + +airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink, +apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, +apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant, +cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord, +docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap, +influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, pagerduty, +papermill, pgvector, pinecone, postgres, presto, qdrant, redis, salesforce, samba, segment, +sendgrid, sftp, singularity, slack, smtp, snowflake, sqlite, ssh, tableau, tabular, telegram, +teradata, trino, vertica, weaviate, yandex, zendesk + + .. END PROVIDER EXTRAS HERE + + Devel extras ............. The ``devel`` extras are not available in the released packages. They are only available when you install Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to -Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing, also -some providers have their own development-related extras tbat allow to install tools necessary to run tests, -where the tools are specific for the provider. - +Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing. .. START DEVEL EXTRAS HERE -devel, devel-all, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel- -hadoop, devel-mypy, devel-sentry, devel-static-checks, devel-tests +devel, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-hadoop, +devel-mypy, devel-sentry, devel-static-checks, devel-tests .. 
END DEVEL EXTRAS HERE +Bundle extras +............. + +Those extras are bundles dynamically generated from other extras. + + .. START BUNDLE EXTRAS HERE + +all, all-core, all-dbs, devel-all, devel-ci + + .. END BUNDLE EXTRAS HERE + + Doc extras ........... @@ -189,33 +234,17 @@ doc, doc-gen .. END DOC EXTRAS HERE +Deprecated extras +................. -Regular extras -.............. - -Those extras are available as regular Airflow extras and are targeted to be used by Airflow users and -contributors to select features of Airflow they want to use They might install additional providers or -just install dependencies that are necessary to enable the feature. - - .. START REGULAR EXTRAS HERE +The ``deprecated`` extras are deprecated extras from Airflow 1 that will be removed in future versions. -aiobotocore, airbyte, alibaba, all, all-core, all-dbs, amazon, apache-atlas, apache-beam, apache- -cassandra, apache-drill, apache-druid, apache-flink, apache-hdfs, apache-hive, apache-impala, -apache-kafka, apache-kylin, apache-livy, apache-pig, apache-pinot, apache-spark, apache-webhdfs, -apprise, arangodb, asana, async, atlas, atlassian-jira, aws, azure, cassandra, celery, cgroups, -cloudant, cncf-kubernetes, cohere, common-io, common-sql, crypto, databricks, datadog, dbt-cloud, -deprecated-api, dingding, discord, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, -gcp_api, github, github-enterprise, google, google-auth, graphviz, grpc, hashicorp, hdfs, hive, -http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft-azure, -microsoft-mssql, microsoft-psrp, microsoft-winrm, mongo, mssql, mysql, neo4j, odbc, openai, -openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, -pgvector, pinecone, pinot, postgres, presto, pydantic, qdrant, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, 
tabular, telegram, teradata, trino, uv, vertica, virtualenv, -weaviate, webhdfs, winrm, yandex, zendesk + .. START DEPRECATED EXTRAS HERE - .. END REGULAR EXTRAS HERE +atlas, aws, azure, cassandra, crypto, druid, gcp, gcp-api, hdfs, hive, kubernetes, mssql, pinot, s3, +spark, webhdfs, winrm + .. END DEPRECATED EXTRAS HERE ----- diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 981a7bb85901d..0a6ef086ffd42 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: fb5183650b0efb5ec8241fcd53b6e5cc26c21c8d273fff91d0e8a3716f37703c44c7ae189653dd3fd52624d89c97635e0a7fc09104138ba35cb3ccf45f8efd4b +Package config hash: 64737d477cded72bb31d3b440bb2e5b76d48e865fd5d7ecc3b2cf9d1f0c889a7232e78f74854e9d2d0a1fd0dd653cb3ff81aee7387fea5afddec91f16ee63cd0 --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/doc/02_customizing.rst b/dev/breeze/doc/02_customizing.rst index 78d24f30db7bc..4e41bedd08740 100644 --- a/dev/breeze/doc/02_customizing.rst +++ b/dev/breeze/doc/02_customizing.rst @@ -45,7 +45,7 @@ will be evaluated at entering the environment. The ``files`` folder from your local sources is automatically mounted to the container under ``/files`` path and you can put there any files you want to make available for the Breeze container. -You can also copy any .whl or .sdist packages to dist and when you pass ``--use-packages-from-dist`` flag +You can also copy any .whl or ``sdist`` packages to dist and when you pass ``--use-packages-from-dist`` flag as ``wheel`` or ``sdist`` line parameter, breeze will automatically install the packages found there when you enter Breeze. 
diff --git a/dev/breeze/doc/ci/04_selective_checks.md b/dev/breeze/doc/ci/04_selective_checks.md index 9a4959dfc5d8c..7df3c6d4c3850 100644 --- a/dev/breeze/doc/ci/04_selective_checks.md +++ b/dev/breeze/doc/ci/04_selective_checks.md @@ -52,11 +52,11 @@ We have the following Groups of files for CI that determine which tests are run: * `API tests files` and `Codegen test files` - those are OpenAPI definition files that impact Open API specification and determine that we should run dedicated API tests. * `Helm files` - change in those files impacts helm "rendering" tests - `chart` folder and `helm_tests` folder. -* `Setup files` - change in the setup files indicates that we should run `upgrade to newer dependencies` - - pyproject.toml and generated dependencies files in `generated` folder. The dependency files and part of - the pyproject.toml are automatically generated from the provider.yaml files in provider by - the `update-providers-dependencies` pre-commit. The provider.yaml is a single source of truth for each - provider. +* `Build files` - change in the files indicates that we should run `upgrade to newer dependencies` - + build dependencies in `pyproject.toml` and generated dependencies files in `generated` folder. + The dependencies are automatically generated from the `provider.yaml` files in provider by + the `hatch_build.py` build hook. The provider.yaml is a single source of truth for each + provider and `hatch_build.py` for all regular dependencies. 
* `DOC files` - change in those files indicate that we should run documentation builds (both airflow sources and airflow documentation) * `WWW files` - those are files for the WWW part of our UI (useful to determine if UI tests should run) @@ -73,7 +73,6 @@ We have the following Groups of files for CI that determine which tests are run: * `All Docs Python files` - files that are checked by `mypy-docs` static checks * `All Provider Yaml files` - all provider yaml files - We have a number of `TEST_TYPES` that can be selectively disabled/enabled based on the content of the incoming PR. Usually they are limited to a sub-folder of the "tests" folder but there are some exceptions. You can read more about those in `testing.rst `. Those types @@ -114,7 +113,8 @@ together using `pytest-xdist` (pytest-xdist distributes the tests among parallel types to execute. This is done because changes in core might impact all the other test types. * if `CI Image building` is disabled, only basic pre-commits are enabled - no 'image-depending` pre-commits are enabled. -* If there are some setup files changed, `upgrade to newer dependencies` is enabled. +* If there are some build dependencies changed (`hatch_build.py` and updated system dependencies in + the `pyproject.toml` - then `upgrade to newer dependencies` is enabled. * If docs are build, the `docs-list-as-string` will determine which docs packages to build. This is based on several criteria: if any of the airflow core, charts, docker-stack, providers files or docs have changed, then corresponding packages are build (including cross-dependent providers). If any of the core files @@ -164,67 +164,70 @@ separated by spaces. 
This is to accommodate for the wau how outputs of this kind Github Actions to pass the list of parameters to a command to execute -| Output | Meaning of the output | Example value | List as string | -|------------------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------|----------------| -| affected-providers-list-as-string | List of providers affected when they are selectively affected. | airbyte http | * | -| all-python-versions | List of all python versions there are available in the form of JSON array | ['3.8', '3.9', '3.10'] | | -| all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.8 3.9 3.10 | * | -| all-versions | If set to true, then all python, k8s, DB versions are used for tests. | false | | -| basic-checks-only | Whether to run all static checks ("false") or only basic set of static checks ("true") | false | | -| cache-directive | Which cache should be used for images ("registry", "local" , "disabled") | registry | | -| chicken-egg-providers | List of providers that should be considered as "chicken-egg" - expecting development Airflow version | | | -| ci-image-build | Whether CI image build is needed | true | | -| debug-resources | Whether resources usage should be printed during parallel job execution ("true"/ "false") | false | | -| default-branch | Which branch is default for the build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main | | -| default-constraints-branch | Which branch is default for the build ("constraints-main" for main branch, "constraints-2-4" etc.) 
| constraints-main | | -| default-helm-version | Which Helm version to use as default | v3.9.4 | | -| default-kind-version | Which Kind version to use as default | v0.16.0 | | -| default-kubernetes-version | Which Kubernetes version to use as default | v1.25.2 | | -| default-mysql-version | Which MySQL version to use as default | 5.7 | | -| default-postgres-version | Which Postgres version to use as default | 10 | | -| default-python-version | Which Python version to use as default | 3.8 | | -| docs-build | Whether to build documentation ("true"/"false") | true | | -| docs-list-as-string | What filter to apply to docs building - based on which documentation packages should be built | apache-airflow helm-chart google | | -| full-tests-needed | Whether this build runs complete set of tests or only subset (for faster PR builds) [1] | false | | -| generated-dependencies-changed | Whether generated dependencies have changed ("true"/"false") | false | | -| helm-version | Which Helm version to use for tests | v3.9.4 | | -| is-airflow-runner | Whether runner used is an airflow or infrastructure runner (true if airflow/false if infrastructure) | false | | -| is-amd-runner | Whether runner used is an AMD one | true | | -| is-arm-runner | Whether runner used is an ARM one | false | | -| is-committer-build | Whether the build is triggered by a committer | false | | -| is-k8s-runner | Whether the build runs on our k8s infrastructure | false | | -| is-self-hosted-runner | Whether the runner is self-hosted | false | | -| is-vm-runner | Whether the runner uses VM to run | true | | -| kind-version | Which Kind version to use for tests | v0.16.0 | | -| kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.8-v1.25.2 3.9-v1.26.4 | * | -| kubernetes-versions | All Kubernetes versions to use for tests as JSON array | ['v1.25.2'] | | -| kubernetes-versions-list-as-string | All Kubernetes versions to use 
for tests as space-separated string | v1.25.2 | * | -| mypy-folders | List of folders to be considered for mypy | [] | | -| mysql-exclude | Which versions of MySQL to exclude for tests as JSON array | [] | | -| mysql-versions | Which versions of MySQL to use for tests as JSON array | ['5.7'] | | -| needs-api-codegen | Whether "api-codegen" are needed to run ("true"/"false") | true | | -| needs-api-tests | Whether "api-tests" are needed to run ("true"/"false") | true | | -| needs-helm-tests | Whether Helm tests are needed to run ("true"/"false") | true | | -| needs-javascript-scans | Whether javascript CodeQL scans should be run ("true"/"false") | true | | -| needs-mypy | Whether mypy check is supposed to run in this build | true | | -| needs-python-scans | Whether Python CodeQL scans should be run ("true"/"false") | true | | -| parallel-test-types-list-as-string | Which test types should be run for unit tests | API Always Providers Providers\[-google\] | * | -| postgres-exclude | Which versions of Postgres to exclude for tests as JSON array | [] | | -| postgres-versions | Which versions of Postgres to use for tests as JSON array | ['10'] | | -| prod-image-build | Whether PROD image build is needed | true | | -| prod-image-build | Whether PROD image build is needed | true | | -| providers-compatibility-checks | List of dicts: (python_version, airflow_version, removed_providers) for compatibility checks | [] | | -| python-versions | List of python versions to use for that build | ['3.8'] | * | -| python-versions-list-as-string | Which versions of MySQL to use for tests as space-separated string | 3.8 | * | -| run-amazon-tests | Whether Amazon tests should be run ("true"/"false") | true | | -| run-kubernetes-tests | Whether Kubernetes tests should be run ("true"/"false") | true | | -| run-tests | Whether unit tests should be run ("true"/"false") | true | | -| run-www-tests | Whether WWW tests should be run ("true"/"false") | true | | -| runs-on | List of labels 
assigned for runners for that build (used to select runners) | ["ubuntu-22.04"] | | -| skip-pre-commits | Which pre-commits should be skipped during the static-checks run | check-provider-yaml-valid,flynt,identity | | -| skip-provider-tests | When provider tests should be skipped (on non-main branch or when no provider changes detected) | true | | -| sqlite-exclude | Which versions of Sqlite to exclude for tests as JSON array | [] | | -| upgrade-to-newer-dependencies | Whether the image build should attempt to upgrade all dependencies (true/false or commit hash) | false | | +| Output | Meaning of the output | Example value | List as string | +|----------------------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------|----------------| +| affected-providers-list-as-string | List of providers affected when they are selectively affected. | airbyte http | * | +| all-python-versions | List of all python versions there are available in the form of JSON array | ['3.8', '3.9', '3.10'] | | +| all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.8 3.9 3.10 | * | +| all-versions | If set to true, then all python, k8s, DB versions are used for tests. | false | | +| basic-checks-only | Whether to run all static checks ("false") or only basic set of static checks ("true") | false | | +| build_system_changed_in_pyproject_toml | When builds system dependencies changed in pyproject.toml changed in the PR. 
| false | | +| cache-directive | Which cache should be used for images ("registry", "local" , "disabled") | registry | | +| chicken-egg-providers | List of providers that should be considered as "chicken-egg" - expecting development Airflow version | | | +| ci-image-build | Whether CI image build is needed | true | | +| debug-resources | Whether resources usage should be printed during parallel job execution ("true"/ "false") | false | | +| default-branch | Which branch is default for the build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main | | +| default-constraints-branch | Which branch is default for the build ("constraints-main" for main branch, "constraints-2-4" etc.) | constraints-main | | +| default-helm-version | Which Helm version to use as default | v3.9.4 | | +| default-kind-version | Which Kind version to use as default | v0.16.0 | | +| default-kubernetes-version | Which Kubernetes version to use as default | v1.25.2 | | +| default-mysql-version | Which MySQL version to use as default | 5.7 | | +| default-postgres-version | Which Postgres version to use as default | 10 | | +| default-python-version | Which Python version to use as default | 3.8 | | +| docs-build | Whether to build documentation ("true"/"false") | true | | +| docs-list-as-string | What filter to apply to docs building - based on which documentation packages should be built | apache-airflow helm-chart google | | +| full-tests-needed | Whether this build runs complete set of tests or only subset (for faster PR builds) [1] | false | | +| generated-dependencies-changed | Whether generated dependencies have changed ("true"/"false") | false | | +| hatch-build-changed | When hatch build.py changed in the PR. 
| false | | +| helm-version | Which Helm version to use for tests | v3.9.4 | | +| is-airflow-runner | Whether runner used is an airflow or infrastructure runner (true if airflow/false if infrastructure) | false | | +| is-amd-runner | Whether runner used is an AMD one | true | | +| is-arm-runner | Whether runner used is an ARM one | false | | +| is-committer-build | Whether the build is triggered by a committer | false | | +| is-k8s-runner | Whether the build runs on our k8s infrastructure | false | | +| is-self-hosted-runner | Whether the runner is self-hosted | false | | +| is-vm-runner | Whether the runner uses VM to run | true | | +| kind-version | Which Kind version to use for tests | v0.16.0 | | +| kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.8-v1.25.2 3.9-v1.26.4 | * | +| kubernetes-versions | All Kubernetes versions to use for tests as JSON array | ['v1.25.2'] | | +| kubernetes-versions-list-as-string | All Kubernetes versions to use for tests as space-separated string | v1.25.2 | * | +| mypy-folders | List of folders to be considered for mypy | [] | | +| mysql-exclude | Which versions of MySQL to exclude for tests as JSON array | [] | | +| mysql-versions | Which versions of MySQL to use for tests as JSON array | ['5.7'] | | +| needs-api-codegen | Whether "api-codegen" are needed to run ("true"/"false") | true | | +| needs-api-tests | Whether "api-tests" are needed to run ("true"/"false") | true | | +| needs-helm-tests | Whether Helm tests are needed to run ("true"/"false") | true | | +| needs-javascript-scans | Whether javascript CodeQL scans should be run ("true"/"false") | true | | +| needs-mypy | Whether mypy check is supposed to run in this build | true | | +| needs-python-scans | Whether Python CodeQL scans should be run ("true"/"false") | true | | +| parallel-test-types-list-as-string | Which test types should be run for unit tests | API Always Providers 
Providers\[-google\] | * | +| postgres-exclude | Which versions of Postgres to exclude for tests as JSON array | [] | | +| postgres-versions | Which versions of Postgres to use for tests as JSON array | ['10'] | | +| prod-image-build | Whether PROD image build is needed | true | | +| prod-image-build | Whether PROD image build is needed | true | | +| providers-compatibility-checks | List of dicts: (python_version, airflow_version, removed_providers) for compatibility checks | [] | | +| pyproject-toml-changed | When pyproject.toml changed in the PR. | false | | +| python-versions | List of python versions to use for that build | ['3.8'] | * | +| python-versions-list-as-string | Which versions of MySQL to use for tests as space-separated string | 3.8 | * | +| run-amazon-tests | Whether Amazon tests should be run ("true"/"false") | true | | +| run-kubernetes-tests | Whether Kubernetes tests should be run ("true"/"false") | true | | +| run-tests | Whether unit tests should be run ("true"/"false") | true | | +| run-www-tests | Whether WWW tests should be run ("true"/"false") | true | | +| runs-on | List of labels assigned for runners for that build (used to select runners) | ["ubuntu-22.04"] | | +| skip-pre-commits | Which pre-commits should be skipped during the static-checks run | check-provider-yaml-valid,flynt,identity | | +| skip-provider-tests | When provider tests should be skipped (on non-main branch or when no provider changes detected) | true | | +| sqlite-exclude | Which versions of Sqlite to exclude for tests as JSON array | [] | | +| upgrade-to-newer-dependencies | Whether the image build should attempt to upgrade all dependencies (true/false or commit hash) | false | | [1] Note for deciding if `full tests needed` mode is enabled and provider.yaml files. 
diff --git a/dev/breeze/doc/images/output_static-checks.svg b/dev/breeze/doc/images/output_static-checks.svg index 200d53394d3ec..a6c9ba8bec5ae 100644 --- a/dev/breeze/doc/images/output_static-checks.svg +++ b/dev/breeze/doc/images/output_static-checks.svg @@ -1,4 +1,4 @@ - +
into
and breaks our doc formatting + # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle + #
tags for sections + "docutils<0.17,>=0.16", + "sphinx-airflow-theme>=0.0.12", + "sphinx-argparse>=0.4.0", + # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 + # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x + "sphinx-autoapi>=2.1.1", + "sphinx-copybutton>=0.5.2", + "sphinx-design>=0.5.0", + "sphinx-jinja>=2.0.2", + "sphinx-rtd-theme>=2.0.0", + # Currently we are using sphinx 5 but we need to migrate to Sphinx 7 + "sphinx>=5.3.0,<6.0.0", + "sphinxcontrib-applehelp>=1.0.4", + "sphinxcontrib-devhelp>=1.0.2", + "sphinxcontrib-htmlhelp>=2.0.1", + "sphinxcontrib-httpdomain>=1.8.1", + "sphinxcontrib-jquery>=4.1", + "sphinxcontrib-jsmath>=1.0.1", + "sphinxcontrib-qthelp>=1.0.3", + "sphinxcontrib-redoc>=1.6.0", + "sphinxcontrib-serializinghtml==1.1.5", + "sphinxcontrib-spelling>=8.0.0", + ], + "doc-gen": [ + "apache-airflow[doc]", + "eralchemy2>=1.3.8", + ], + # END OF doc extras +} + +DEVEL_EXTRAS: dict[str, list[str]] = { + # START OF devel extras + "devel-debuggers": [ + "ipdb>=0.13.13", + ], + "devel-devscripts": [ + "click>=8.0", + "gitpython>=3.1.40", + "hatch>=1.9.1", + "pipdeptree>=2.13.1", + "pygithub>=2.1.1", + "restructuredtext-lint>=1.4.0", + "rich-click>=1.7.0", + "semver>=3.0.2", + "towncrier>=23.11.0", + "twine>=4.0.2", + ], + "devel-duckdb": [ + # Python 3.12 support was added in 0.10.0 + "duckdb>=0.10.0; python_version >= '3.12'", + "duckdb>=0.9.0; python_version < '3.12'", + ], + # Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them + # manually as `types-*`. See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports + # for details. 
We want to install them explicitly because we want to eventually move to + # mypyd which does not support installing the types dynamically with --install-types + "devel-mypy": [ + # TODO: upgrade to newer versions of MyPy continuously as they are released + # Make sure to upgrade the mypy version in update-common-sql-api-stubs in .pre-commit-config.yaml + # when you upgrade it here !!!! + "mypy==1.9.0", + "types-Deprecated", + "types-Markdown", + "types-PyMySQL", + "types-PyYAML", + "types-aiofiles", + "types-certifi", + "types-croniter", + "types-docutils", + "types-paramiko", + "types-protobuf", + "types-python-dateutil", + "types-python-slugify", + "types-pytz", + "types-redis", + "types-requests", + "types-setuptools", + "types-tabulate", + "types-termcolor", + "types-toml", + ], + "devel-sentry": [ + "blinker>=1.7.0", + ], + "devel-static-checks": [ + "black>=23.12.0", + "pre-commit>=3.5.0", + "ruff==0.3.3", + "yamllint>=1.33.0", + ], + "devel-tests": [ + "aiofiles>=23.2.0", + "aioresponses>=0.7.6", + "backports.zoneinfo>=0.2.1;python_version<'3.9'", + "beautifulsoup4>=4.7.1", + # Coverage 7.4.0 added experimental support for Python 3.12 PEP669 which we use in Airflow + "coverage>=7.4.0", + "pytest-asyncio>=0.23.3", + "pytest-cov>=4.1.0", + "pytest-icdiff>=0.9", + "pytest-instafail>=0.5.0", + "pytest-mock>=3.12.0", + "pytest-rerunfailures>=13.0", + "pytest-timeouts>=1.2.1", + "pytest-xdist>=3.5.0", + # Temporary upper limmit to <8, not all dependencies at that moment ready to use 8.0 + # Internal meta-task for track https://github.com/apache/airflow/issues/37156 + "pytest>=7.4.4,<8.0", + "requests_mock>=1.11.0", + "time-machine>=2.13.0", + "wheel>=0.42.0", + ], + "devel": [ + "apache-airflow[celery]", + "apache-airflow[cncf-kubernetes]", + "apache-airflow[common-io]", + "apache-airflow[common-sql]", + "apache-airflow[devel-debuggers]", + "apache-airflow[devel-devscripts]", + "apache-airflow[devel-duckdb]", + "apache-airflow[devel-mypy]", + 
"apache-airflow[devel-sentry]", + "apache-airflow[devel-static-checks]", + "apache-airflow[devel-tests]", + "apache-airflow[fab]", + "apache-airflow[ftp]", + "apache-airflow[http]", + "apache-airflow[imap]", + "apache-airflow[sqlite]", + ], + "devel-all-dbs": [ + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + ], + "devel-ci": [ + "apache-airflow[devel-all]", + ], + "devel-hadoop": [ + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[devel]", + "apache-airflow[hdfs]", + "apache-airflow[kerberos]", + "apache-airflow[presto]", + ], +} + +BUNDLE_EXTRAS: dict[str, list[str]] = { + "all-dbs": [ + "apache-airflow[apache-cassandra]", + "apache-airflow[apache-drill]", + "apache-airflow[apache-druid]", + "apache-airflow[apache-hdfs]", + "apache-airflow[apache-hive]", + "apache-airflow[apache-impala]", + "apache-airflow[apache-pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft-mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + ], +} + +DEPRECATED_EXTRAS: dict[str, list[str]] = { + 
######################################################################################################## + # The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series + ######################################################################################################## + "atlas": [ + "apache-airflow[apache-atlas]", + ], + "aws": [ + "apache-airflow[amazon]", + ], + "azure": [ + "apache-airflow[microsoft-azure]", + ], + "cassandra": [ + "apache-airflow[apache-cassandra]", + ], + # Empty alias extra just for backward compatibility with Airflow 1.10 + "crypto": [], + "druid": [ + "apache-airflow[apache-druid]", + ], + "gcp": [ + "apache-airflow[google]", + ], + "gcp-api": [ + "apache-airflow[google]", + ], + "hdfs": [ + "apache-airflow[apache-hdfs]", + ], + "hive": [ + "apache-airflow[apache-hive]", + ], + "kubernetes": [ + "apache-airflow[cncf-kubernetes]", + ], + "mssql": [ + "apache-airflow[microsoft-mssql]", + ], + "pinot": [ + "apache-airflow[apache-pinot]", + ], + "s3": [ + "apache-airflow[amazon]", + ], + "spark": [ + "apache-airflow[apache-spark]", + ], + "webhdfs": [ + "apache-airflow[apache-webhdfs]", + ], + "winrm": [ + "apache-airflow[microsoft-winrm]", + ], +} + +# When you remove a dependency from the list, you should also make sure to add the dependency to be removed +# in the scripts/docker/install_airflow_dependencies_from_branch_tip.sh script DEPENDENCIES_TO_REMOVE +# in order to make sure the dependency is not installed in the CI image build process from the main +# of Airflow branch. After your PR is merged, you should remove it from the list there. +DEPENDENCIES = [ + # Alembic is important to handle our migrations in predictable and performant way. It is developed + # together with SQLAlchemy. 
Our experience with Alembic is that it very stable in minor version + # The 1.13.0 of alembic marked some migration code as SQLAlchemy 2+ only so we limit it to 1.13.1 + "alembic>=1.13.1, <2.0", + "argcomplete>=1.10", + "asgiref", + "attrs>=22.1.0", + # Blinker use for signals in Flask, this is an optional dependency in Flask 2.2 and lower. + # In Flask 2.3 it becomes a mandatory dependency, and flask signals are always available. + "blinker>=1.6.2", + # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x + # Update CustomTTYColoredFormatter to remove + "colorlog>=4.0.2, <5.0", + "configupdater>=3.1.1", + # `airflow/www/extensions/init_views` imports `connexion.decorators.validation.RequestBodyValidator` + # connexion v3 has refactored the entire module to middleware, see: /spec-first/connexion/issues/1525 + # Specifically, RequestBodyValidator was removed in: /spec-first/connexion/pull/1595 + # The usage was added in #30596, seemingly only to override and improve the default error message. + # Either revert that change or find another way, preferably without using connexion internals. + # This limit can be removed after https://github.com/apache/airflow/issues/35234 is fixed + "connexion[flask]>=2.10.0,<3.0", + "cron-descriptor>=1.2.24", + "croniter>=2.0.2", + "cryptography>=39.0.0", + "deprecated>=1.2.13", + "dill>=0.2.2", + "flask-caching>=1.5.0", + # Flask-Session 0.6 add new arguments into the SqlAlchemySessionInterface constructor as well as + # all parameters now are mandatory which make AirflowDatabaseSessionInterface incopatible with this version. + "flask-session>=0.4.0,<0.6", + "flask-wtf>=0.15", + # Flask 2.3 is scheduled to introduce a number of deprecation removals - some of them might be breaking + # for our dependencies - notably `_app_ctx_stack` and `_request_ctx_stack` removals. 
+ # We should remove the limitation after 2.3 is released and our dependencies are updated to handle it + "flask>=2.2,<2.3", + "fsspec>=2023.10.0", + "google-re2>=1.0", + "gunicorn>=20.1.0", + "httpx", + 'importlib_metadata>=1.7;python_version<"3.9"', + # Importib_resources 6.2.0-6.3.1 break pytest_rewrite + # see https://github.com/python/importlib_resources/issues/299 + 'importlib_resources>=5.2,!=6.2.0,!=6.3.0,!=6.3.1;python_version<"3.9"', + "itsdangerous>=2.0", + "jinja2>=3.0.0", + "jsonschema>=4.18.0", + "lazy-object-proxy", + "linkify-it-py>=2.0.0", + "lockfile>=0.12.2", + "markdown-it-py>=2.1.0", + "markupsafe>=1.1.1", + "marshmallow-oneofschema>=2.0.1", + "mdit-py-plugins>=0.3.0", + "opentelemetry-api>=1.15.0", + "opentelemetry-exporter-otlp", + "packaging>=14.0", + "pathspec>=0.9.0", + "pendulum>=2.1.2,<4.0", + "pluggy>=1.0", + "psutil>=4.2.0", + "pygments>=2.0.1", + "pyjwt>=2.0.0", + "python-daemon>=3.0.0", + "python-dateutil>=2.3", + "python-nvd3>=0.15.0", + "python-slugify>=5.0", + # Requests 3 if it will be released, will be heavily breaking. + "requests>=2.27.0,<3", + "rfc3339-validator>=0.1.4", + "rich-argparse>=1.0.0", + "rich>=12.4.4", + "setproctitle>=1.1.8", + # We use some deprecated features of sqlalchemy 2.0 and we should replace them before we can upgrade + # See https://sqlalche.me/e/b8d9 for details of deprecated features + # you can set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. 
+ # The issue tracking it is https://github.com/apache/airflow/issues/28723 + "sqlalchemy>=1.4.36,<2.0", + "sqlalchemy-jsonfield>=1.0", + "tabulate>=0.7.5", + "tenacity>=6.2.0,!=8.2.0", + "termcolor>=1.1.0", + # We should remove this dependency when Providers are limited to Airflow 2.7+ + # as we replaced the usage of unicodecsv with csv in Airflow 2.7 + # See https://github.com/apache/airflow/pull/31693 + # We should also remove "licenses/LICENSE-unicodecsv.txt" file when we remove this dependency + "unicodecsv>=0.14.1", + # The Universal Pathlib provides Pathlib-like interface for FSSPEC + "universal-pathlib>=0.2.2", + # Werkzug 3 breaks Flask-Login 0.6.2, also connexion needs to be updated to >= 3.0 + # we should remove this limitation when FAB supports Flask 2.3 and we migrate connexion to 3+ + "werkzeug>=2.0,<3", +] + + +ALL_DYNAMIC_EXTRA_DICTS: list[tuple[dict[str, list[str]], str]] = [ + (CORE_EXTRAS, "Core extras"), + (DOC_EXTRAS, "Doc extras"), + (DEVEL_EXTRAS, "Devel extras"), + (BUNDLE_EXTRAS, "Bundle extras"), + (DEPRECATED_EXTRAS, "Deprecated extras"), ] +ALL_GENERATED_BUNDLE_EXTRAS = ["all", "all-core", "devel-all", "devel-ci"] + + +def normalize_extra(dependency_id: str) -> str: + return dependency_id.replace(".", "-").replace("_", "-") + + +def normalize_requirement(requirement: str): + from packaging.requirements import Requirement + from packaging.utils import NormalizedName, canonicalize_name + + req = Requirement(requirement) + package: NormalizedName = canonicalize_name(req.name) + package_str = str(package) + if req.extras: + # Sort extras by name + package_str += f"[{','.join(sorted([normalize_extra(extra) for extra in req.extras]))}]" + version_required = "" + if req.specifier: + version_required = ",".join(map(str, sorted(req.specifier, key=lambda spec: spec.version))) + if req.marker: + version_required += f"; {req.marker}" + return str(package_str + version_required) + + +ALL_DYNAMIC_EXTRAS: list[str] = sorted( + set( + itertools.chain( + 
*[d for d, desc in ALL_DYNAMIC_EXTRA_DICTS], + [normalize_extra(provider) for provider in PROVIDER_DEPENDENCIES], + ALL_GENERATED_BUNDLE_EXTRAS, + ) + ) +) + def get_provider_id(provider_spec: str) -> str: # in case provider_spec is "=" @@ -59,17 +562,17 @@ def get_provider_requirement(provider_spec: str) -> str: # if providers are ready, we can preinstall them PREINSTALLED_PROVIDERS = [ get_provider_requirement(provider_spec) - for provider_spec in PREINSTALLED_PROVIDER_SPECS - if DEPENDENCIES[get_provider_id(provider_spec)]["state"] == "ready" + for provider_spec in PRE_INSTALLED_PROVIDERS + if PROVIDER_DEPENDENCIES[get_provider_id(provider_spec)]["state"] == "ready" ] # if provider is in not-ready or pre-release, we need to install its dependencies # however we need to skip apache-airflow itself and potentially any providers that are PREINSTALLED_NOT_READY_DEPS = [] -for provider_spec in PREINSTALLED_PROVIDER_SPECS: +for provider_spec in PRE_INSTALLED_PROVIDERS: provider_id = get_provider_id(provider_spec) - if DEPENDENCIES[provider_id]["state"] not in ["ready", "suspended", "removed"]: - for dependency in DEPENDENCIES[provider_id]["deps"]: + if PROVIDER_DEPENDENCIES[provider_id]["state"] not in ["ready", "suspended", "removed"]: + for dependency in PROVIDER_DEPENDENCIES[provider_id]["deps"]: if dependency.startswith("apache-airflow-providers"): raise Exception( f"The provider {provider_id} is pre-installed and it has as dependency " @@ -159,42 +662,177 @@ def write_git_version(self) -> None: git_version_file.write_text(version) +def _is_devel_extra(extra: str) -> bool: + return extra.startswith("devel") or extra in ["doc", "doc-gen"] + + +GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES" +GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES" + + +def convert_to_extra_dependency(dependency: str) -> str: + # if there is version in dependency - remove it as we do not need it in extra specification + # for editable installation + if ">=" 
in dependency:
+        dependency = dependency.split(">=")[0]
+    extra = dependency.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_")
+    return f"apache-airflow[{extra}]"
+
+
+def get_python_exclusion(excluded_python_versions: list[str]):
+    exclusion = ""
+    if excluded_python_versions:
+        separator = ";"
+        for version in excluded_python_versions:
+            exclusion += f'{separator}python_version != "{version}"'
+            separator = " and "
+    return exclusion
+
+
+def skip_for_editable_build(excluded_python_versions: list[str]) -> bool:
+    current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+    if current_python_version in excluded_python_versions:
+        return True
+    return False
+
+
 class CustomBuildHook(BuildHookInterface[BuilderConfig]):
     """Custom build hook for Airflow - remove devel extras and adds preinstalled providers."""
 
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        # Stores all dependencies that any of the airflow extras (including devel) use
+        self.all_devel_ci_dependencies: set[str] = set()
+        # All extras that should be included in the wheel package
+        self.all_non_devel_extras: set[str] = set()
+        # All extras that should be available in the editable install
+        self.all_devel_extras: set[str] = set()
+        self.optional_dependencies: dict[str, list[str]] = {}
+        self._dependencies: list[str] = []
+        super().__init__(*args, **kwargs)
+
     def initialize(self, version: str, build_data: dict[str, Any]) -> None:
         """
         Initialize hook immediately before each build.
 
         Any modifications to the build data will be seen by the build target.
""" + self._process_all_built_in_extras(version) + self._process_all_provider_extras(version) + + # Adds all-core extras for the extras that are built-in and not devel + self.optional_dependencies["all-core"] = sorted( + set([f"apache-airflow[{extra}]" for extra in CORE_EXTRAS.keys()]) + ) + # Adds "apache-airflow[extra]" for all extras that are not devel extras for wheel and editable builds + self.optional_dependencies["all"] = [ + f"apache-airflow[{extra}]" for extra in sorted(self.all_non_devel_extras) + ] + # Adds all devel extras for the extras that are built-in only for editable builds + if version != "standard": + self.optional_dependencies["devel-all"] = [ + f"apache-airflow[{extra}]" for extra in sorted(self.all_devel_extras) + ] + # This is special dependency that is used to install all possible + # 3rd-party dependencies for airflow for the CI image. It is exposed in the wheel package + # because we want to use for building the image cache from GitHub URL. + self.optional_dependencies["devel-ci"] = sorted(self.all_devel_ci_dependencies) + self._dependencies = DEPENDENCIES + if version == "standard": - all_possible_non_airflow_dependencies = [] - for extra, deps in self.metadata.core.optional_dependencies.items(): - for dep in deps: - if not dep.startswith("apache-airflow"): - all_possible_non_airflow_dependencies.append(dep) - # remove devel dependencies from optional dependencies for standard packages - self.metadata.core._optional_dependencies = { - key: value - for (key, value) in self.metadata.core.optional_dependencies.items() - if not key.startswith("devel") and key not in ["doc", "doc-gen"] - } - # This is the special dependency in wheel package that is used to install all possible - # 3rd-party dependencies for airflow for the CI image. It is exposed in the wheel package - # because we want to use for building the image cache from GitHub URL. 
- self.metadata.core._optional_dependencies["devel-ci"] = all_possible_non_airflow_dependencies - # Replace editable dependencies with provider dependencies for provider packages - for dependency_id in DEPENDENCIES.keys(): - if DEPENDENCIES[dependency_id]["state"] != "ready": - continue - normalized_dependency_id = dependency_id.replace(".", "-") - self.metadata.core._optional_dependencies[normalized_dependency_id] = [ - f"apache-airflow-providers-{normalized_dependency_id}" - ] # Inject preinstalled providers into the dependencies for standard packages - if self.metadata.core._dependencies: - for provider in PREINSTALLED_PROVIDERS: - self.metadata.core._dependencies.append(provider) - for dependency in PREINSTALLED_NOT_READY_DEPS: - self.metadata.core._dependencies.append(dependency) + for provider in PREINSTALLED_PROVIDERS: + self._dependencies.append(provider) + for not_ready_provider_dependency in PREINSTALLED_NOT_READY_DEPS: + self._dependencies.append(not_ready_provider_dependency) + + # with hatchling, we can modify dependencies dynamically by modifying the build_data + build_data["dependencies"] = self._dependencies + + # unfortunately hatchling currently does not have a way to override optional_dependencies + # via build_data (or so it seem) so we need to modify internal _optional_dependencies + # field in core.metadata until this is possible + self.metadata.core._optional_dependencies = self.optional_dependencies + + def _add_devel_ci_dependencies(self, deps: list[str], python_exclusion: str) -> None: + """ + Add devel_ci_dependencies. + + Adds all external dependencies which are not apache-airflow deps to the list of dependencies + that are going to be added to `devel-ci` extra. + + :param deps: list of dependencies to add + :param version: "standard" or "editable" build. + :param excluded_python_versions: List of python versions to exclude + :param python_exclusion: Python version exclusion string. 
+ """ + for dep in deps: + if not dep.startswith("apache-airflow"): + self.all_devel_ci_dependencies.add(normalize_requirement(dep) + python_exclusion) + + def _process_all_provider_extras(self, version: str) -> None: + """ + Process all provider extras. + + Processes all provider dependencies. This generates dependencies for editable builds + and providers for wheel builds. + + :param version: "standard" or "editable" build. + :return: + """ + for dependency_id in PROVIDER_DEPENDENCIES.keys(): + if PROVIDER_DEPENDENCIES[dependency_id]["state"] != "ready": + continue + excluded_python_versions = PROVIDER_DEPENDENCIES[dependency_id].get("excluded-python-versions") + if version != "standard" and skip_for_editable_build(excluded_python_versions): + continue + normalized_extra_name = normalize_extra(dependency_id) + deps: list[str] = PROVIDER_DEPENDENCIES[dependency_id]["deps"] + + deps = [dep for dep in deps if not dep.startswith("apache-airflow>=")] + devel_deps: list[str] = PROVIDER_DEPENDENCIES[dependency_id].get("devel-deps", []) + + if version == "standard": + # add providers instead of dependencies for wheel builds + self.optional_dependencies[normalized_extra_name] = [ + f"apache-airflow-providers-{normalized_extra_name}" + f"{get_python_exclusion(excluded_python_versions)}" + ] + else: + # for editable packages - add regular + devel dependencies retrieved from provider.yaml + # but convert the provider dependencies to apache-airflow[extras] + # and adding python exclusions where needed + editable_deps = [] + for dep in itertools.chain(deps, devel_deps): + if dep.startswith("apache-airflow-providers-"): + dep = convert_to_extra_dependency(dep) + editable_deps.append(dep) + self.optional_dependencies[normalized_extra_name] = sorted(set(editable_deps)) + self._add_devel_ci_dependencies(editable_deps, python_exclusion="") + self.all_devel_extras.add(normalized_extra_name) + self.all_non_devel_extras.add(normalized_extra_name) + + def 
_process_all_built_in_extras(self, version: str) -> None: + """ + Process all built-in extras. + + Adds all core extras (for editable builds) minus devel and doc extras (for wheel builds) + to the list of dependencies. It also builds the list of all non-devel built-in extras that will be + used to produce "all" extra. + + :param version: "standard" or "editable" build. + :return: + """ + for dict, _ in ALL_DYNAMIC_EXTRA_DICTS: + for extra, deps in dict.items(): + self.all_devel_extras.add(extra) + self._add_devel_ci_dependencies(deps, python_exclusion="") + if dict not in [DEPRECATED_EXTRAS, DEVEL_EXTRAS, DOC_EXTRAS]: + # do not add deprecated extras to "all" extras + self.all_non_devel_extras.add(extra) + if version == "standard": + # for wheel builds we skip devel and doc extras + if dict not in [DEVEL_EXTRAS, DOC_EXTRAS]: + self.optional_dependencies[extra] = deps + else: + # for editable builds we add all extras + self.optional_dependencies[extra] = deps diff --git a/pyproject.toml b/pyproject.toml index cd6aa8b6a555f..55f9592eccf86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,6 @@ build-backend = "hatchling.build" [project] name = "apache-airflow" -dynamic = ["version"] - description = "Programmatically author, schedule and monitor data pipelines" readme = { file = "generated/PYPI_README.md", content-type = "text/markdown" } license-files.globs = ["LICENSE", "3rd-party-licenses/*.txt"] @@ -62,1146 +60,88 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Topic :: System :: Monitoring", ] -# When you remove a dependency from the list, you should also make sure to add the dependency to be removed -# in the scripts/docker/install_airflow_dependencies_from_branch_tip.sh script DEPENDENCIES_TO_REMOVE -# in order to make sure the dependency is not installed in the CI image build process from the main -# of Airflow branch. After your PR is merged, you should remove it from the list there. 
-dependencies = [ - # Alembic is important to handle our migrations in predictable and performant way. It is developed - # together with SQLAlchemy. Our experience with Alembic is that it very stable in minor version - # The 1.13.0 of alembic marked some migration code as SQLAlchemy 2+ only so we limit it to 1.13.1 - "alembic>=1.13.1, <2.0", - "argcomplete>=1.10", - "asgiref", - "attrs>=22.1.0", - # Blinker use for signals in Flask, this is an optional dependency in Flask 2.2 and lower. - # In Flask 2.3 it becomes a mandatory dependency, and flask signals are always available. - "blinker>=1.6.2", - # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x - # Update CustomTTYColoredFormatter to remove - "colorlog>=4.0.2, <5.0", - "configupdater>=3.1.1", - # `airflow/www/extensions/init_views` imports `connexion.decorators.validation.RequestBodyValidator` - # connexion v3 has refactored the entire module to middleware, see: /spec-first/connexion/issues/1525 - # Specifically, RequestBodyValidator was removed in: /spec-first/connexion/pull/1595 - # The usage was added in #30596, seemingly only to override and improve the default error message. - # Either revert that change or find another way, preferably without using connexion internals. - # This limit can be removed after https://github.com/apache/airflow/issues/35234 is fixed - "connexion[flask]>=2.10.0,<3.0", - "cron-descriptor>=1.2.24", - "croniter>=2.0.2", - "cryptography>=39.0.0", - "deprecated>=1.2.13", - "dill>=0.2.2", - "flask-caching>=1.5.0", - # Flask-Session 0.6 add new arguments into the SqlAlchemySessionInterface constructor as well as - # all parameters now are mandatory which make AirflowDatabaseSessionInterface incopatible with this version. 
- "flask-session>=0.4.0,<0.6", - "flask-wtf>=0.15", - # Flask 2.3 is scheduled to introduce a number of deprecation removals - some of them might be breaking - # for our dependencies - notably `_app_ctx_stack` and `_request_ctx_stack` removals. - # We should remove the limitation after 2.3 is released and our dependencies are updated to handle it - "flask>=2.2,<2.3", - "fsspec>=2023.10.0", - "google-re2>=1.0", - "gunicorn>=20.1.0", - "httpx", - "importlib_metadata>=1.7;python_version<\"3.9\"", - # Importib_resources 6.2.0-6.3.1 break pytest_rewrite - # see https://github.com/python/importlib_resources/issues/299 - "importlib_resources>=5.2,!=6.2.0,!=6.3.0,!=6.3.1;python_version<\"3.9\"", - "itsdangerous>=2.0", - "jinja2>=3.0.0", - "jsonschema>=4.18.0", - "lazy-object-proxy", - "linkify-it-py>=2.0.0", - "lockfile>=0.12.2", - "markdown-it-py>=2.1.0", - "markupsafe>=1.1.1", - "marshmallow-oneofschema>=2.0.1", - "mdit-py-plugins>=0.3.0", - "opentelemetry-api>=1.15.0", - "opentelemetry-exporter-otlp", - "packaging>=14.0", - "pathspec>=0.9.0", - "pendulum>=2.1.2,<4.0", - "pluggy>=1.0", - "psutil>=4.2.0", - "pygments>=2.0.1", - "pyjwt>=2.0.0", - "python-daemon>=3.0.0", - "python-dateutil>=2.3", - "python-nvd3>=0.15.0", - "python-slugify>=5.0", - # Requests 3 if it will be released, will be heavily breaking. - "requests>=2.27.0,<3", - "rfc3339-validator>=0.1.4", - "rich-argparse>=1.0.0", - "rich>=12.4.4", - "setproctitle>=1.1.8", - # We use some deprecated features of sqlalchemy 2.0 and we should replace them before we can upgrade - # See https://sqlalche.me/e/b8d9 for details of deprecated features - # you can set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. 
- # The issue tracking it is https://github.com/apache/airflow/issues/28723 - "sqlalchemy>=1.4.36,<2.0", - "sqlalchemy-jsonfield>=1.0", - "tabulate>=0.7.5", - "tenacity>=6.2.0,!=8.2.0", - "termcolor>=1.1.0", - # We should remove this dependency when Providers are limited to Airflow 2.7+ - # as we replaced the usage of unicodecsv with csv in Airflow 2.7 - # See https://github.com/apache/airflow/pull/31693 - # We should also remove "licenses/LICENSE-unicodecsv.txt" file when we remove this dependency - "unicodecsv>=0.14.1", - # The Universal Pathlib provides Pathlib-like interface for FSSPEC - "universal-pathlib>=0.2.2", - # Werkzug 3 breaks Flask-Login 0.6.2, also connexion needs to be updated to >= 3.0 - # we should remove this limitation when FAB supports Flask 2.3 and we migrate connexion to 3+ - "werkzeug>=2.0,<3", -] -[project.optional-dependencies] -# Here manually managed extras start -# Those extras are manually managed and should be updated when needed +dynamic = ["version", "optional-dependencies", "dependencies"] + +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# !!! YOU MIGHT BE SURPRISED NOT SEEING THE DEPENDENCIES AS `project.dependencies` !!!!!!!!! +# !!! AND EXTRAS AS `project.optional-dependencies` !!!!!!!!! +# !!! THEY ARE marked as `dynamic` GENERATED by `hatch_build.py` !!!!!!!!! +# !!! SEE COMMENTS BELOW TO FIND WHERE DEPENDENCIES ARE MAINTAINED !!!!!!!!! +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # -# START OF core extras +# !!!!!! Those provuders are defined in `hatch_build.py` and should be maintained there !!!!!!! # -# This required for AWS deferrable operators. -# There is conflict between boto3 and aiobotocore dependency botocore. 
-# TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or -# boto3 have native aync support and we move away from aio aiobotocore +# Those extras are available as regular core airflow extras - they install optional features of Airflow. # -aiobotocore = [ - "aiobotocore>=2.7.0", -] -async = [ - "eventlet>=0.33.3", - "gevent>=0.13", - "greenlet>=0.4.9", -] -cgroups = [ - # Cgroupspy 0.2.2 added Python 3.10 compatibility - "cgroupspy>=0.2.2", -] -deprecated-api = [ - "requests>=2.27.0,<3", -] -github-enterprise = [ - "apache-airflow[fab]", - "authlib>=1.0.0", -] -google-auth = [ - "apache-airflow[fab]", - "authlib>=1.0.0", -] -graphviz = [ - "graphviz>=0.12", -] -kerberos = [ - "pykerberos>=1.1.13", - "requests-kerberos>=0.10.0", - "thrift-sasl>=0.2.0", -] -ldap = [ - "ldap3>=2.5.1", - "python-ldap", -] -leveldb = [ - "plyvel", -] -otel = [ - "opentelemetry-exporter-prometheus", -] -pandas = [ - # In pandas 2.2 minimal version of the sqlalchemy is 2.0 - # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies - # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723 - # In addition FAB also limit sqlalchemy to < 2.0 - "pandas>=1.2.5,<2.2", -] -password = [ - "bcrypt>=2.0.0", - "flask-bcrypt>=0.7.1", -] -pydantic = [ - "pydantic>=2.3.0", -] -rabbitmq = [ - "amqp", -] -s3fs = [ - # This is required for support of S3 file system which uses aiobotocore - # which can have a conflict with boto3 as mentioned in aiobotocore extra - "s3fs>=2023.10.0", -] -saml = [ - # This is required for support of SAML which might be used by some providers (e.g. 
Amazon) - "python3-saml>=1.16.0", -] -sentry = [ - "blinker>=1.1", - # Sentry SDK 1.33 is broken when greenlets are installed and fails to import - # See https://github.com/getsentry/sentry-python/issues/2473 - "sentry-sdk>=1.32.0,!=1.33.0", -] -statsd = [ - "statsd>=3.3.0", -] -uv = [ - "uv>=0.1.24", -] -virtualenv = [ - "virtualenv", -] -# END OF core extras -# START OF Apache no provider extras -apache-atlas = [ - "atlasclient>=0.1.2", -] -apache-webhdfs = [ - "hdfs[avro,dataframe,kerberos]>=2.0.4", -] -# END OF Apache no provider extras -all-core = [ - "apache-airflow[aiobotocore]", - "apache-airflow[apache-atlas]", - "apache-airflow[async]", - "apache-airflow[cgroups]", - "apache-airflow[deprecated-api]", - "apache-airflow[github-enterprise]", - "apache-airflow[google-auth]", - "apache-airflow[graphviz]", - "apache-airflow[kerberos]", - "apache-airflow[ldap]", - "apache-airflow[leveldb]", - "apache-airflow[otel]", - "apache-airflow[pandas]", - "apache-airflow[password]", - "apache-airflow[pydantic]", - "apache-airflow[rabbitmq]", - "apache-airflow[s3fs]", - "apache-airflow[saml]", - "apache-airflow[sentry]", - "apache-airflow[statsd]", - "apache-airflow[apache-webhdfs]", - "apache-airflow[virtualenv]", -] -# START OF devel extras -devel-debuggers = [ - "ipdb>=0.13.13", -] -devel-devscripts = [ - "click>=8.0", - "gitpython>=3.1.40", - "hatch>=1.9.1", - "pipdeptree>=2.13.1", - "pygithub>=2.1.1", - "restructuredtext-lint>=1.4.0", - "rich-click>=1.7.0", - "semver>=3.0.2", - "towncrier>=23.11.0", - "twine>=4.0.2", -] -devel-duckdb = [ - # Python 3.12 support was added in 0.10.0 - "duckdb>=0.10.0; python_version >= '3.12'", - "duckdb>=0.9.0; python_version < '3.12'", -] -# Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them -# manually as `types-*`. See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -# for details. 
We want to install them explicitly because we want to eventually move to -# mypyd which does not support installing the types dynamically with --install-types -devel-mypy = [ - # TODO: upgrade to newer versions of MyPy continuously as they are released - # Make sure to upgrade the mypy version in update-common-sql-api-stubs in .pre-commit-config.yaml - # when you upgrade it here !!!! - "mypy==1.9.0", - "types-Deprecated", - "types-Markdown", - "types-PyMySQL", - "types-PyYAML", - "types-aiofiles", - "types-certifi", - "types-croniter", - "types-docutils", - "types-paramiko", - "types-protobuf", - "types-python-dateutil", - "types-python-slugify", - "types-pytz", - "types-redis", - "types-requests", - "types-setuptools", - "types-tabulate", - "types-termcolor", - "types-toml", -] -devel-sentry = [ - "blinker>=1.7.0", -] -devel-static-checks = [ - "black>=23.12.0", - "pre-commit>=3.5.0", - "ruff==0.3.4", - "yamllint>=1.33.0", -] -devel-tests = [ - "aiofiles>=23.2.0", - "aioresponses>=0.7.6", - "backports.zoneinfo>=0.2.1;python_version<'3.9'", - "beautifulsoup4>=4.7.1", - # Coverage 7.4.0 added experimental support for Python 3.12 PEP669 which we use in Airflow - "coverage>=7.4.0", - "pytest-asyncio>=0.23.3", - "pytest-cov>=4.1.0", - "pytest-icdiff>=0.9", - "pytest-instafail>=0.5.0", - "pytest-mock>=3.12.0", - "pytest-rerunfailures>=13.0", - "pytest-timeouts>=1.2.1", - "pytest-xdist>=3.5.0", - # Temporary upper limmit to <8, not all dependencies at that moment ready to use 8.0 - # Internal meta-task for track https://github.com/apache/airflow/issues/37156 - "pytest>=7.4.4,<8.0", - "requests_mock>=1.11.0", - "time-machine>=2.13.0", - "wheel>=0.42.0", -] -# END OF devel extras -# START OF doc extras -doc = [ - "astroid>=2.12.3,<3.0", - "checksumdir>=1.2.0", - # click 8.1.4 and 8.1.5 generate mypy errors due to typing issue in the upstream package: - # https://github.com/pallets/click/issues/2558 - "click>=8.0,!=8.1.4,!=8.1.5", - # Docutils 0.17.0 converts generated
into
and breaks our doc formatting - # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle - #
tags for sections - "docutils<0.17,>=0.16", - "sphinx-airflow-theme>=0.0.12", - "sphinx-argparse>=0.4.0", - # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 - # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x - "sphinx-autoapi>=2.1.1", - "sphinx-copybutton>=0.5.2", - "sphinx-design>=0.5.0", - "sphinx-jinja>=2.0.2", - "sphinx-rtd-theme>=2.0.0", - # Currently we are using sphinx 5 but we need to migrate to Sphinx 7 - "sphinx>=5.3.0,<6.0.0", - "sphinxcontrib-applehelp>=1.0.4", - "sphinxcontrib-devhelp>=1.0.2", - "sphinxcontrib-htmlhelp>=2.0.1", - "sphinxcontrib-httpdomain>=1.8.1", - "sphinxcontrib-jquery>=4.1", - "sphinxcontrib-jsmath>=1.0.1", - "sphinxcontrib-qthelp>=1.0.3", - "sphinxcontrib-redoc>=1.6.0", - "sphinxcontrib-serializinghtml==1.1.5", - "sphinxcontrib-spelling>=8.0.0", -] -doc-gen = [ - "apache-airflow[doc]", - "eralchemy2>=1.3.8", -] -# END OF doc extras -# START OF bundle extras -all-dbs = [ - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-pinot]", - "apache-airflow[arangodb]", - "apache-airflow[cloudant]", - "apache-airflow[databricks]", - "apache-airflow[exasol]", - "apache-airflow[influxdb]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[trino]", - "apache-airflow[vertica]", -] -devel = [ - "apache-airflow[celery]", - "apache-airflow[cncf-kubernetes]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", - "apache-airflow[devel-debuggers]", - "apache-airflow[devel-devscripts]", - "apache-airflow[devel-duckdb]", - "apache-airflow[devel-mypy]", - "apache-airflow[devel-sentry]", - 
"apache-airflow[devel-static-checks]", - "apache-airflow[devel-tests]", - "apache-airflow[fab]", - "apache-airflow[ftp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[sqlite]", -] -devel-all-dbs = [ - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-pinot]", - "apache-airflow[arangodb]", - "apache-airflow[cloudant]", - "apache-airflow[databricks]", - "apache-airflow[exasol]", - "apache-airflow[influxdb]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[trino]", - "apache-airflow[vertica]", -] -devel-ci = [ - "apache-airflow[devel-all]", -] -devel-hadoop = [ - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[devel]", - "apache-airflow[hdfs]", - "apache-airflow[kerberos]", - "apache-airflow[presto]", -] -# END OF bundle extras -############################################################################################################# -# The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series -############################################################################################################# -# START OF deprecated extras -atlas = [ - "apache-airflow[apache-atlas]", -] -aws = [ - "apache-airflow[amazon]", -] -azure = [ - "apache-airflow[microsoft-azure]", -] -cassandra = [ - "apache-airflow[apache-cassandra]", -] -# Empty alias extra just for backward compatibility with Airflow 1.10 -crypto = [ -] -druid = [ - "apache-airflow[apache-druid]", -] -gcp = [ - "apache-airflow[google]", -] -gcp_api = [ - "apache-airflow[google]", -] -hdfs = [ - "apache-airflow[apache-hdfs]", -] -hive = [ - 
"apache-airflow[apache-hive]", -] -kubernetes = [ - "apache-airflow[cncf-kubernetes]", -] -mssql = [ - "apache-airflow[microsoft-mssql]", -] -pinot = [ - "apache-airflow[apache-pinot]", -] -s3 = [ - "apache-airflow[amazon]", -] -spark = [ - "apache-airflow[apache-spark]", -] -webhdfs = [ - "apache-airflow[apache-webhdfs]", -] -winrm = [ - "apache-airflow[microsoft-winrm]", -] -# END OF deprecated extras -############################################################################################################# -# The whole section below is automatically generated by `update-providers-dependencies` pre-commit based -# on `provider.yaml` files present in the `providers` subdirectories. The `provider.yaml` files are -# A single source of truth for provider dependencies, +# START CORE EXTRAS HERE # -# PLEASE DO NOT MODIFY THIS SECTION MANUALLY. IT WILL BE OVERWRITTEN BY PRE-COMMIT !! -# If you want to modify these - modify the corresponding provider.yaml instead. -############################################################################################################# -# START OF GENERATED DEPENDENCIES -airbyte = [ # source: airflow/providers/airbyte/provider.yaml - "apache-airflow[http]", -] -alibaba = [ # source: airflow/providers/alibaba/provider.yaml - "alibabacloud_adb20211201>=1.0.0", - "alibabacloud_tea_openapi>=0.3.7", - "oss2>=2.14.0", -] -amazon = [ # source: airflow/providers/amazon/provider.yaml - "PyAthena>=3.0.10", - "apache-airflow[common_sql]", - "apache-airflow[http]", - "asgiref", - "boto3>=1.33.0", - "botocore>=1.33.0", - "inflection>=0.5.1", - "jsonpath_ng>=1.5.3", - "redshift_connector>=2.0.918", - "sqlalchemy_redshift>=0.8.6", - "watchtower>=2.0.1,<4", - # Devel dependencies for the amazon provider - "aiobotocore>=2.7.0", - "aws_xray_sdk>=2.12.0", - "moto[cloudformation,glue]>=5.0.0", - "mypy-boto3-appflow>=1.33.0", - "mypy-boto3-rds>=1.33.0", - "mypy-boto3-redshift-data>=1.33.0", - "mypy-boto3-s3>=1.33.0", - "s3fs>=2023.10.0", - 
"openapi-schema-validator>=0.6.2", - "openapi-spec-validator>=0.7.1", -] -apache-beam = [ # source: airflow/providers/apache/beam/provider.yaml - "apache-beam>=2.53.0;python_version != \"3.12\"", - "pyarrow>=14.0.1;python_version != \"3.12\"", -] -apache-cassandra = [ # source: airflow/providers/apache/cassandra/provider.yaml - "cassandra-driver>=3.29.1", -] -apache-drill = [ # source: airflow/providers/apache/drill/provider.yaml - "apache-airflow[common_sql]", - "sqlalchemy-drill>=1.1.0", -] -apache-druid = [ # source: airflow/providers/apache/druid/provider.yaml - "apache-airflow[common_sql]", - "pydruid>=0.4.1", -] -apache-flink = [ # source: airflow/providers/apache/flink/provider.yaml - "apache-airflow[cncf_kubernetes]", - "cryptography>=2.0.0", -] -apache-hdfs = [ # source: airflow/providers/apache/hdfs/provider.yaml - "hdfs[avro,dataframe,kerberos]>=2.0.4", -] -apache-hive = [ # source: airflow/providers/apache/hive/provider.yaml - "apache-airflow[common_sql]", - "hmsclient>=0.1.0", - "pandas>=1.2.5,<2.2", - "pyhive[hive_pure_sasl]>=0.7.0", - "thrift>=0.9.2", -] -apache-impala = [ # source: airflow/providers/apache/impala/provider.yaml - "impyla>=0.18.0,<1.0", -] -apache-kafka = [ # source: airflow/providers/apache/kafka/provider.yaml - "asgiref", - "confluent-kafka>=1.8.2", -] -apache-kylin = [ # source: airflow/providers/apache/kylin/provider.yaml - "kylinpy>=2.6", -] -apache-livy = [ # source: airflow/providers/apache/livy/provider.yaml - "aiohttp>=3.9.2", - "apache-airflow[http]", - "asgiref", -] -apache-pig = [] # source: airflow/providers/apache/pig/provider.yaml -apache-pinot = [ # source: airflow/providers/apache/pinot/provider.yaml - "apache-airflow[common_sql]", - "pinotdb>=5.1.0", -] -apache-spark = [ # source: airflow/providers/apache/spark/provider.yaml - "grpcio-status>=1.59.0", - "pyspark", -] -apprise = [ # source: airflow/providers/apprise/provider.yaml - "apprise", -] -arangodb = [ # source: airflow/providers/arangodb/provider.yaml - 
"python-arango>=7.3.2", -] -asana = [ # source: airflow/providers/asana/provider.yaml - "asana>=0.10,<4.0.0", -] -atlassian-jira = [ # source: airflow/providers/atlassian/jira/provider.yaml - "atlassian-python-api>=1.14.2,!=3.41.6", - "beautifulsoup4", -] -celery = [ # source: airflow/providers/celery/provider.yaml - "celery[redis]>=5.3.0,<6,!=5.3.3,!=5.3.2", - "flower>=1.0.0", - "google-re2>=1.0", -] -cloudant = [ # source: airflow/providers/cloudant/provider.yaml - "cloudant>=2.0", -] -cncf-kubernetes = [ # source: airflow/providers/cncf/kubernetes/provider.yaml - "aiofiles>=23.2.0", - "asgiref>=3.5.2", - "cryptography>=2.0.0", - "google-re2>=1.0", - "kubernetes>=28.1.0,<=29.0.0", - "kubernetes_asyncio>=28.1.0,<=29.0.0", -] -cohere = [ # source: airflow/providers/cohere/provider.yaml - "cohere>=4.37,<5", -] -common-io = [] # source: airflow/providers/common/io/provider.yaml -common-sql = [ # source: airflow/providers/common/sql/provider.yaml - "more-itertools>=9.0.0", - "sqlparse>=0.4.2", -] -databricks = [ # source: airflow/providers/databricks/provider.yaml - "aiohttp>=3.9.2, <4", - "apache-airflow[common_sql]", - "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0", - "requests>=2.27.0,<3", - # Devel dependencies for the databricks provider - "deltalake>=0.12.0", -] -datadog = [ # source: airflow/providers/datadog/provider.yaml - "datadog>=0.14.0", -] -dbt-cloud = [ # source: airflow/providers/dbt/cloud/provider.yaml - "aiohttp>=3.9.2", - "apache-airflow[http]", - "asgiref", -] -dingding = [ # source: airflow/providers/dingding/provider.yaml - "apache-airflow[http]", -] -discord = [ # source: airflow/providers/discord/provider.yaml - "apache-airflow[http]", -] -docker = [ # source: airflow/providers/docker/provider.yaml - "docker>=6", - "python-dotenv>=0.21.0", -] -elasticsearch = [ # source: airflow/providers/elasticsearch/provider.yaml - "apache-airflow[common_sql]", - "elasticsearch>=8.10,<9", -] -exasol = [ # source: airflow/providers/exasol/provider.yaml - 
"apache-airflow[common_sql]", - "pandas>=1.2.5,<2.2", - "pyexasol>=0.5.1", -] -fab = [ # source: airflow/providers/fab/provider.yaml - "flask-appbuilder==4.4.1", - "flask-login>=0.6.2", - "flask>=2.2,<2.3", - "google-re2>=1.0", -] -facebook = [ # source: airflow/providers/facebook/provider.yaml - "facebook-business>=6.0.2", -] -ftp = [] # source: airflow/providers/ftp/provider.yaml -github = [ # source: airflow/providers/github/provider.yaml - "PyGithub!=1.58", -] -google = [ # source: airflow/providers/google/provider.yaml - "PyOpenSSL", - "apache-airflow[common_sql]", - "asgiref>=3.5.2", - "gcloud-aio-auth>=4.0.0,<5.0.0", - "gcloud-aio-bigquery>=6.1.2", - "gcloud-aio-storage>=9.0.0", - "gcsfs>=2023.10.0", - "google-ads>=23.1.0", - "google-analytics-admin", - "google-api-core>=2.11.0,!=2.16.0", - "google-api-python-client>=1.6.0", - "google-auth-httplib2>=0.0.1", - "google-auth>=1.0.0", - "google-cloud-aiplatform>=1.42.1", - "google-cloud-batch>=0.13.0", - "google-cloud-bigquery-datatransfer>=3.13.0", - "google-cloud-bigtable>=2.17.0", - "google-cloud-build>=3.22.0", - "google-cloud-compute>=1.10.0", - "google-cloud-container>=2.17.4", - "google-cloud-datacatalog>=3.11.1", - "google-cloud-dataflow-client>=0.8.6", - "google-cloud-dataform>=0.5.0", - "google-cloud-dataplex>=1.10.0", - "google-cloud-dataproc-metastore>=1.12.0", - "google-cloud-dataproc>=5.8.0", - "google-cloud-dlp>=3.12.0", - "google-cloud-kms>=2.15.0", - "google-cloud-language>=2.9.0", - "google-cloud-logging>=3.5.0", - "google-cloud-memcache>=1.7.0", - "google-cloud-monitoring>=2.18.0", - "google-cloud-orchestration-airflow>=1.10.0", - "google-cloud-os-login>=2.9.1", - "google-cloud-pubsub>=2.19.0", - "google-cloud-redis>=2.12.0", - "google-cloud-run>=0.9.0", - "google-cloud-secret-manager>=2.16.0", - "google-cloud-spanner>=3.11.1", - "google-cloud-speech>=2.18.0", - "google-cloud-storage-transfer>=1.4.1", - "google-cloud-storage>=2.7.0", - "google-cloud-tasks>=2.13.0", - 
"google-cloud-texttospeech>=2.14.1", - "google-cloud-translate>=3.11.0", - "google-cloud-videointelligence>=2.11.0", - "google-cloud-vision>=3.4.0", - "google-cloud-workflows>=1.10.0", - "grpcio-gcp>=0.2.2", - "httpx", - "json-merge-patch>=0.2", - "looker-sdk>=22.2.0", - "pandas-gbq", - "pandas>=1.2.5,<2.2", - "proto-plus>=1.19.6", - "python-slugify>=5.0", - "sqlalchemy-bigquery>=1.2.1", - "sqlalchemy-spanner>=1.6.2", -] -grpc = [ # source: airflow/providers/grpc/provider.yaml - "google-auth-httplib2>=0.0.1", - "google-auth>=1.0.0, <3.0.0", - "grpcio>=1.15.0", -] -hashicorp = [ # source: airflow/providers/hashicorp/provider.yaml - "hvac>=1.1.0", -] -http = [ # source: airflow/providers/http/provider.yaml - "aiohttp>=3.9.2", - "asgiref", - "requests>=2.27.0,<3", - "requests_toolbelt", -] -imap = [] # source: airflow/providers/imap/provider.yaml -influxdb = [ # source: airflow/providers/influxdb/provider.yaml - "influxdb-client>=1.19.0", - "requests>=2.27.0,<3", -] -jdbc = [ # source: airflow/providers/jdbc/provider.yaml - "apache-airflow[common_sql]", - "jaydebeapi>=1.1.1", -] -jenkins = [ # source: airflow/providers/jenkins/provider.yaml - "python-jenkins>=1.0.0", -] -microsoft-azure = [ # source: airflow/providers/microsoft/azure/provider.yaml - "adal>=1.2.7", - "adlfs>=2023.10.0", - "azure-batch>=8.0.0", - "azure-cosmos>=4.0.0,<4.6.0", - "azure-datalake-store>=0.0.45", - "azure-identity>=1.3.1", - "azure-keyvault-secrets>=4.1.0", - "azure-kusto-data>=4.1.0", - "azure-mgmt-containerinstance>=9.0.0", - "azure-mgmt-containerregistry>=8.0.0", - "azure-mgmt-cosmosdb", - "azure-mgmt-datafactory>=2.0.0", - "azure-mgmt-datalake-store>=0.5.0", - "azure-mgmt-resource>=2.2.0", - "azure-mgmt-storage>=16.0.0", - "azure-servicebus>=7.12.1", - "azure-storage-blob>=12.14.0", - "azure-storage-file-datalake>=12.9.1", - "azure-storage-file-share", - "azure-synapse-artifacts>=0.17.0", - "azure-synapse-spark", - # Devel dependencies for the microsoft.azure provider - "pywinrm", -] 
-microsoft-mssql = [ # source: airflow/providers/microsoft/mssql/provider.yaml - "apache-airflow[common_sql]", - "pymssql>=2.1.8", -] -microsoft-psrp = [ # source: airflow/providers/microsoft/psrp/provider.yaml - "pypsrp>=0.8.0", -] -microsoft-winrm = [ # source: airflow/providers/microsoft/winrm/provider.yaml - "pywinrm>=0.4", -] -mongo = [ # source: airflow/providers/mongo/provider.yaml - "dnspython>=1.13.0", - "pymongo>=3.6.0", - # Devel dependencies for the mongo provider - "mongomock", -] -mysql = [ # source: airflow/providers/mysql/provider.yaml - "apache-airflow[common_sql]", - "mysql-connector-python>=8.0.29", - "mysqlclient>=1.3.6", -] -neo4j = [ # source: airflow/providers/neo4j/provider.yaml - "neo4j>=4.2.1", -] -odbc = [ # source: airflow/providers/odbc/provider.yaml - "apache-airflow[common_sql]", - "pyodbc", -] -openai = [ # source: airflow/providers/openai/provider.yaml - "openai[datalib]>=1.0", -] -openfaas = [] # source: airflow/providers/openfaas/provider.yaml -openlineage = [ # source: airflow/providers/openlineage/provider.yaml - "apache-airflow[common_sql]", - "attrs>=22.2", - "openlineage-integration-common>=0.28.0", - "openlineage-python>=0.28.0", -] -opensearch = [ # source: airflow/providers/opensearch/provider.yaml - "opensearch-py>=2.2.0", -] -opsgenie = [ # source: airflow/providers/opsgenie/provider.yaml - "opsgenie-sdk>=2.1.5", -] -oracle = [ # source: airflow/providers/oracle/provider.yaml - "apache-airflow[common_sql]", - "oracledb>=1.0.0", -] -pagerduty = [ # source: airflow/providers/pagerduty/provider.yaml - "pdpyras>=4.1.2", -] -papermill = [ # source: airflow/providers/papermill/provider.yaml - "ipykernel;python_version != \"3.12\"", - "papermill[all]>=2.4.0;python_version != \"3.12\"", - "scrapbook[all];python_version != \"3.12\"", -] -pgvector = [ # source: airflow/providers/pgvector/provider.yaml - "apache-airflow[postgres]", - "pgvector>=0.2.3", -] -pinecone = [ # source: airflow/providers/pinecone/provider.yaml - 
"pinecone-client>=2.2.4,<3.0", -] -postgres = [ # source: airflow/providers/postgres/provider.yaml - "apache-airflow[common_sql]", - "psycopg2-binary>=2.8.0", -] -presto = [ # source: airflow/providers/presto/provider.yaml - "apache-airflow[common_sql]", - "pandas>=1.2.5,<2.2", - "presto-python-client>=0.8.4", -] -qdrant = [ # source: airflow/providers/qdrant/provider.yaml - "qdrant_client>=1.7.0", -] -redis = [ # source: airflow/providers/redis/provider.yaml - "redis>=4.5.2,!=4.5.5,!=5.0.2", -] -salesforce = [ # source: airflow/providers/salesforce/provider.yaml - "pandas>=1.2.5,<2.2", - "simple-salesforce>=1.0.0", -] -samba = [ # source: airflow/providers/samba/provider.yaml - "smbprotocol>=1.5.0", -] -segment = [ # source: airflow/providers/segment/provider.yaml - "analytics-python>=1.2.9", -] -sendgrid = [ # source: airflow/providers/sendgrid/provider.yaml - "sendgrid>=6.0.0", -] -sftp = [ # source: airflow/providers/sftp/provider.yaml - "apache-airflow[ssh]", - "asyncssh>=2.12.0", - "paramiko>=2.8.0", -] -singularity = [ # source: airflow/providers/singularity/provider.yaml - "spython>=0.0.56", -] -slack = [ # source: airflow/providers/slack/provider.yaml - "apache-airflow[common_sql]", - "slack_sdk>=3.19.0", -] -smtp = [] # source: airflow/providers/smtp/provider.yaml -snowflake = [ # source: airflow/providers/snowflake/provider.yaml - "apache-airflow[common_sql]", - "snowflake-connector-python>=2.7.8", - "snowflake-sqlalchemy>=1.1.0", -] -sqlite = [ # source: airflow/providers/sqlite/provider.yaml - "apache-airflow[common_sql]", -] -ssh = [ # source: airflow/providers/ssh/provider.yaml - "paramiko>=2.6.0", - "sshtunnel>=0.3.2", -] -tableau = [ # source: airflow/providers/tableau/provider.yaml - "tableauserverclient", -] -tabular = [ # source: airflow/providers/tabular/provider.yaml - # Devel dependencies for the tabular provider - "pyiceberg>=0.5.0", -] -telegram = [ # source: airflow/providers/telegram/provider.yaml - "python-telegram-bot>=20.2", -] 
-teradata = [ # source: airflow/providers/teradata/provider.yaml - "apache-airflow[common_sql]", - "teradatasql>=17.20.0.28", - "teradatasqlalchemy>=17.20.0.0", -] -trino = [ # source: airflow/providers/trino/provider.yaml - "apache-airflow[common_sql]", - "pandas>=1.2.5,<2.2", - "trino>=0.318.0", -] -vertica = [ # source: airflow/providers/vertica/provider.yaml - "apache-airflow[common_sql]", - "vertica-python>=0.5.1", -] -weaviate = [ # source: airflow/providers/weaviate/provider.yaml - "pandas>=1.2.5,<2.2", - "weaviate-client>=3.24.2", -] -yandex = [ # source: airflow/providers/yandex/provider.yaml - "yandex-query-client>=0.1.2", - "yandexcloud>=0.228.0", -] -zendesk = [ # source: airflow/providers/zendesk/provider.yaml - "zenpy>=2.0.40", -] -all = [ - # core extras - "apache-airflow[aiobotocore]", - "apache-airflow[async]", - "apache-airflow[cgroups]", - "apache-airflow[deprecated-api]", - "apache-airflow[github-enterprise]", - "apache-airflow[google-auth]", - "apache-airflow[graphviz]", - "apache-airflow[kerberos]", - "apache-airflow[ldap]", - "apache-airflow[leveldb]", - "apache-airflow[otel]", - "apache-airflow[pandas]", - "apache-airflow[password]", - "apache-airflow[pydantic]", - "apache-airflow[rabbitmq]", - "apache-airflow[s3fs]", - "apache-airflow[saml]", - "apache-airflow[sentry]", - "apache-airflow[statsd]", - "apache-airflow[uv]", - "apache-airflow[virtualenv]", - # Apache no provider extras - "apache-airflow[apache-atlas]", - "apache-airflow[apache-webhdfs]", - "apache-airflow[all-core]", - # Provider extras - "apache-airflow[airbyte]", - "apache-airflow[alibaba]", - "apache-airflow[amazon]", - "apache-airflow[apache-beam]", - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-flink]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-kafka]", - "apache-airflow[apache-kylin]", - 
"apache-airflow[apache-livy]", - "apache-airflow[apache-pig]", - "apache-airflow[apache-pinot]", - "apache-airflow[apache-spark]", - "apache-airflow[apprise]", - "apache-airflow[arangodb]", - "apache-airflow[asana]", - "apache-airflow[atlassian-jira]", - "apache-airflow[celery]", - "apache-airflow[cloudant]", - "apache-airflow[cncf-kubernetes]", - "apache-airflow[cohere]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", - "apache-airflow[databricks]", - "apache-airflow[datadog]", - "apache-airflow[dbt-cloud]", - "apache-airflow[dingding]", - "apache-airflow[discord]", - "apache-airflow[docker]", - "apache-airflow[elasticsearch]", - "apache-airflow[exasol]", - "apache-airflow[fab]", - "apache-airflow[facebook]", - "apache-airflow[ftp]", - "apache-airflow[github]", - "apache-airflow[google]", - "apache-airflow[grpc]", - "apache-airflow[hashicorp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[influxdb]", - "apache-airflow[jdbc]", - "apache-airflow[jenkins]", - "apache-airflow[microsoft-azure]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[microsoft-psrp]", - "apache-airflow[microsoft-winrm]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[odbc]", - "apache-airflow[openai]", - "apache-airflow[openfaas]", - "apache-airflow[openlineage]", - "apache-airflow[opensearch]", - "apache-airflow[opsgenie]", - "apache-airflow[oracle]", - "apache-airflow[pagerduty]", - "apache-airflow[papermill]", - "apache-airflow[pgvector]", - "apache-airflow[pinecone]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[qdrant]", - "apache-airflow[redis]", - "apache-airflow[salesforce]", - "apache-airflow[samba]", - "apache-airflow[segment]", - "apache-airflow[sendgrid]", - "apache-airflow[sftp]", - "apache-airflow[singularity]", - "apache-airflow[slack]", - "apache-airflow[smtp]", - "apache-airflow[snowflake]", - "apache-airflow[sqlite]", - "apache-airflow[ssh]", - 
"apache-airflow[tableau]", - "apache-airflow[tabular]", - "apache-airflow[telegram]", - "apache-airflow[teradata]", - "apache-airflow[trino]", - "apache-airflow[vertica]", - "apache-airflow[weaviate]", - "apache-airflow[yandex]", - "apache-airflow[zendesk]", -] -devel-all = [ - "apache-airflow[all]", - "apache-airflow[devel]", - "apache-airflow[doc]", - "apache-airflow[doc-gen]", - "apache-airflow[saml]", - # Apache no provider extras - "apache-airflow[apache-atlas]", - "apache-airflow[apache-webhdfs]", - "apache-airflow[all-core]", - # Include all provider deps - "apache-airflow[airbyte]", - "apache-airflow[alibaba]", - "apache-airflow[amazon]", - "apache-airflow[apache-beam]", - "apache-airflow[apache-cassandra]", - "apache-airflow[apache-drill]", - "apache-airflow[apache-druid]", - "apache-airflow[apache-flink]", - "apache-airflow[apache-hdfs]", - "apache-airflow[apache-hive]", - "apache-airflow[apache-impala]", - "apache-airflow[apache-kafka]", - "apache-airflow[apache-kylin]", - "apache-airflow[apache-livy]", - "apache-airflow[apache-pig]", - "apache-airflow[apache-pinot]", - "apache-airflow[apache-spark]", - "apache-airflow[apprise]", - "apache-airflow[arangodb]", - "apache-airflow[asana]", - "apache-airflow[atlassian-jira]", - "apache-airflow[celery]", - "apache-airflow[cloudant]", - "apache-airflow[cncf-kubernetes]", - "apache-airflow[cohere]", - "apache-airflow[common-io]", - "apache-airflow[common-sql]", - "apache-airflow[databricks]", - "apache-airflow[datadog]", - "apache-airflow[dbt-cloud]", - "apache-airflow[dingding]", - "apache-airflow[discord]", - "apache-airflow[docker]", - "apache-airflow[elasticsearch]", - "apache-airflow[exasol]", - "apache-airflow[fab]", - "apache-airflow[facebook]", - "apache-airflow[ftp]", - "apache-airflow[github]", - "apache-airflow[google]", - "apache-airflow[grpc]", - "apache-airflow[hashicorp]", - "apache-airflow[http]", - "apache-airflow[imap]", - "apache-airflow[influxdb]", - "apache-airflow[jdbc]", - 
"apache-airflow[jenkins]", - "apache-airflow[microsoft-azure]", - "apache-airflow[microsoft-mssql]", - "apache-airflow[microsoft-psrp]", - "apache-airflow[microsoft-winrm]", - "apache-airflow[mongo]", - "apache-airflow[mysql]", - "apache-airflow[neo4j]", - "apache-airflow[odbc]", - "apache-airflow[openai]", - "apache-airflow[openfaas]", - "apache-airflow[openlineage]", - "apache-airflow[opensearch]", - "apache-airflow[opsgenie]", - "apache-airflow[oracle]", - "apache-airflow[pagerduty]", - "apache-airflow[papermill]", - "apache-airflow[pgvector]", - "apache-airflow[pinecone]", - "apache-airflow[postgres]", - "apache-airflow[presto]", - "apache-airflow[qdrant]", - "apache-airflow[redis]", - "apache-airflow[salesforce]", - "apache-airflow[samba]", - "apache-airflow[segment]", - "apache-airflow[sendgrid]", - "apache-airflow[sftp]", - "apache-airflow[singularity]", - "apache-airflow[slack]", - "apache-airflow[smtp]", - "apache-airflow[snowflake]", - "apache-airflow[sqlite]", - "apache-airflow[ssh]", - "apache-airflow[tableau]", - "apache-airflow[tabular]", - "apache-airflow[telegram]", - "apache-airflow[teradata]", - "apache-airflow[trino]", - "apache-airflow[vertica]", - "apache-airflow[weaviate]", - "apache-airflow[yandex]", - "apache-airflow[zendesk]", -] -# END OF GENERATED DEPENDENCIES -############################################################################################################# -# The rest of the pyproject.toml file should be manually maintained -############################################################################################################# +# aiobotocore, apache-atlas, apache-webhdfs, async, cgroups, deprecated-api, github-enterprise, +# google-auth, graphviz, kerberos, ldap, leveldb, otel, pandas, password, pydantic, rabbitmq, s3fs, +# saml, sentry, statsd, uv, virtualenv +# +# END CORE EXTRAS HERE +# +# The ``devel`` extras are not available in the released packages. 
They are only available when you install +# Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +# Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing. +# +# START DEVEL EXTRAS HERE +# +# devel, devel-all-dbs, devel-ci, devel-debuggers, devel-devscripts, devel-duckdb, devel-hadoop, +# devel-mypy, devel-sentry, devel-static-checks, devel-tests +# +# END DEVEL EXTRAS HERE +# +# Those extras are bundles dynamically generated from other extras. +# +# START BUNDLE EXTRAS HERE +# +# all, all-core, all-dbs, devel-all, devel-ci +# +# END BUNDLE EXTRAS HERE +# +# The ``doc`` extras are not available in the released packages. They are only available when you install +# Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +# Airflow. They provide tools needed when you want to build Airflow documentation (note that you also need +# ``devel`` extras installed for airflow and providers in order to build documentation for airflow and +# provider packages respectively). The ``doc`` package is enough to build regular documentation, where +# ``doc_gen`` is needed to generate ER diagram we have describing our database. +# +# START DOC EXTRAS HERE +# +# doc, doc-gen +# +# END DOC EXTRAS HERE +# +# The `deprecated` extras are deprecated extras from Airflow 1 that will be removed in future versions. +# +# START DEPRECATED EXTRAS HERE +# +# atlas, aws, azure, cassandra, crypto, druid, gcp, gcp-api, hdfs, hive, kubernetes, mssql, pinot, s3, +# spark, webhdfs, winrm +# +# END DEPRECATED EXTRAS HERE +# +# !!!!!! Those providers are defined in the `airflow/providers/<PROVIDER>/provider.yaml` files !!!!!!! +# +# Those extras are available as regular Airflow extras, they install provider packages in standard builds +# or dependencies that are necessary to enable the feature in editable build. 
+# START PROVIDER EXTRAS HERE +# +# airbyte, alibaba, amazon, apache.beam, apache.cassandra, apache.drill, apache.druid, apache.flink, +# apache.hdfs, apache.hive, apache.impala, apache.kafka, apache.kylin, apache.livy, apache.pig, +# apache.pinot, apache.spark, apprise, arangodb, asana, atlassian.jira, celery, cloudant, +# cncf.kubernetes, cohere, common.io, common.sql, databricks, datadog, dbt.cloud, dingding, discord, +# docker, elasticsearch, exasol, fab, facebook, ftp, github, google, grpc, hashicorp, http, imap, +# influxdb, jdbc, jenkins, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, +# mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, pagerduty, +# papermill, pgvector, pinecone, postgres, presto, qdrant, redis, salesforce, samba, segment, +# sendgrid, sftp, singularity, slack, smtp, snowflake, sqlite, ssh, tableau, tabular, telegram, +# teradata, trino, vertica, weaviate, yandex, zendesk +# +# END PROVIDER EXTRAS HERE + [project.scripts] airflow = "airflow.__main__:main" [project.urls] @@ -1219,7 +159,7 @@ YouTube = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" python = "3.8" platforms = ["linux", "macos"] description = "Default environment with Python 3.8 for maximum compatibility" -features = ["devel"] +features = [] [tool.hatch.envs.airflow-38] python = "3.8" @@ -1277,7 +217,6 @@ artifacts = [ "/airflow/www/static/dist/", "/airflow/git_version", "/generated/", - "/airflow_pre_installed_providers.txt", ] diff --git a/scripts/ci/pre_commit/common_precommit_utils.py b/scripts/ci/pre_commit/common_precommit_utils.py index 8926bc1823b20..41bc3a5eeaf93 100644 --- a/scripts/ci/pre_commit/common_precommit_utils.py +++ b/scripts/ci/pre_commit/common_precommit_utils.py @@ -73,21 +73,36 @@ def pre_process_files(files: list[str]) -> list[str]: return result -def insert_documentation(file_path: Path, content: list[str], header: str, footer: str): - text = 
file_path.read_text().splitlines(keepends=True) +def insert_documentation( + file_path: Path, content: list[str], header: str, footer: str, add_comment: bool = False +) -> bool: + found = False + old_content = file_path.read_text() + lines = old_content.splitlines(keepends=True) replacing = False result: list[str] = [] - for line in text: + for line in lines: if line.strip().startswith(header.strip()): replacing = True + found = True result.append(line) - result.extend(content) + if add_comment: + result.extend(["# " + line if line != "\n" else "#\n" for line in content]) + else: + result.extend(content) if line.strip().startswith(footer.strip()): replacing = False if not replacing: result.append(line) - src = "".join(result) - file_path.write_text(src) + new_content = "".join(result) + if not found: + print(f"Header {header} not found in {file_path}") + sys.exit(1) + if new_content != old_content: + file_path.write_text(new_content) + console.print(f"Updated {file_path}") + return True + return False def initialize_breeze_precommit(name: str, file: str): diff --git a/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py index dbeca287de4cb..6e5c410338ec3 100755 --- a/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py +++ b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py @@ -28,24 +28,17 @@ from tabulate import tabulate -# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files -try: - import tomllib -except ImportError: - import tomli as tomllib - - AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +COMMON_PRECOMMIT_PATH = Path(__file__).parent.resolve() EXTRA_PACKAGES_REF_FILE = AIRFLOW_ROOT_PATH / "docs" / "apache-airflow" / "extra-packages-ref.rst" PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" -sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported - 
+sys.path.insert(0, COMMON_PRECOMMIT_PATH.as_posix()) # make sure common_precommit_utils is imported from common_precommit_utils import console -pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) +sys.path.insert(0, AIRFLOW_ROOT_PATH.as_posix()) # make sure airflow root is imported +from hatch_build import ALL_DYNAMIC_EXTRAS -optional_dependencies: dict[str, list[str]] = pyproject_toml_content["project"]["optional-dependencies"] doc_ref_content = EXTRA_PACKAGES_REF_FILE.read_text() errors: list[str] = [] @@ -55,7 +48,7 @@ suggestions_devel: list[tuple] = [] suggestions_providers: list[tuple] = [] -for dependency in optional_dependencies: +for dependency in ALL_DYNAMIC_EXTRAS: console.print(f"[bright_blue]Checking if {dependency} is mentioned in refs[/]") find_matching = re.search(rf"^\| {dependency} *\|", doc_ref_content, flags=re.MULTILINE) if not find_matching: @@ -95,4 +88,4 @@ console.print(tabulate(suggestions_providers, headers=HEADERS, tablefmt="grid"), markup=False) sys.exit(1) else: - console.print(f"[green]Checked: {len(optional_dependencies)} dependencies are mentioned[/]") + console.print(f"[green]Checked: {len(ALL_DYNAMIC_EXTRAS)} dependencies are mentioned[/]") diff --git a/scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py b/scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py new file mode 100755 index 0000000000000..9208d3331f93e --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_order_hatch_build.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Test for an order of dependencies in hatch_build.py +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +from rich import print + +errors: list[str] = [] + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +HATCH_BUILD_PATH = AIRFLOW_ROOT_PATH / "hatch_build.py" + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported +from common_precommit_utils import check_list_sorted + +sys.path.insert(0, str(AIRFLOW_ROOT_PATH)) # make sure airflow root is imported +from hatch_build import ALL_DYNAMIC_EXTRA_DICTS + +if __name__ == "__main__": + file_contents = HATCH_BUILD_PATH.read_text() + + for extra_dict, description in ALL_DYNAMIC_EXTRA_DICTS: + for extra, extra_list in extra_dict.items(): + check_list_sorted(extra_list, f"Order of extra: {description}:{extra}", errors) + print() + for error in errors: + print(error) + + print() + + if errors: + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py b/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py deleted file mode 100755 index 46fa056c537f5..0000000000000 --- a/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Test for an order of dependencies in setup.py -""" - -from __future__ import annotations - -import re -import sys -from pathlib import Path - -from rich import print - -errors: list[str] = [] - -AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() -PYPROJECT_TOML_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" - -sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported -from common_precommit_utils import check_list_sorted - - -def check_extras(type: str, extra: str, extras: list[str]) -> None: - r""" - Test for an order of dependencies in extra defined - `^dependent_group_name = [.*?]\n` in setup.py - """ - print(f"[info]Checking {type}:{extra}[/]") - extras = [extra.replace("[", "\\[") for extra in extras] - check_list_sorted(extras, f"Order of extra: {type}:{extra}", errors) - - -def extract_deps(content: str, extra: str) -> list[str]: - deps: list[str] = [] - extracting = False - for line in content.splitlines(): - line = line.strip() - if line.startswith("#"): - continue - if not extracting and line == f"{extra} = [": - extracting = True - elif extracting and line == "]": - break - elif extracting: - deps.append(line.strip().strip(",").strip('"')) - return deps - - -def check_type(pyproject_toml_contents: str, type: str) -> None: - """ - Test for an order of dependencies groups between mark - '# Start dependencies group' and '# End 
dependencies group' in setup.py - """ - print(f"[info]Checking {type}[/]") - pattern_type = re.compile(f"# START OF {type}\n(.*)# END OF {type}", re.DOTALL) - parsed_type_content = pattern_type.findall(pyproject_toml_contents)[0] - # strip comments - parsed_type_content = ( - "\n".join([line for line in parsed_type_content.splitlines() if not line.startswith("#")]) + "\n" - ) - pattern_extra_name = re.compile(r" = \[.*?]\n", re.DOTALL) - type_content = pattern_extra_name.sub(",", parsed_type_content) - - list_extra_names = type_content.strip(",").split(",") - check_list_sorted(list_extra_names, "Order of dependencies", errors) - for extra in list_extra_names: - deps_list = extract_deps(parsed_type_content, extra) - check_extras(type, extra, deps_list) - - -if __name__ == "__main__": - file_contents = PYPROJECT_TOML_PATH.read_text() - check_type(file_contents, "core extras") - check_type(file_contents, "Apache no provider extras") - check_type(file_contents, "devel extras") - check_type(file_contents, "doc extras") - check_type(file_contents, "bundle extras") - check_type(file_contents, "deprecated extras") - - print() - for error in errors: - print(error) - - print() - - if errors: - sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py index d64cd6cd5589a..e32ad199b43e8 100755 --- a/scripts/ci/pre_commit/pre_commit_insert_extras.py +++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py @@ -19,89 +19,79 @@ import sys import textwrap -from enum import Enum from pathlib import Path -# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files -try: - import tomllib -except ImportError: - import tomli as tomllib - AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported from 
common_precommit_utils import insert_documentation - -class ExtraType(Enum): - DEVEL = "DEVEL" - DOC = "DOC" - REGULAR = "REGULAR" +sys.path.insert(0, AIRFLOW_ROOT_PATH.as_posix()) # make sure airflow root is imported +from hatch_build import ( + ALL_DYNAMIC_EXTRA_DICTS, + ALL_GENERATED_BUNDLE_EXTRAS, + BUNDLE_EXTRAS, + PROVIDER_DEPENDENCIES, +) -def get_header_and_footer(extra_type: ExtraType, file_format: str) -> tuple[str, str]: +def get_header_and_footer(extra_type: str, file_format: str) -> tuple[str, str]: if file_format == "rst": - return f" .. START {extra_type.value} EXTRAS HERE", f" .. END {extra_type.value} EXTRAS HERE" + return f" .. START {extra_type.upper()} HERE", f" .. END {extra_type.upper()} HERE" elif file_format == "txt": - return f"# START {extra_type.value} EXTRAS HERE", f"# END {extra_type.value} EXTRAS HERE" + return f"# START {extra_type.upper()} HERE", f"# END {extra_type.upper()} HERE" else: raise Exception(f"Bad format {format} passed. Only rst and txt are supported") -def get_wrapped_list(extras_set: set[str]) -> list[str]: +def get_wrapped_list(extras_set: list[str]) -> list[str]: array = [line + "\n" for line in textwrap.wrap(", ".join(sorted(extras_set)), 100)] array.insert(0, "\n") array.append("\n") return array -def get_extra_types_dict(extras: dict[str, list[str]]) -> dict[ExtraType, tuple[set[str], list[str]]]: +def get_extra_types_dict() -> dict[str, list[str]]: """ Split extras into four types. 
:return: dictionary of extra types with tuple of two set,list - set of extras and text-wrapped list """ - extra_type_dict: dict[ExtraType, tuple[set[str], list[str]]] = {} - - for extra_type in ExtraType: - extra_type_dict[extra_type] = (set(), []) - - for key, value in extras.items(): - if key.startswith("devel"): - extra_type_dict[ExtraType.DEVEL][0].add(key) - elif key in ["doc", "doc-gen"]: - extra_type_dict[ExtraType.DOC][0].add(key) - else: - extra_type_dict[ExtraType.REGULAR][0].add(key) - - for extra_type in ExtraType: - extra_type_dict[extra_type][1].extend(get_wrapped_list(extra_type_dict[extra_type][0])) - + extra_type_dict: dict[str, list[str]] = {} + + for extra_dict, extra_description in ALL_DYNAMIC_EXTRA_DICTS: + extra_list = sorted(extra_dict) + if extra_dict == BUNDLE_EXTRAS: + extra_list = sorted(extra_list + ALL_GENERATED_BUNDLE_EXTRAS) + extra_type_dict[extra_description] = get_wrapped_list(extra_list) + extra_type_dict["Provider extras"] = get_wrapped_list(PROVIDER_DEPENDENCIES) return extra_type_dict -def get_extras_from_pyproject_toml() -> dict[str, list[str]]: - pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) - return pyproject_toml_content["project"]["optional-dependencies"] - - -FILES_TO_UPDATE = [ - (AIRFLOW_ROOT_PATH / "INSTALL", "txt"), - (AIRFLOW_ROOT_PATH / "contributing-docs" / "12_airflow_dependencies_and_extras.rst", "rst"), +FILES_TO_UPDATE: list[tuple[Path, str, bool]] = [ + (AIRFLOW_ROOT_PATH / "INSTALL", "txt", False), + (AIRFLOW_ROOT_PATH / "contributing-docs" / "12_airflow_dependencies_and_extras.rst", "rst", False), + (AIRFLOW_ROOT_PATH / "pyproject.toml", "txt", True), ] -def process_documentation_files(): - extra_type_dict = get_extra_types_dict(get_extras_from_pyproject_toml()) - for file, file_format in FILES_TO_UPDATE: +def process_documentation_files() -> bool: + changed = False + extra_type_dict = get_extra_types_dict() + for file, file_format, add_comment in FILES_TO_UPDATE: if not 
file.exists(): raise Exception(f"File {file} does not exist") - for extra_type in ExtraType: - header, footer = get_header_and_footer(extra_type, file_format) - insert_documentation(file, extra_type_dict[extra_type][1], header, footer) + for extra_type_description, extra_list in extra_type_dict.items(): + header, footer = get_header_and_footer(extra_type_description, file_format) + if insert_documentation( + file, extra_type_dict[extra_type_description], header, footer, add_comment + ): + changed = True + return changed if __name__ == "__main__": - process_documentation_files() + if process_documentation_files(): + print("Some files were updated. Please commit them.") + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py b/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py index 897fddf2ef061..c97addf50178b 100755 --- a/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py +++ b/scripts/ci/pre_commit/pre_commit_sort_installed_providers.py @@ -45,6 +45,4 @@ def sort_file(path: Path): if __name__ == "__main__": prod_image_installed_providers_path = AIRFLOW_SOURCES / "prod_image_installed_providers.txt" - airflow_pre_installed_providers_path = AIRFLOW_SOURCES / "airflow_pre_installed_providers.txt" sort_file(prod_image_installed_providers_path) - sort_file(airflow_pre_installed_providers_path) diff --git a/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py index 64c96a90e4777..af0916bbd3ebe 100755 --- a/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_build_dependencies.py @@ -17,6 +17,7 @@ # under the License. 
from __future__ import annotations +import re import shutil import subprocess import sys @@ -26,8 +27,19 @@ AIRFLOW_SOURCES = Path(__file__).parents[3].resolve() PYPROJECT_TOML_FILE = AIRFLOW_SOURCES / "pyproject.toml" +HATCHLING_MATCH = re.compile(r"hatchling==[0-9.]*") + +FILES_TO_REPLACE_HATCHLING_IN = [ + AIRFLOW_SOURCES / ".pre-commit-config.yaml", + AIRFLOW_SOURCES / "clients" / "python" / "pyproject.toml", + AIRFLOW_SOURCES / "docker_tests" / "requirements.txt", +] + +files_changed = False + if __name__ == "__main__": temp_dir = Path(tempfile.mkdtemp()) + hatchling_spec = "" try: subprocess.check_call([sys.executable, "-m", "venv", temp_dir.as_posix()]) venv_python = temp_dir / "bin" / "python" @@ -47,10 +59,25 @@ if dep.startswith("tomli=="): dep = dep + "; python_version < '3.11'" result.append(f' "{dep}",') + if dep.startswith("hatchling=="): + hatchling_spec = dep if skipping and line == "]": skipping = False result.append(line) result.append("") - PYPROJECT_TOML_FILE.write_text("\n".join(result)) + new_pyproject_toml_file_content = "\n".join(result) + if new_pyproject_toml_file_content != pyproject_toml_content: + files_changed = True + PYPROJECT_TOML_FILE.write_text(new_pyproject_toml_file_content) + for file_to_replace_hatchling in FILES_TO_REPLACE_HATCHLING_IN: + old_file_content = file_to_replace_hatchling.read_text() + new_file_content = HATCHLING_MATCH.sub(hatchling_spec, old_file_content, re.MULTILINE) + if new_file_content != old_file_content: + files_changed = True + file_to_replace_hatchling.write_text(new_file_content) finally: shutil.rmtree(temp_dir) + + if files_changed: + print("Some files changed. 
Please commit the changes.") + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index 72d9acb524fad..ca1d36aed2961 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -22,7 +22,6 @@ import sys from ast import Import, ImportFrom, NodeVisitor, parse from collections import defaultdict -from enum import Enum from pathlib import Path from typing import Any, List @@ -182,154 +181,6 @@ def check_if_different_provider_used(file_path: Path) -> None: FOUND_EXTRAS: dict[str, list[str]] = defaultdict(list) - -class ParsedDependencyTypes(Enum): - CORE_EXTRAS = "core extras" - APACHE_NO_PROVIDER_EXTRAS = "Apache no provider extras" - DEVEL_EXTRAS = "devel extras" - DOC_EXTRAS = "doc extras" - BUNDLE_EXTRAS = "bundle extras" - DEPRECATED_EXTRAS = "deprecated extras" - MANUAL_EXTRAS = "manual extras" - - -GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES" -GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES" - - -def normalize_extra(dependency: str) -> str: - return dependency.replace(".", "-").replace("_", "-") - - -def normalize_package_name(dependency: str) -> str: - return f"apache-airflow-providers-{dependency.replace('.', '-').replace('_', '-')}" - - -def convert_to_extra_dependency(dependency: str) -> str: - # if there is version in dependency - remove it as we do not need it in extra specification - # for editable installation - if ">=" in dependency: - dependency = dependency.split(">=")[0] - extra = dependency.replace("apache-airflow-providers-", "").replace("-", "_").replace(".", "_") - return f"apache-airflow[{extra}]" - - -def generate_dependencies( - result_content: list[str], - dependencies: dict[str, dict[str, list[str] | str]], -): - def generate_parsed_extras(type: ParsedDependencyTypes): - result_content.append(f" # 
{type.value}") - for extra in FOUND_EXTRAS[type.value]: - result_content.append(f' "apache-airflow[{extra}]",') - - def get_python_exclusion(dependency_info: dict[str, list[str] | str]): - excluded_python_versions = dependency_info.get("excluded-python-versions") - exclusion = "" - if excluded_python_versions: - separator = ";" - for version in excluded_python_versions: - exclusion += f'{separator}python_version != \\"{version}\\"' - separator = " and " - return exclusion - - for dependency, dependency_info in dependencies.items(): - if dependency_info["state"] in ["suspended", "removed"]: - continue - deps = dependency_info["deps"] - deps = [dep for dep in deps if not dep.startswith("apache-airflow>=")] - devel_deps = dependency_info.get("devel-deps") - if not deps and not devel_deps: - result_content.append( - f"{normalize_extra(dependency)} = [] " - f"# source: airflow/providers/{dependency.replace('.', '/')}/provider.yaml" - ) - continue - result_content.append( - f"{normalize_extra(dependency)} = " - f"[ # source: airflow/providers/{dependency.replace('.', '/')}/provider.yaml" - ) - if not isinstance(deps, list): - raise TypeError(f"Wrong type of 'deps' {deps} for {dependency} in {DEPENDENCIES_JSON_FILE_PATH}") - for dep in deps: - if dep.startswith("apache-airflow-providers-"): - dep = convert_to_extra_dependency(dep) - result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') - if devel_deps: - result_content.append(f" # Devel dependencies for the {dependency} provider") - for dep in devel_deps: - result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') - result_content.append("]") - result_content.append("all = [") - generate_parsed_extras(ParsedDependencyTypes.CORE_EXTRAS) - generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) - result_content.append(" # Provider extras") - for dependency, dependency_info in dependencies.items(): - result_content.append(f' 
"apache-airflow[{normalize_extra(dependency)}]",') - result_content.append("]") - result_content.append("devel-all = [") - result_content.append(' "apache-airflow[all]",') - result_content.append(' "apache-airflow[devel]",') - result_content.append(' "apache-airflow[doc]",') - result_content.append(' "apache-airflow[doc-gen]",') - result_content.append(' "apache-airflow[saml]",') - generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) - result_content.append(" # Include all provider deps") - for dependency, dependency_info in dependencies.items(): - result_content.append(f' "apache-airflow[{normalize_extra(dependency)}]",') - result_content.append("]") - - -def get_dependency_type(dependency_type: str) -> ParsedDependencyTypes | None: - for dep_type in ParsedDependencyTypes: - if dep_type.value == dependency_type: - return dep_type - return None - - -def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]) -> bool: - file_content = PYPROJECT_TOML_FILE_PATH.read_text() - result_content: list[str] = [] - copying = True - current_type: str | None = None - line_count: int = 0 - for line in file_content.splitlines(): - if copying: - result_content.append(line) - if line.strip().startswith(GENERATED_DEPENDENCIES_START): - copying = False - generate_dependencies(result_content, dependencies) - elif line.strip().startswith(GENERATED_DEPENDENCIES_END): - copying = True - result_content.append(line) - elif line.strip().startswith("# START OF "): - current_type = line.strip().replace("# START OF ", "") - type_enum = get_dependency_type(current_type) - if type_enum is None: - console.print( - f"[red]Wrong start of section '{current_type}' in {PYPROJECT_TOML_FILE_PATH} " - f"at line {line_count}: Unknown section type" - ) - sys.exit(1) - elif line.strip().startswith("# END OF "): - end_type = line.strip().replace("# END OF ", "") - if end_type != current_type: - console.print( - f"[red]Wrong end of section {end_type} in 
{PYPROJECT_TOML_FILE_PATH} at line {line_count}" - ) - sys.exit(1) - if current_type: - if line.strip().endswith(" = ["): - FOUND_EXTRAS[current_type].append(line.split(" = [")[0].strip()) - line_count += 1 - result_content.append("") - new_file_content = "\n".join(result_content) - if file_content != new_file_content: - PYPROJECT_TOML_FILE_PATH.write_text(new_file_content) - return True - return False - - if __name__ == "__main__": find_all_providers_and_provider_files() num_files = len(ALL_PROVIDER_FILES) @@ -395,19 +246,4 @@ def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]) - ) console.print(f"Written {DEPENDENCIES_JSON_FILE_PATH}") console.print() - if update_pyproject_toml(unique_sorted_dependencies): - if os.environ.get("CI"): - console.print(f"There is a need to regenerate {PYPROJECT_TOML_FILE_PATH}") - console.print( - f"[red]You need to run the following command locally and commit generated " - f"{PYPROJECT_TOML_FILE_PATH.relative_to(AIRFLOW_SOURCES_ROOT)} file:\n" - ) - console.print("breeze static-checks --type update-providers-dependencies --all-files") - console.print() - console.print() - console.print("[yellow]Make sure to rebase your changes on the latest main branch!") - console.print() - sys.exit(1) - else: - console.print(f"Written {PYPROJECT_TOML_FILE_PATH}") console.print() diff --git a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh index 8158ab5886aa2..41389ccda6675 100644 --- a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh +++ b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh @@ -45,13 +45,17 @@ function install_airflow_dependencies_from_branch_tip() { if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} fi + local TEMP_AIRFLOW_DIR + TEMP_AIRFLOW_DIR=$(mktemp -d) # Install latest set of dependencies - without constraints. 
This is to download a "base" set of # dependencies that we can cache and reuse when installing airflow using constraints and latest # pyproject.toml in the next step (when we install regular airflow). set -x - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} \ - ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - "apache-airflow[${AIRFLOW_EXTRAS}] @ https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" + curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ + tar xvz -C "${TEMP_AIRFLOW_DIR}" --strip 1 + # Make sure editable dependencies are calculated when devel-ci dependencies are installed + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ + --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" set +x common::install_packaging_tools set -x @@ -67,6 +71,7 @@ function install_airflow_dependencies_from_branch_tip() { set +x ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow set -x + rm -rvf "${TEMP_AIRFLOW_DIR}" # If you want to make sure dependency is removed from cache in your PR when you removed it from # pyproject.toml - please add your dependency here as a list of strings # for example: