Skip to content
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure how this was produced, but the diff view in my browser shows the whole file as deleted and re-created. Can you please rebase so that the diff contains only your actual changes?

Original file line number Diff line number Diff line change
@@ -1,142 +1,187 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Example DAG demonstrating the usage of the branching TaskFlow API decorators.

It shows how to use standard Python ``@task.branch`` as well as the external Python
version ``@task.branch_external_python`` which calls an external Python interpreter and
the ``@task.branch_virtualenv`` which builds a temporary Python virtual environment.
"""

from __future__ import annotations

import random
import sys
import tempfile

import pendulum

from airflow.providers.common.compat.sdk import TriggerRule
from airflow.providers.standard.operators.empty import EmptyOperator
from airflow.sdk import DAG, Label, task

PATH_TO_PYTHON_BINARY = sys.executable

with DAG(
dag_id="example_branch_python_operator_decorator",
start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
catchup=False,
schedule="@daily",
tags=["example", "example2"],
) as dag:
run_this_first = EmptyOperator(task_id="run_this_first")

options = ["a", "b", "c", "d"]

# Example branching on standard Python tasks

# [START howto_operator_branch_python]
@task.branch()
def branching(choices: list[str]) -> str:
    """Pick one entry of ``choices`` at random and return ``branch_<entry>``.

    The returned string has the same shape as the ``branch_{option}`` task ids
    created in this DAG, so it names the branch to follow.
    """
    picked = random.choice(choices)
    return f"branch_{picked}"

# [END howto_operator_branch_python]

random_choice_instance = branching(choices=options)

run_this_first >> random_choice_instance

join = EmptyOperator(task_id="join", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

for option in options:

@task(task_id=f"branch_{option}")
def some_task():
    """Print a fixed message; a placeholder for the work done on this branch."""
    message = "doing something in Python"
    print(message)

t = some_task()
empty = EmptyOperator(task_id=f"follow_{option}")

# Label is optional here, but it can help identify more complex branches
random_choice_instance >> Label(option) >> t >> empty >> join

# The same example, using external Python calls

# [START howto_operator_branch_ext_py]
@task.branch_external_python(python=PATH_TO_PYTHON_BINARY)
def branching_ext_python(choices) -> str:
    """Pick one entry of ``choices`` at random and return ``ext_py_<entry>``."""
    # Imported inside the body: this callable is run by the interpreter given
    # in ``python=``, so it brings in the modules it needs itself.
    import random

    picked = random.choice(choices)
    return f"ext_py_{picked}"

# [END howto_operator_branch_ext_py]

random_choice_ext_py = branching_ext_python(choices=options)

join >> random_choice_ext_py

join_ext_py = EmptyOperator(task_id="join_ext_py", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

for option in options:

@task.external_python(task_id=f"ext_py_{option}", python=PATH_TO_PYTHON_BINARY)
def some_ext_py_task():
    """Print a fixed message; a placeholder for work done in external Python."""
    message = "doing something in external Python"
    print(message)

t = some_ext_py_task()

# Label is optional here, but it can help identify more complex branches
random_choice_ext_py >> Label(option) >> t >> join_ext_py

# The same example, using Python virtual environments

# [START howto_operator_branch_virtualenv]
# Note: Passing a cache directory allows the virtual environment to be kept across multiple runs.
# Run the example a second time to see that the environment is reused and the run is faster.
VENV_CACHE_PATH = tempfile.gettempdir()

@task.branch_virtualenv(requirements=["numpy~=1.26.0"], venv_cache_path=VENV_CACHE_PATH)
def branching_virtualenv(choices) -> str:
    """Print a small numpy array, then return ``venv_<entry>`` for a random entry of ``choices``."""
    # Imports live inside the body; numpy is the package listed in ``requirements``.
    import random

    import numpy as np

    sample = np.arange(6)
    print(f"Some numpy stuff: {sample}")

    picked = random.choice(choices)
    return f"venv_{picked}"

# [END howto_operator_branch_virtualenv]

random_choice_venv = branching_virtualenv(choices=options)

join_ext_py >> random_choice_venv

join_venv = EmptyOperator(task_id="join_venv", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

for option in options:

@task.virtualenv(
    task_id=f"venv_{option}",
    requirements=["numpy~=1.26.0"],
    venv_cache_path=VENV_CACHE_PATH,
)
def some_venv_task():
    """Print a small numpy array; a placeholder for work done in the virtualenv."""
    # numpy is imported inside the body; it is the package listed in ``requirements``.
    import numpy as np

    sample = np.arange(6)
    print(f"Some numpy stuff: {sample}")

t = some_venv_task()

# Label is optional here, but it can help identify more complex branches
random_choice_venv >> Label(option) >> t >> join_venv
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG demonstrating the usage of branching TaskFlow API decorators.

This example shows how to use standard Python `@task.branch`, as well as
`@task.branch_external_python` and `@task.branch_virtualenv` for branching
logic executed in external Python interpreters or isolated virtual
environments.
"""

from __future__ import annotations

import random
import sys
import tempfile

import pendulum

from airflow.providers.common.compat.sdk import TriggerRule
from airflow.providers.standard.operators.empty import EmptyOperator
from airflow.sdk import DAG, Label, task

PATH_TO_PYTHON_BINARY = sys.executable


with DAG(
dag_id="example_branch_python_operator_decorator",
start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
catchup=False,
schedule="@daily",
tags=["example", "example2"],
doc_md="""
### Branch Decorator: Runtime Path Selection

Branching enables conditional execution paths within a DAG by selecting
which downstream task(s) should run at runtime, while all other paths
are marked as skipped. This allows mutually exclusive workflows to be
expressed cleanly within a single DAG definition.

**How branching selects execution paths:**
- A branch task returns the `task_id` (or list of `task_id`s) corresponding
to the next task(s) that should execute
- Only the returned downstream task(s) are executed; all other immediate
downstream tasks are marked as skipped
- Skipped branches do not fail the DAG run and are treated as a normal
execution outcome

**Handling skipped branches downstream:**
- Tasks that follow a branching point must use trigger rules that account
for skipped upstream tasks (for example, `NONE_FAILED_MIN_ONE_SUCCESS`)
- Without appropriate trigger rules, downstream tasks may not execute
as expected due to skipped upstream states
- This behavior differs from short-circuiting, where all downstream
execution may be prevented entirely

**Common use cases:**
- Conditional data processing based on runtime characteristics
(for example, small vs. large datasets)
- Environment-driven workflows where different paths are selected
dynamically
- Optional enrichment or validation steps that should only run when needed
- Mutually exclusive downstream actions within a single DAG

**Branching vs. Python if/else:**
Branching is not equivalent to a Python `if/else` statement. All possible
branches exist in the DAG graph at parse time, and the branch task selects
which path is taken during execution.

📖 **Related documentation**
https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html#branching
""",
) as dag:
run_this_first = EmptyOperator(task_id="run_this_first")

options = ["a", "b", "c", "d"]

# Example branching with standard Python tasks

# [START howto_operator_branch_python]
@task.branch()
def branching(choices: list[str]) -> str:
    """Return ``branch_<entry>`` for a randomly selected entry of ``choices``.

    The result matches the ``branch_{option}`` task ids defined in this DAG,
    i.e. it is the ``task_id`` of the branch that should run.
    """
    selected = random.choice(choices)
    return f"branch_{selected}"

# [END howto_operator_branch_python]

random_choice_instance = branching(choices=options)

run_this_first >> random_choice_instance

join = EmptyOperator(task_id="join", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

for option in options:

@task(task_id=f"branch_{option}")
def some_task():
    """Print a fixed message; stands in for the real work of this branch."""
    text = "doing something in Python"
    print(text)

t = some_task()
empty = EmptyOperator(task_id=f"follow_{option}")

# Label is optional here, but it can help identify more complex branches
random_choice_instance >> Label(option) >> t >> empty >> join

# Example branching with external Python execution

# [START howto_operator_branch_ext_py]
@task.branch_external_python(python=PATH_TO_PYTHON_BINARY)
def branching_ext_python(choices) -> str:
    """Return ``ext_py_<entry>`` for a randomly selected entry of ``choices``."""
    # Imported locally: the body is executed by the interpreter passed as
    # ``python=``, so it must import what it uses itself.
    import random

    selected = random.choice(choices)
    return f"ext_py_{selected}"

# [END howto_operator_branch_ext_py]

random_choice_ext_py = branching_ext_python(choices=options)

join >> random_choice_ext_py

join_ext_py = EmptyOperator(task_id="join_ext_py", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

for option in options:

@task.external_python(task_id=f"ext_py_{option}", python=PATH_TO_PYTHON_BINARY)
def some_ext_py_task():
    """Print a fixed message; stands in for real work run in external Python."""
    text = "doing something in external Python"
    print(text)

t = some_ext_py_task()

# Label is optional here, but it can help identify more complex branches
random_choice_ext_py >> Label(option) >> t >> join_ext_py

# Example branching with Python virtual environments

# [START howto_operator_branch_virtualenv]
# Passing a cache directory allows the virtual environment to be reused
# across runs, reducing setup overhead on subsequent executions.
VENV_CACHE_PATH = tempfile.gettempdir()

@task.branch_virtualenv(requirements=["numpy~=1.26.0"], venv_cache_path=VENV_CACHE_PATH)
def branching_virtualenv(choices) -> str:
    """Print a short numpy range and return ``venv_<entry>`` for a random entry of ``choices``."""
    # Local imports only; numpy is the dependency declared in ``requirements``.
    import random

    import numpy as np

    numbers = np.arange(6)
    print(f"Some numpy stuff: {numbers}")

    selected = random.choice(choices)
    return f"venv_{selected}"

# [END howto_operator_branch_virtualenv]

random_choice_venv = branching_virtualenv(choices=options)

join_ext_py >> random_choice_venv

join_venv = EmptyOperator(task_id="join_venv", trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

for option in options:

@task.virtualenv(
    task_id=f"venv_{option}",
    requirements=["numpy~=1.26.0"],
    venv_cache_path=VENV_CACHE_PATH,
)
def some_venv_task():
    """Print a short numpy range; stands in for real work run in the virtualenv."""
    # Local import only; numpy is the dependency declared in ``requirements``.
    import numpy as np

    numbers = np.arange(6)
    print(f"Some numpy stuff: {numbers}")

t = some_venv_task()

# Label is optional here, but it can help identify more complex branches
random_choice_venv >> Label(option) >> t >> join_venv
Loading
Loading