"""Demo DAG: a KubernetesPodOperator pod next to a STACKIT Kubernetes task."""
import json

import pendulum
from airflow.sdk import dag, task
from airflow.providers.cncf.kubernetes.operators.pod import (
    KubernetesPodOperator,
)

from stackit_workflows.kubernetes import POD_NAMESPACE, K8S
from stackit_workflows.airflow_plugin.decorators import stackit


@dag(
    dag_id="stackit_03_kubernetes_operator",
    schedule=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["demo", "stackit-demo"],
)
def kubernetes_operator():
    # Any image you like can be launched through the KubernetesPodOperator.
    # A pod may even start additional pods itself - the default serviceAccount
    # has permissions to manage pods, configmaps and services of type ClusterIP.
    # Not every pod field can be set freely: an admission controller is in
    # place that modifies taints and other fields.
    # Pods run in a dedicated namespace.

    # Container resource requests and limits. Setting them is mandatory.
    # Optionally "ephemeral-storage" can be added to either dict
    # (e.g. "1Gi" for requests, "2Gi" for limits).
    pod_resources = K8S.V1ResourceRequirements(
        requests={"cpu": "100m", "memory": "50Mi"},
        limits={"cpu": "200m", "memory": "100Mi"},
    )

    # All launched pods must run as a non-root user, otherwise they won't start.
    non_root_context = K8S.V1PodSecurityContext(run_as_user=100)

    hello_pod = KubernetesPodOperator(
        task_id="dry_run_demo",
        name="hello-dry-run",
        image="debian",
        # image="unknown_image",  # alternative image, left disabled
        cmds=["bash", "-c"],
        arguments=["echo hello world"],
        labels={"foo": "bar"},
        get_logs=True,
        log_events_on_failure=True,
        do_xcom_push=True,
        container_resources=pod_resources,
        security_context=non_root_context,
    )

    # The stackit python kubernetes decorator extends the kubernetes decorator.
    # On top of it, the decorator:
    #   - specifies a default, up-to-date data transformation image with
    #     python, pandas and other libraries pre-installed
    #   - sets the namespace correctly
    #   - provides the Airflow "context" as environment variables
    #   - synchronizes this repository into the launched pod so that imports
    #     from the repository can be used
    @stackit.python_kubernetes_task()
    def stackit_python_kubernetes():
        import os

        # Importing from the DAG repository like this is not possible with the
        # KubernetesPodOperator unless you clone the DAG repo yourself!
        from scripts.my_tools.say_hello import say_hello

        say_hello()

        # The most common Airflow context variables are provided as
        # environment variables.
        task_id = os.environ["AIRFLOW__CONTEXT__TASK__TASK_ID"]
        print(f"Task ID: {task_id}")

        # Print every Airflow-context / STACKIT environment variable,
        # ordered by variable name.
        wanted_prefixes = ("AIRFLOW__CONTEXT", "STACKIT__")
        for key in sorted(os.environ):
            if key.startswith(wanted_prefixes):
                value = os.environ[key]
                print(f"{key}: {value}")

    stackit_python_kubernetes()


kubernetes_operator()