"""Demo DAG showing how to install an extra Python package at task runtime."""

import pendulum
from airflow.sdk import dag
from stackit_workflows.airflow_plugin.decorators import stackit

# Keyword arguments shared by the Spark-on-Kubernetes task below; currently
# only the container image to run the task in.
default_kwargs = {
    "image": "schwarzit-xx-sit-dp-customer-artifactory-docker-local.jfrog.io/stackit-spark:spark3.5.3-0.1.2"
}


@dag(
    dag_id="stackit_07_extra_packages",
    schedule=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["demo", "stackit-demo"],
)
def packages():
    # In the STACKIT-provided images, the Python environment is owned by the
    # runtime user, so packages can be installed on the fly with `pip`,
    # `conda`, or `mamba`.
    # We recommend `mamba` whenever possible: it ships optimized binaries and
    # resolves dependencies instead of merely warning about incompatibilities.
    # Installing at runtime consumes compute resources every time the image is
    # started. If this task needs to run frequently, we recommend building a
    # custom image that already contains the required packages.
    @stackit.spark_kubernetes_task(**default_kwargs)
    def tell_jokes():
        # Imports are local to the task function (presumably because the body
        # runs inside the Spark image -- verify against the decorator).
        import subprocess
        import sys

        # Invoke pip through the running interpreter so the package is
        # installed into the same environment that imports it below.
        install_cmd = [sys.executable, "-m", "pip", "install", "Joking"]
        subprocess.check_call(install_cmd)

        # Must come after the install step: the package is not available
        # before the pip call above has succeeded.
        import Joking

        print(Joking.random_joke())

    tell_jokes()


packages()