Initial commit

2026-05-28 17:44:11 +02:00 · 2026-05-28 17:44:11 +02:00 · a578239c4f
commit a578239c4f
parent d4028fca11
32 changed files with 2559 additions and 0 deletions
--- a/Demo/stackit-14-stackit-operators.py
+++ b/Demo/stackit-14-stackit-operators.py
@@ -0,0 +1,141 @@
+import pendulum
+
+from airflow.sdk import dag
+from stackit_workflows.airflow_plugin.operators import STACKITSparkScriptOperator, STACKITPythonScriptOperator
+from stackit_workflows.airflow_plugin.decorators import stackit
+
+# -------------------------------------------------------------------------
+# DEMO DAG: How to use the STACKIT Operators
+# -------------------------------------------------------------------------
+# This DAG demonstrates the different ways to run workloads on Kubernetes
+# using the STACKIT operator suite. It covers three main scenarios:
+#
+# 1. Running Python code directly with the @stackit.python_kubernetes_task decorator
+# 2. Running Spark code directly with the @stackit.spark_kubernetes_task decorator
+# 3. Running standalone Python or Spark scripts with the ScriptOperators (sections 3 and 4)
+#
+# Each operator abstracts away boilerplate (like syncing the repo,
+# configuring the namespace, injecting context, mounting secrets, etc.),
+# so you can focus only on your workload.
+# -------------------------------------------------------------------------
+
+# It is good practice (but not required) to specify the image you want to run on.
+# By default, the STACKIT Spark image contains Spark, Java, Python, and common libs.
+default_kwargs = dict(  # shared by the Spark decorator and the Spark script operator
+    image="schwarzit-xx-sit-dp-customer-artifactory-docker-local.jfrog.io/stackit-spark:spark3.5.3-0.1.2",
+)
+
+
+@dag(
+    dag_id="stackit_14_stackit_operators",
+    schedule=None,
+    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
+    catchup=False,
+    tags=["demo", "stackit-demo"],
+)
+def stackit_operators():
+    # ---------------------------------------------------------------------
+    # 1) PYTHON DECORATOR
+    # ---------------------------------------------------------------------
+    # @stackit.python_kubernetes_task is the simplest way to run Python
+    # code in Kubernetes.
+    #
+    # What you get out of the box:
+    # - A prebuilt Python image with pandas, requests, etc.
+    # - The Airflow context (dag_id, run_id, task_id, etc.) injected as ENV vars
+    # - Your DAG repo git-synced into the pod (so imports just work)
+    # - Execution in the correct namespace with STACKIT defaults
+    #
+    # Prefer this when you want to write Python directly inside your DAG
+    # file without worrying about container setup.
+    @stackit.python_kubernetes_task()
+    def stackit_decorated_python_kubernetes():
+        import os
+
+        # A helper module from the synced repo can be imported directly.
+        from scripts.my_tools.say_hello import say_hello
+        say_hello()
+
+        # The Airflow context is available through environment variables.
+        current_task_id = os.environ["AIRFLOW__CONTEXT__TASK__TASK_ID"]
+        print(f"Task ID: {current_task_id}")
+
+        # Print all relevant injected ENV variables, sorted by name.
+        injected = {
+            name: val
+            for name, val in os.environ.items()
+            if name.startswith(("AIRFLOW__CONTEXT", "STACKIT__"))
+        }
+        for name in sorted(injected):
+            print(f"{name}: {injected[name]}")
+
+    # ---------------------------------------------------------------------
+    # 2) SPARK DECORATOR
+    # ---------------------------------------------------------------------
+    # @stackit.spark_kubernetes_task runs Spark jobs from Airflow without
+    # any manual Spark-on-K8s setup.
+    #
+    # What you get:
+    # - A single-node Spark cluster by default
+    # - The stackit_spark helper library for easy SparkSession setup
+    # - CPU/memory scaling via parameters
+    #
+    # Prefer this when your Spark transformations are written as functions.
+    @stackit.spark_kubernetes_task(**default_kwargs)
+    def stackit_decorated_spark_kubernetes(random_number: int):
+        import stackit_spark
+        import pandas as pd
+
+        session = stackit_spark.get_spark()
+        pandas_frame = pd.DataFrame({"random_number": [random_number]})
+        session.createDataFrame(pandas_frame).show()
+
+    # ---------------------------------------------------------------------
+    # 3) PYTHON SCRIPT OPERATOR
+    # ---------------------------------------------------------------------
+    # STACKITPythonScriptOperator is for Python scripts that already live
+    # in your repository instead of being embedded in the DAG.
+    #
+    # What you get:
+    # - Execution of a .py file from your DAG repo
+    # - The same repo sync and injected Airflow context
+    # - Compatibility with any image that has Python installed
+    #
+    # Note: unlike the Spark operator, the stackit_spark helper module is
+    # only available if your image includes it.
+    python_script_task = STACKITPythonScriptOperator(
+        task_id="stackit_python_kubernetes",
+        script="Demo/scripts/basic_python_script.py",  # path from the repo root
+    )
+
+    # ---------------------------------------------------------------------
+    # 4) SPARK SCRIPT OPERATOR
+    # ---------------------------------------------------------------------
+    # STACKITSparkScriptOperator is for Spark jobs stored as standalone
+    # scripts (in this repo or in another git repo).
+    #
+    # What you get:
+    # - Execution of a .py Spark job script
+    # - Working imports inside the script (thanks to git-sync)
+    # - The specified Spark image with configurable resources
+    #
+    # For cross-repo jobs, you can set `git_repo`, `git_ref`, etc.
+    spark_script_task = STACKITSparkScriptOperator(
+        task_id="stackit_spark_kubernetes",
+        script="Demo/scripts/my_spark_job_with_imports.py",  # path from the repo root
+        cpu=2,
+        **default_kwargs,
+    )
+
+    # ---------------------------------------------------------------------
+    # DAG FLOW
+    # ---------------------------------------------------------------------
+    # Order: inline Python first, then the Spark function, and finally
+    # the script-based operators.
+    inline_python = stackit_decorated_python_kubernetes()
+    inline_spark = stackit_decorated_spark_kubernetes(12)
+    script_tasks = [python_script_task, spark_script_task]
+    inline_python >> inline_spark >> script_tasks
+
+
+stackit_operators()  # call the @dag-decorated function so the DAG object is actually created