Initial commit
This commit is contained in:
parent
d4028fca11
commit
a578239c4f
32 changed files with 2559 additions and 0 deletions
141
Demo/stackit-14-stackit-operators.py
Normal file
141
Demo/stackit-14-stackit-operators.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
import pendulum
|
||||
|
||||
from airflow.sdk import dag
|
||||
from stackit_workflows.airflow_plugin.operators import STACKITSparkScriptOperator, STACKITPythonScriptOperator
|
||||
from stackit_workflows.airflow_plugin.decorators import stackit
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# DEMO DAG: How to use the STACKIT Operators
|
||||
# -------------------------------------------------------------------------
|
||||
# This DAG demonstrates the different ways to run workloads on Kubernetes
|
||||
# using the STACKIT operator suite. It covers three main scenarios:
|
||||
#
|
||||
# 1. Running Python code directly with the @stackit decorator
|
||||
# 2. Running Spark workloads via the @stackit Spark decorator
|
||||
# 3. Running standalone Python or Spark scripts with the ScriptOperators
|
||||
#
|
||||
# Each operator abstracts away boilerplate (like syncing the repo,
|
||||
# configuring the namespace, injecting context, mounting secrets, etc.),
|
||||
# so you can focus only on your workload.
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# It is good practice (but not required) to specify the image you want to run on.
|
||||
# By default, the STACKIT Spark image contains Spark, Java, Python, and common libs.
|
||||
# Keyword arguments shared by the Spark decorator and the Spark script operator
# below, so both run on the same pinned container image.
default_kwargs = {
    "image": "schwarzit-xx-sit-dp-customer-artifactory-docker-local.jfrog.io/stackit-spark:spark3.5.3-0.1.2",
}
|
||||
|
||||
|
||||
@dag(
    schedule=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["demo", "stackit-demo"],
    dag_id="stackit_14_stackit_operators",
)
def stackit_operators():
    """Demo DAG showing the STACKIT decorators and script operators.

    The DAG runs an inline Python task, then an inline Spark task, and
    finally fans out to two script-based operators (one Python, one Spark).
    It has no schedule and is meant to be triggered manually.
    """

    # ---------------------------------------------------------------------
    # 1) PYTHON DECORATOR
    # ---------------------------------------------------------------------
    # The @stackit.python_kubernetes_task decorator is the simplest way
    # to run Python code in Kubernetes.
    #
    # Features provided out-of-the-box:
    #   - Uses a prebuilt Python image with pandas, requests, etc.
    #   - Injects Airflow context (dag_id, run_id, task_id, etc.) as ENV vars
    #   - Git-syncs your DAG repo into the pod (so imports just work)
    #   - Runs in the correct namespace with STACKIT defaults
    #
    # This is the preferred way if you want to write Python directly inside
    # your DAG file without worrying about container setup.
    @stackit.python_kubernetes_task()
    def stackit_decorated_python_kubernetes():
        """Call a repo helper and print the injected context environment."""
        # Imports live inside the function because the body executes in the
        # task's pod, not in the process that parses this DAG file.
        import os

        # Example: import a helper module from the repo
        from scripts.my_tools.say_hello import say_hello

        say_hello()

        # Show how Airflow context is injected as environment variables.
        # A plain lookup is used on purpose: a missing variable should fail
        # the demo loudly instead of printing a placeholder.
        task_id = os.environ["AIRFLOW__CONTEXT__TASK__TASK_ID"]
        print(f"Task ID: {task_id}")

        # Print all relevant injected ENV variables, sorted by name.
        # str.startswith accepts a tuple, so one call covers both prefixes.
        injected_prefixes = ("AIRFLOW__CONTEXT", "STACKIT__")
        env_vars = {
            key: value
            for key, value in os.environ.items()
            if key.startswith(injected_prefixes)
        }
        for key, value in sorted(env_vars.items()):
            print(f"{key}: {value}")

    # ---------------------------------------------------------------------
    # 2) SPARK DECORATOR
    # ---------------------------------------------------------------------
    # The @stackit.spark_kubernetes_task decorator makes it easy to run
    # Spark jobs from Airflow, without setting up Spark-on-K8s manually.
    #
    # Features:
    #   - Starts a single-node Spark cluster by default
    #   - Includes the stackit_spark helper library for easy SparkSession setup
    #   - Allows you to scale CPU/memory via parameters
    #
    # This is the preferred way to run Spark transformations as functions.
    @stackit.spark_kubernetes_task(**default_kwargs)
    def stackit_decorated_spark_kubernetes(random_number: int):
        """Build a one-row Spark DataFrame from ``random_number`` and show it."""
        import stackit_spark
        import pandas as pd

        spark = stackit_spark.get_spark()
        df = spark.createDataFrame(pd.DataFrame({"random_number": [random_number]}))
        df.show()

    # ---------------------------------------------------------------------
    # 3) PYTHON SCRIPT OPERATOR
    # ---------------------------------------------------------------------
    # If you already have a Python script in your repository (instead of
    # embedding code directly in the DAG), use STACKITPythonScriptOperator.
    #
    # Features:
    #   - Executes a .py file from your DAG repo
    #   - Still benefits from repo sync and injected Airflow context
    #   - Works with any image that has Python installed
    #
    # Note: Unlike the Spark operator, this does not provide the
    # stackit_spark helper module, unless your image includes it.
    stackit_python_kubernetes = STACKITPythonScriptOperator(
        task_id="stackit_python_kubernetes",
        script="Demo/scripts/basic_python_script.py",  # relative to repo root
    )

    # ---------------------------------------------------------------------
    # 4) SPARK SCRIPT OPERATOR
    # ---------------------------------------------------------------------
    # Use STACKITSparkScriptOperator if you have Spark jobs stored as
    # standalone scripts (in this repo or in another git repo).
    #
    # Features:
    #   - Executes a .py Spark job script
    #   - Supports imports inside the script (thanks to git-sync)
    #   - Runs on the specified Spark image with configurable resources
    #
    # For cross-repo jobs, you can set `git_repo`, `git_ref`, etc.
    stackit_spark_kubernetes = STACKITSparkScriptOperator(
        task_id="stackit_spark_kubernetes",
        script="Demo/scripts/my_spark_job_with_imports.py",  # relative to repo root
        cpu=2,
        **default_kwargs,
    )

    # ---------------------------------------------------------------------
    # DAG FLOW
    # ---------------------------------------------------------------------
    # First run some inline Python, then a Spark function,
    # then trigger the script-based operators (both in parallel).
    python_task = stackit_decorated_python_kubernetes()
    spark_task = stackit_decorated_spark_kubernetes(12)
    python_task >> spark_task >> [
        stackit_python_kubernetes,
        stackit_spark_kubernetes,
    ]
|
||||
|
||||
|
||||
# Call the @dag-decorated function at import time; without this call the
# function is only defined and no DAG object is ever built from it.
stackit_operators()
|
||||
Loading…
Add table
Add a link
Reference in a new issue