Initial commit
This commit is contained in:
parent
d4028fca11
commit
a578239c4f
32 changed files with 2559 additions and 0 deletions
80
Demo/stackit-12-schedule-on-gpu-node.py
Normal file
80
Demo/stackit-12-schedule-on-gpu-node.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
import json
|
||||
|
||||
import pendulum
|
||||
|
||||
from airflow.sdk import dag, task
|
||||
from airflow.providers.cncf.kubernetes.operators.pod import (
|
||||
KubernetesPodOperator,
|
||||
)
|
||||
|
||||
from stackit_workflows.kubernetes import POD_NAMESPACE, K8S
|
||||
|
||||
from stackit_workflows.airflow_plugin.operators import STACKITSparkScriptOperator
|
||||
from stackit_workflows.models import GpuConfig, MLFramework
|
||||
|
||||
# Demo DAG showing two ways to place work on GPU nodes:
#   1. a plain KubernetesPodOperator pinned to GPU nodes through node affinity
#      plus a matching toleration, and
#   2. a STACKITSparkScriptOperator configured through its ``gpu`` argument.
#
# NOTE: this function is documented with comments instead of a docstring on
# purpose -- Airflow uses the docstring of a @dag-decorated function as the
# DAG's documentation, so adding one would change what the DAG exposes.
@dag(
    dag_id="stackit_12_on_gpu_node",
    schedule=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["demo", "stackit-demo"],
)
def kubernetes_operator():
    # --- Node affinity --------------------------------------------------
    # The target nodes carry the label dedicated=gpu. The rule below is a
    # hard requirement (REQUIRED during scheduling). A softer variant exists
    # as well: preferred_during_scheduling_ignored_during_execution, which
    # only expresses a preference.
    gpu_label_requirement = K8S.V1NodeSelectorRequirement(
        key="dedicated",
        operator="In",
        values=["gpu"],
    )
    gpu_node_selector = K8S.V1NodeSelector(
        node_selector_terms=[
            K8S.V1NodeSelectorTerm(match_expressions=[gpu_label_requirement]),
        ],
    )
    gpu_node_affinity = K8S.V1Affinity(
        node_affinity=K8S.V1NodeAffinity(
            required_during_scheduling_ignored_during_execution=gpu_node_selector,
        ),
    )

    # --- Tolerations ----------------------------------------------------
    # Tainted nodes reject pods that do not tolerate the taint. In this
    # example the GPU node is tainted with dedicated=gpu:NoSchedule, so the
    # pod needs the matching toleration. Several tolerations may be passed
    # as a list when nodes are dedicated to specific workloads.
    gpu_taint_toleration = K8S.V1Toleration(
        key="dedicated",
        operator="Equal",
        value="gpu",
        effect="NoSchedule",
    )

    # --- Security context -----------------------------------------------
    # Every launched pod has to run as a non-root user; otherwise it will
    # not start.
    non_root_context = K8S.V1PodSecurityContext(run_as_user=100)

    # The "debian" image is only a stand-in for the demo; for real GPU work
    # use a Spark image with GPU support instead.
    pod_on_gpu_node = KubernetesPodOperator(
        task_id="run_task_on_gpu",
        name="gpu-task",
        image="debian",
        cmds=["echo"],
        arguments=["This task is running in a gpu node"],
        affinity=gpu_node_affinity,
        tolerations=[gpu_taint_toleration],
        security_context=non_root_context,
    )

    # Second variant: GPU settings are handed to the STACKIT operator via
    # its ``gpu`` argument.
    # NOTE(review): presumably the operator derives the GPU scheduling
    # details from GpuConfig itself -- confirm against stackit_workflows.
    spark_gpu_script = STACKITSparkScriptOperator(
        task_id="gpu_flag",
        gpu=GpuConfig(ml_framework=MLFramework.PYTORCH),
        script="Demo/scripts/basic_python_script.py",
        image="python:3.12-slim-bullseye",
        security_context={"runAsUser": 1000},
    )

    # The two tasks are only listed here; no dependency is declared between
    # them in this function.
    pod_on_gpu_node
    spark_gpu_script
|
||||
|
||||
# Invoke the @dag-decorated function at module level; in Airflow this call is
# what instantiates the DAG when the file is parsed.
kubernetes_operator()
|
||||
Loading…
Add table
Add a link
Reference in a new issue