Initial commit
This commit is contained in:
parent
d4028fca11
commit
a578239c4f
32 changed files with 2559 additions and 0 deletions
8
Demo/scripts/basic_python_script.py
Normal file
8
Demo/scripts/basic_python_script.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
"""Minimal demo script: print a greeting followed by the current date and time."""
import datetime

# Libraries baked into your own Docker image can be imported here, e.g.:
# import awesome_lib

print("Hello from basic_python_script.py")

# Take the timestamp only after the greeting so the output order matches the work done.
current_moment = datetime.datetime.now()
print(f"The current date is {current_moment}")
|
||||
42
Demo/scripts/my_dremio_query.py
Normal file
42
Demo/scripts/my_dremio_query.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
"""
|
||||
Copyright (C) 2017-2021 Dremio Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
from dremio.arguments.parse import options_default_validator
|
||||
from dremio.flight.endpoint import DremioFlightEndpoint
|
||||
|
||||
def execute_query(dremio_host: str, dremio_port: int, username: str, token: str, query: str):
    """Run ``query`` through a Dremio Flight endpoint and print the result.

    The given connection details are layered on top of the ``"default"``
    entry of ``options_default_validator``; TLS is always requested.

    Args:
        dremio_host: Value passed to the endpoint as ``hostname``.
        dremio_port: Value passed to the endpoint as ``port``.
        username: Value passed to the endpoint as ``username``.
        token: Value passed to the endpoint as ``token``.
        query: Value passed to the endpoint as ``query``.

    Returns:
        None. The result is printed as a pandas DataFrame, not returned.
    """
    overrides = dict(
        hostname=dremio_host,
        port=dremio_port,
        tls=True,
        username=username,
        token=token,
        query=query,
    )
    # Entries in ``overrides`` win over the library defaults.
    config = options_default_validator["default"] | overrides

    endpoint = DremioFlightEndpoint(config)
    client = endpoint.connect()

    # Fetch a reader for the configured query and materialise it with pandas.
    result_frame = endpoint.get_reader(client).read_pandas()
    print(result_frame)
|
||||
9
Demo/scripts/my_spark_job.py
Normal file
9
Demo/scripts/my_spark_job.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
"""Demo Spark job: build a one-row DataFrame, pause for 30 seconds, then show it."""
import stackit_spark
import time
import pandas as pd

spark = stackit_spark.get_spark()

# Single-column, single-row pandas frame used as the job's only input.
data = pd.DataFrame({"number": [10]})
df = spark.createDataFrame(data)

# Pause for 30 seconds before the DataFrame is displayed.
time.sleep(30)

df.show()
|
||||
10
Demo/scripts/my_spark_job_with_imports.py
Normal file
10
Demo/scripts/my_spark_job_with_imports.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
"""Demo Spark job that also calls a helper imported from the ``my_tools`` package."""
import stackit_spark
import pandas as pd
from my_tools.say_hello import say_hello

# Call the imported helper first; it runs before any Spark session is requested.
say_hello()

spark = stackit_spark.get_spark()

# Single-column, single-row pandas frame used as the job's only input.
data = pd.DataFrame({"number": [10]})
df = spark.createDataFrame(data)
df.show()
|
||||
72
Demo/scripts/my_spark_notebook.ipynb
Normal file
72
Demo/scripts/my_spark_notebook.ipynb
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from stackit_spark import get_spark\n",
|
||||
"\n",
|
||||
"from my_tools.catalog_spark import get_nessie_token\n",
|
||||
"\n",
|
||||
"if \"STACKIT__PAPERMILL\" in os.environ:\n",
|
||||
" # Create Spark Session with Iceberg Rest Credentials for Dremio Enterprise Catalog\n",
|
||||
" catalog_name_in_spark = \"stackit\"\n",
|
||||
" spark = get_spark()\n",
|
||||
" spark.sql(f\"USE {catalog_name_in_spark}\")\n",
|
||||
"\n",
|
||||
" sdf = spark.sql(f\"SELECT * FROM DEMO.user\")\n",
|
||||
" sdf.show()\n",
|
||||
" \n",
|
||||
"else:\n",
|
||||
" tokenendpoint = os.environ[\"TOKEN_ENDPOINT\"]\n",
|
||||
" catalogendpoint = os.environ[\"CATALOG_ENDPOINT\"]\n",
|
||||
" password = os.environ[\"DREMIO_PAT\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" nessie_token = get_nessie_token(tokenendpoint, password)\n",
|
||||
"\n",
|
||||
" # Create Spark Session with Iceberg Rest Credentials for Dremio Enterprise Catalog\n",
|
||||
" catalog_name_in_spark = \"stackit\"\n",
|
||||
" spark = get_spark(\n",
|
||||
" additional_config={\n",
|
||||
" \"spark.jars.packages\": \"org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1,org.apache.iceberg:iceberg-aws-bundle:1.6.1\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}\": \"org.apache.iceberg.spark.SparkCatalog\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.type\": \"rest\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.warehouse\": \"catalog-s3\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.uri\": catalogendpoint,\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.token\": nessie_token,\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" spark.sql(f\"USE {catalog_name_in_spark}\")\n",
|
||||
"\n",
|
||||
" sdf = spark.sql(f\"SELECT * FROM DEMO.user\")\n",
|
||||
" sdf.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "stackit-papermill",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
35
Demo/scripts/my_tools/catalog_spark.py
Normal file
35
Demo/scripts/my_tools/catalog_spark.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
def get_nessie_token(tokenendpoint, password, timeout=30):
    """Exchange a Dremio personal access token (PAT) for a Nessie access token.

    Args:
        tokenendpoint: URL of the OAuth token endpoint the request is POSTed to.
        password: Dremio personal access token, sent as the ``subject_token``.
        timeout: Seconds to wait for the endpoint before ``requests`` gives up.

    Returns:
        The ``access_token`` field of the endpoint's JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    import requests

    # Exchange Dremio PAT to Nessie token
    token_request_body = {
        "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
        "scope": "dremio.all",
        "subject_token_type": "urn:ietf:params:oauth:token-type:dremio:personal-access-token",
        "subject_token": password,
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled endpoint.
    response = requests.post(tokenendpoint, data=token_request_body, timeout=timeout)
    response.raise_for_status()
    return response.json()["access_token"]
|
||||
|
||||
|
||||
def get_spark_session(host, nessie_token):
    """Return a Spark session with an Iceberg REST catalog registered as ``stackit``.

    Args:
        host: Used as the catalog's ``uri`` setting.
        nessie_token: Used as the catalog's ``token`` setting.

    Returns:
        Whatever ``stackit_spark.get_spark`` returns for the assembled config.
    """
    import stackit_spark

    catalog_name_in_spark = "stackit"
    # Every catalog option shares this prefix; the bare prefix selects the implementation.
    catalog_prefix = f"spark.sql.catalog.{catalog_name_in_spark}"

    spark_settings = {
        "spark.jars.packages": "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1,org.apache.iceberg:iceberg-aws-bundle:1.6.1",
        catalog_prefix: "org.apache.iceberg.spark.SparkCatalog",
        f"{catalog_prefix}.type": "rest",
        f"{catalog_prefix}.uri": host,
        f"{catalog_prefix}.token": nessie_token,
    }
    return stackit_spark.get_spark(additional_config=spark_settings)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test of the token exchange; "xxxxx" is a placeholder PAT
    # and the returned token is discarded.
    token_endpoint = "https://dremio-internal.data-platform-dev.stackit.run/oauth/token"
    placeholder_pat = "xxxxx"
    get_nessie_token(token_endpoint, placeholder_pat)
|
||||
65
Demo/scripts/my_tools/read_write_data.py
Normal file
65
Demo/scripts/my_tools/read_write_data.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
|
||||
def _load_parquet_folder(s3, bucket_name, folder_name):
    """Return one pandas DataFrame holding every readable Parquet object under a prefix.

    Returns None when the prefix matches no object or no object could be read.
    """
    import pandas as pd
    import pyarrow.parquet as pq
    from io import BytesIO

    frames = []
    # Paginate: a single list_objects_v2 call returns at most 1000 keys, and a
    # page for a prefix without matches carries no 'Contents' entry at all.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name):
        for obj in page.get('Contents', []):
            key = obj['Key']
            try:
                # Get object from S3 and read its bytes
                response = s3.get_object(Bucket=bucket_name, Key=key)
                body = response['Body'].read()
                # Read Parquet file using PyArrow
                frames.append(pq.read_table(BytesIO(body)).to_pandas())
            except Exception as e:
                # Best effort: one unreadable object must not abort the whole folder.
                print(f"Error processing file {key} in folder {folder_name}: {e}")

    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)


def read_and_write_data():
    """Copy Parquet folders from the 'data-tpcds' bucket into Spark tables.

    For each configured folder, all Parquet objects under that prefix are
    downloaded, concatenated, and written with mode "overwrite" to
    ``DEMO_USECASE_AIRFLOW.<folder with '/' replaced by '_'>`` in the
    ``lakehouse`` catalog. Folders without readable data are skipped.

    S3 credentials are not embedded here: boto3 resolves them through its
    default credential chain (for example the AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY environment variables).

    Returns:
        None.
    """
    import stackit_spark
    import boto3

    # Initialize the S3 client; credentials come from boto3's default chain.
    s3 = boto3.client(
        's3',
        endpoint_url='https://object.storage.eu01.onstackit.cloud',
    )

    bucket_name = 'data-tpcds'
    folder_names = [
        's1000/time_dim',
        's1000/item',
        's1000/date_dim',
        's1000/customer_demographics',
        's1000/web_sales_',
        's1000/customer1',
    ]

    spark = stackit_spark.get_spark()
    spark.sql("USE lakehouse")
    spark.sql("CREATE NAMESPACE IF NOT EXISTS DEMO_USECASE_AIRFLOW")

    for folder_name in folder_names:
        df = _load_parquet_folder(s3, bucket_name, folder_name)
        if df is None:
            # spark.createDataFrame(None) would raise, so skip empty folders explicitly.
            print(f"No readable Parquet data under {folder_name}; skipping table creation")
            continue

        table_name = f"DEMO_USECASE_AIRFLOW.{folder_name.replace('/', '_')}"
        # Save Spark DataFrame as a table, replacing any previous contents.
        spark.createDataFrame(df).write.mode("overwrite").saveAsTable(table_name)
|
||||
|
||||
|
||||
# Run the copy only when executed as a script, so that importing this module
# does not trigger S3 downloads and table overwrites as a side effect.
if __name__ == "__main__":
    read_and_write_data()
|
||||
65
Demo/scripts/my_tools/read_write_table.py
Normal file
65
Demo/scripts/my_tools/read_write_table.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
|
||||
def read_and_write_data():
    """Load one Parquet object from object storage into a Spark table.

    Lists the objects under ``s1000/customer_address`` in the 'data-tpcds'
    bucket, reads only the first listed object, and writes it with mode
    "overwrite" to ``DEMO_USECASE_AIRFLOW.customer_address`` in the
    ``lakehouse`` catalog.

    S3 credentials are not embedded here: boto3 resolves them through its
    default credential chain (for example the AWS_ACCESS_KEY_ID and
    AWS_SECRET_ACCESS_KEY environment variables).

    Returns:
        None.

    Raises:
        ValueError: If no object exists under the configured prefix.
    """
    import stackit_spark
    import pandas as pd
    import boto3
    import pyarrow.parquet as pq
    from io import BytesIO

    # Initialize the S3 client; credentials come from boto3's default chain.
    s3 = boto3.client(
        's3',
        endpoint_url='https://object.storage.eu01.onstackit.cloud',
    )

    bucket_name = 'data-tpcds'
    folder_name = 's1000/customer_address'

    # The listing carries no 'Contents' key when nothing matches the prefix.
    listing = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
    # Only the first listed object is loaded.
    objs = listing.get('Contents', [])[:1]
    if not objs:
        raise ValueError(
            f"No objects found under s3://{bucket_name}/{folder_name}"
        )

    df_list = []
    for obj in objs:
        # Get object from S3 and read its bytes
        response = s3.get_object(Bucket=bucket_name, Key=obj['Key'])
        body = response['Body'].read()
        df_list.append(pq.read_table(BytesIO(body)).to_pandas())

    df = pd.concat(df_list, ignore_index=True)

    spark = stackit_spark.get_spark()
    spark.sql("USE lakehouse")
    spark.sql("CREATE NAMESPACE IF NOT EXISTS DEMO_USECASE_AIRFLOW")

    # Save Spark DataFrame as a table, replacing any previous contents.
    spark_df = spark.createDataFrame(df)
    spark_df.write.mode("overwrite").saveAsTable("DEMO_USECASE_AIRFLOW.customer_address")
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import os

    # Non-secret catalog settings for a local run.
    os.environ.update({
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__CATALOG-IMPL': 'org.apache.iceberg.nessie.NessieCatalog',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__IO-IMPL': 'org.apache.iceberg.aws.s3.S3FileIO',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE': 'org.apache.iceberg.spark.SparkCatalog',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__URI': 'http://nessie-internal.nessie-ns.svc.cluster.local:19120/api/v1',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__S3__ENDPOINT': 'https://object.storage.eu01.onstackit.cloud',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__WAREHOUSE': 's3://data-platform-playground-internal/warehouse',
    })

    # The S3 key pair must never be committed; it has to be supplied by the
    # caller's environment before this script is started.
    required_secrets = (
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__S3__ACCESS-KEY-ID',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__S3__SECRET-ACCESS-KEY',
    )
    missing = [name for name in required_secrets if name not in os.environ]
    if missing:
        raise SystemExit(
            f"Missing required environment variables: {', '.join(missing)}"
        )

    read_and_write_data()
|
||||
2
Demo/scripts/my_tools/say_hello.py
Normal file
2
Demo/scripts/my_tools/say_hello.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
def say_hello():
    """Print a fixed greeting to standard output."""
    greeting = "Hello from say_hello()"
    print(greeting)
|
||||
182
Demo/scripts/sales_prediction.ipynb
Normal file
182
Demo/scripts/sales_prediction.ipynb
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8aae8ae7-4cb4-4b66-b325-aa3eabdeb455",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_date = \"2025-05-08\"\n",
|
||||
"input_sales = \"local.retail_sales\"\n",
|
||||
"input_inventory = \"local.retail_inventory\"\n",
|
||||
"output_table = \"local.restock_plan\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3598df66-6027-4f02-b7bd-c279b9d20c32",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create spark session with authentication to the catalog\n",
|
||||
"\n",
|
||||
"def get_catalog_token(tokenendpoint, password):\n",
|
||||
" import requests\n",
|
||||
"\n",
|
||||
" # Exchange Dremio PAT to Nessie token\n",
|
||||
" token_request_body = {\n",
|
||||
" \"grant_type\": \"urn:ietf:params:oauth:grant-type:token-exchange\",\n",
|
||||
" \"scope\": \"dremio.all\",\n",
|
||||
" \"subject_token_type\": \"urn:ietf:params:oauth:token-type:dremio:personal-access-token\",\n",
|
||||
" \"subject_token\": password,\n",
|
||||
" }\n",
|
||||
" x = requests.post(tokenendpoint, data=token_request_body)\n",
|
||||
" x.raise_for_status()\n",
|
||||
" return x.json()[\"access_token\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_spark_session(host, catalog_token):\n",
|
||||
" import stackit_spark\n",
|
||||
"\n",
|
||||
" catalog_name_in_spark = \"stackit\"\n",
|
||||
" return stackit_spark.get_spark(\n",
|
||||
" additional_config={\n",
|
||||
" \"spark.jars.packages\": \"org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1,org.apache.iceberg:iceberg-aws-bundle:1.6.1\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}\": \"org.apache.iceberg.spark.SparkCatalog\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.warehouse\": \"catalog-s3\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.type\": \"rest\",\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.uri\": host,\n",
|
||||
" f\"spark.sql.catalog.{catalog_name_in_spark}.token\": catalog_token,\n",
|
||||
" }\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
 "cell_type": "code",
 "execution_count": null,
 "id": "f9071bfb-1767-47b3-907f-723ce4389d9e",
 "metadata": {},
 "outputs": [],
 "source": [
  "import os\n",
  "\n",
  "# The Dremio personal access token is read from the environment; never commit it to the notebook.\n",
  "catalog_token = get_catalog_token(\"https://dremio-internal.data-platform.stackit.run/oauth/token\", os.environ[\"DREMIO_PAT\"])\n",
  "spark = get_spark_session(\"https://dremio-internal-catalog.data-platform.stackit.run/iceberg/main\", catalog_token)"
 ]
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8c250ade-1565-4c95-b1ae-134724a2e77b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spark.sql(f\"USE stackit\")\n",
|
||||
"# Step 1: Create a namespace\n",
|
||||
"spark.sql(\"CREATE NAMESPACE IF NOT EXISTS retail_demo\")\n",
|
||||
"\n",
|
||||
"# Step 2: Create synthetic sales data\n",
|
||||
"sales_df = spark.range(100).selectExpr(\n",
|
||||
" \"date_add('2025-05-01', cast(rand() * 7 as int)) as sale_date\",\n",
|
||||
" \"cast(rand() * 10 + 1 as int) as units_sold\",\n",
|
||||
" \"case when cast(rand() * 3 as int) = 0 then 'A123' \"\n",
|
||||
" \" when cast(rand() * 3 as int) = 1 then 'B456' \"\n",
|
||||
" \" else 'C789' end as product_id\",\n",
|
||||
" \"case when cast(rand() * 2 as int) = 0 then '101' else '102' end as store_id\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Step 3: Create synthetic inventory data\n",
|
||||
"inventory_df = spark.range(10).selectExpr(\n",
|
||||
" \"'2025-05-08' as inventory_date\",\n",
|
||||
" \"cast(rand() * 20 + 10 as int) as current_stock\",\n",
|
||||
" \"case when cast(rand() * 3 as int) = 0 then 'A123' \"\n",
|
||||
" \" when cast(rand() * 3 as int) = 1 then 'B456' \"\n",
|
||||
" \" else 'C789' end as product_id\",\n",
|
||||
" \"case when cast(rand() * 2 as int) = 0 then '101' else '102' end as store_id\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Step 4: Write to Iceberg tables\n",
|
||||
"sales_df.write.mode(\"overwrite\").saveAsTable(\"retail_demo.sales_data\")\n",
|
||||
"inventory_df.write.mode(\"overwrite\").saveAsTable(\"retail_demo.inventory_data\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5bd53006-ddbf-4ecf-bf3b-fa173cf9893b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pyspark.sql.functions import expr\n",
|
||||
"\n",
|
||||
"# Load sales and inventory data\n",
|
||||
"sales = spark.sql(\"\"\"\n",
|
||||
" SELECT store_id, product_id, sale_date, units_sold\n",
|
||||
" FROM retail_demo.sales_data\n",
|
||||
"\"\"\")\n",
|
||||
"\n",
|
||||
"inventory = spark.sql(\"\"\"\n",
|
||||
" SELECT store_id, product_id, inventory_date, current_stock\n",
|
||||
" FROM retail_demo.inventory_data\n",
|
||||
"\"\"\")\n",
|
||||
"\n",
|
||||
"# Step 2: Aggregate demand per store/product (e.g. last 7 days)\n",
|
||||
"demand = sales.groupBy(\"store_id\", \"product_id\") \\\n",
|
||||
" .agg({\"units_sold\": \"avg\"}) \\\n",
|
||||
" .withColumnRenamed(\"avg(units_sold)\", \"predicted_demand\")\n",
|
||||
"\n",
|
||||
"# Step 3: Join with inventory\n",
|
||||
"restock_plan = demand.join(inventory, on=[\"store_id\", \"product_id\"], how=\"inner\")\n",
|
||||
"\n",
|
||||
"# Step 4: Calculate restock quantity\n",
|
||||
"restock_plan = restock_plan.withColumn(\n",
|
||||
" \"restock_qty\",\n",
|
||||
" expr(\"CASE WHEN predicted_demand - current_stock > 0 THEN int(predicted_demand - current_stock) ELSE 0 END\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Step 5: Add metadata\n",
|
||||
"restock_plan = restock_plan.withColumn(\"run_date\", expr(\"current_date()\"))\n",
|
||||
"\n",
|
||||
"# Step 6: Save to Iceberg\n",
|
||||
"restock_plan.write.mode(\"overwrite\").saveAsTable(\"retail_demo.restock_plan\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5fea9096-b7b3-44d4-91aa-189ac9ea4a33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Convert the Spark DataFrame to a Pandas DataFrame for better display in Jupyter\n",
|
||||
"restock_df = restock_plan.toPandas()\n",
|
||||
"\n",
|
||||
"# Display the restock plan nicely in Jupyter using HTML table format\n",
|
||||
"import IPython.display as display\n",
|
||||
"display.display(display.HTML(restock_df.to_html(index=False)))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue