def read_and_write_data() -> None:
    """Load one Parquet object from S3 and store it as a lakehouse table.

    Lists the objects under ``s1000/customer_address`` in the ``data-tpcds``
    bucket, downloads only the first listed object, converts it to a pandas
    DataFrame, and overwrites the Spark table
    ``DEMO_USECASE_AIRFLOW.customer_address`` with its contents.

    Raises:
        FileNotFoundError: If no object exists under the configured prefix.
    """
    # Imports stay function-local, as in the original. Presumably this keeps
    # `stackit_spark` from being imported before the STACKIT__SPARK__*
    # environment variables are set by the script entry point -- TODO confirm.
    import stackit_spark
    import pandas as pd
    import boto3
    import pyarrow.parquet as pq
    from io import BytesIO

    # SECURITY(review): these credentials are committed in source. They should
    # be rotated and supplied through the environment or a secret store. The
    # values are left untouched here so the script keeps working as before.
    s3 = boto3.client(
        's3',
        aws_access_key_id='J902D929PQ1BC1HG93ZS',
        aws_secret_access_key='J7x29/4s6eDQ0T1UywB8xo1byWTetwXrCdvYoudH',
        endpoint_url='https://object.storage.eu01.onstackit.cloud',
    )

    bucket_name = 'data-tpcds'
    folder_name = 's1000/customer_address'

    # list_objects_v2 omits the 'Contents' key when nothing matches the
    # prefix, so read it defensively and fail with an explicit message
    # instead of a bare KeyError('Contents').
    listing = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
    objs = listing.get('Contents', [])
    if not objs:
        raise FileNotFoundError(
            f"No objects found under s3://{bucket_name}/{folder_name}"
        )

    # Only the first listed object is loaded.
    objs = objs[:1]

    df_list = []
    for obj in objs:
        # Fetch the whole object into memory and parse it as Parquet.
        response = s3.get_object(Bucket=bucket_name, Key=obj['Key'])
        body = response['Body'].read()
        table = pq.read_table(BytesIO(body))
        df_list.append(table.to_pandas())

    df = pd.concat(df_list, ignore_index=True)

    spark = stackit_spark.get_spark()
    spark.sql("USE lakehouse")
    spark.sql("CREATE NAMESPACE IF NOT EXISTS DEMO_USECASE_AIRFLOW")

    # Save the data as an Iceberg table, replacing any existing contents.
    spark_df = spark.createDataFrame(df)
    spark_df.write.mode("overwrite").saveAsTable(
        "DEMO_USECASE_AIRFLOW.customer_address"
    )


if __name__ == "__main__":
    import os

    # Catalog configuration for the `lakehouse` catalog used by
    # read_and_write_data(); it is set before the function runs.
    # SECURITY(review): the S3 access key and secret below are committed in
    # source. Rotate them and inject them from the environment or a secret
    # store instead of hard-coding them here.
    os.environ.update({
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__S3__ACCESS-KEY-ID':
            'DJ6STN2PRVEH6XIMP56V',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__CATALOG-IMPL':
            'org.apache.iceberg.nessie.NessieCatalog',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__S3__SECRET-ACCESS-KEY':
            'nG/AheODBfMcEhZL/cR+BQrQmO79Hia7nqweMu+n',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__IO-IMPL':
            'org.apache.iceberg.aws.s3.S3FileIO',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE':
            'org.apache.iceberg.spark.SparkCatalog',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__URI':
            'http://nessie-internal.nessie-ns.svc.cluster.local:19120/api/v1',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__S3__ENDPOINT':
            'https://object.storage.eu01.onstackit.cloud',
        'STACKIT__SPARK__SQL__CATALOG__LAKEHOUSE__WAREHOUSE':
            's3://data-platform-playground-internal/warehouse',
    })

    read_and_write_data()