"""Lazily scan a CSV stored in a MinIO (S3-compatible) bucket with Polars.

Opens ``hotel_bookings.csv`` from the ``duckdb-playground`` bucket through
s3fs, builds a lazy CSV scan over the open handle, then prints the first
five rows and the total row count.
"""

import polars as pl
import s3fs

# MinIO S3 endpoint. Use http:// if MinIO is not using SSL.
s3_endpoint_url = "https://minio.wayl.one"

# S3 filesystem instance pointed at the MinIO endpoint instead of AWS.
fs = s3fs.S3FileSystem(client_kwargs={"endpoint_url": s3_endpoint_url})

# The listing result is discarded; the call is kept from the original
# script (presumably as an early bucket-access check -- confirm intent).
fs.ls("duckdb-playground")

# S3 file path
s3_path = "s3://duckdb-playground/hotel_bookings.csv"

# Force 'children' to Int64 rather than relying on schema inference.
# Combined with null_values below, "NA" entries in that column become nulls.
schema_overrides = {"children": pl.Int64}

# Open the file with s3fs and scan it lazily.
with fs.open(s3_path, mode="rb") as f:
    df = pl.scan_csv(
        f,
        infer_schema_length=10000,  # Increase schema inference length
        schema_overrides=schema_overrides,  # Override 'children' type
        null_values=["NA"],  # Treat "NA" as null
    )

    # Evaluate the lazy frame while the handle is still open: the scan is
    # lazy, so collecting after the `with` block exits could touch a
    # closed file.

    # Fetch and show the first few rows. `LazyFrame.fetch` is deprecated;
    # `head(n).collect()` is the supported equivalent.
    print(df.head(5).collect())

    row_count = df.select(pl.len()).collect()
    print(row_count)