From 3dff4cfa15e4200fbc51a42b58465d64c3c39858 Mon Sep 17 00:00:00 2001 From: JasonAlbertEinstien Date: Sat, 16 Aug 2025 23:36:01 +0800 Subject: [PATCH 1/2] house price --- examples/house_prices.py | 42 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/examples/house_prices.py b/examples/house_prices.py index eea50e86..d5bd2cd3 100644 --- a/examples/house_prices.py +++ b/examples/house_prices.py @@ -19,28 +19,14 @@ import pandas as pd import plexe +from plexe import ModelBuilder from plexe.internal.common.provider import ProviderConfig from plexe.callbacks import MLFlowCallback # Step 1: Define the model # Note: for conciseness we leave the input schema empty and let plexe infer it -model = plexe.Model( - intent=( - "With 79 explanatory variables describing aspects of residential homes in Ames, Iowa, predict " - "the final price of each home. Use only linear regression and decision tree models, no ensembling. " - "The models must be extremely simple and quickly trainable on extremely constrained hardware." - ), - output_schema={ - "SalePrice": float, - }, -) - -# Step 2: Build the model using the training dataset -# 2B: Build the model with the dataset -# NOTE: In order to run this example, you will need to download the dataset from Kaggle -model.build( - datasets=[pd.read_csv("examples/datasets/house-prices-train.csv")], +model = ModelBuilder( provider=ProviderConfig( default_provider="openai/gpt-4o", orchestrator_provider="anthropic/claude-sonnet-4-20250514", @@ -49,10 +35,26 @@ ops_provider="anthropic/claude-3-7-sonnet-20250219", tool_provider="openai/gpt-4o", ), + verbose=False, + +) + +# Step 2: Build the model using the training dataset +# 2B: Build the model with the dataset +# NOTE: In order to run this example, you will need to download the dataset from Kaggle +m = model.build( + datasets=[pd.read_csv("examples/datasets/house-prices-train.csv")], + intent=( + "With 79 explanatory variables describing aspects of residential homes in Ames, Iowa, predict " + "the final price of each home. Use only linear regression and decision tree models, no ensembling. " + "The models must be extremely simple and quickly trainable on extremely constrained hardware." + ), + output_schema={ + "SalePrice": float, + }, max_iterations=2, timeout=1800, # 30 minute timeout run_timeout=180, - verbose=False, callbacks=[ MLFlowCallback( tracking_uri="http://127.0.0.1:8080", @@ -62,15 +64,15 @@ ) # Step 3: Save the model -plexe.save_model(model, "house-prices.tar.gz") +plexe.save_model(m, "house-prices.tar.gz") # Step 4: Run a prediction on the built model test_df = pd.read_csv("examples/datasets/house-prices-test.csv").sample(10) -predictions = pd.DataFrame.from_records([model.predict(x) for x in test_df.to_dict(orient="records")]) +predictions = pd.DataFrame.from_records([m.predict(x) for x in test_df.to_dict(orient="records")]) # Step 5: print a sample of predictions print(predictions) # Step 6: Print model description -description = model.describe() +description = m.describe() print(description.as_text()) From a0b951382af5cef8a47d14fe54e3220c9d61332d Mon Sep 17 00:00:00 2001 From: JasonAlbertEinstien Date: Wed, 20 Aug 2025 00:31:34 +0800 Subject: [PATCH 2/2] formatting --- examples/house_prices.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/house_prices.py b/examples/house_prices.py index d5bd2cd3..e721eaaf 100644 --- a/examples/house_prices.py +++ b/examples/house_prices.py @@ -36,7 +36,6 @@ tool_provider="openai/gpt-4o", ), verbose=False, - ) # Step 2: Build the model using the training dataset