Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions examples/house_prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,14 @@
import pandas as pd

import plexe
from plexe import ModelBuilder
from plexe.internal.common.provider import ProviderConfig
from plexe.callbacks import MLFlowCallback


# Step 1: Define the model
# Note: for conciseness we leave the input schema empty and let plexe infer it
model = plexe.Model(
intent=(
"With 79 explanatory variables describing aspects of residential homes in Ames, Iowa, predict "
"the final price of each home. Use only linear regression and decision tree models, no ensembling. "
"The models must be extremely simple and quickly trainable on extremely constrained hardware."
),
output_schema={
"SalePrice": float,
},
)

# Step 2: Build the model using the training dataset
# 2B: Build the model with the dataset
# NOTE: In order to run this example, you will need to download the dataset from Kaggle
model.build(
datasets=[pd.read_csv("examples/datasets/house-prices-train.csv")],
model = ModelBuilder(
provider=ProviderConfig(
default_provider="openai/gpt-4o",
orchestrator_provider="anthropic/claude-sonnet-4-20250514",
Expand All @@ -49,10 +35,25 @@
ops_provider="anthropic/claude-3-7-sonnet-20250219",
tool_provider="openai/gpt-4o",
),
verbose=False,
)

# Step 2: Build the model using the training dataset
# The training dataset, intent and output schema are all passed to build()
# NOTE: To run this example, first download the dataset from Kaggle and save the files as
# examples/datasets/house-prices-train.csv and examples/datasets/house-prices-test.csv
m = model.build(
datasets=[pd.read_csv("examples/datasets/house-prices-train.csv")],
intent=(
"With 79 explanatory variables describing aspects of residential homes in Ames, Iowa, predict "
"the final price of each home. Use only linear regression and decision tree models, no ensembling. "
"The models must be extremely simple and quickly trainable on extremely constrained hardware."
),
output_schema={
"SalePrice": float,
},
max_iterations=2,
timeout=1800, # 30 minute timeout
run_timeout=180,
verbose=False,
callbacks=[
MLFlowCallback(
tracking_uri="http://127.0.0.1:8080",
Expand All @@ -62,15 +63,15 @@
)

# Step 3: Save the model
plexe.save_model(model, "house-prices.tar.gz")
plexe.save_model(m, "house-prices.tar.gz")

# Step 4: Run predictions with the built model on 10 randomly sampled rows of the test set
test_df = pd.read_csv("examples/datasets/house-prices-test.csv").sample(10)
predictions = pd.DataFrame.from_records([model.predict(x) for x in test_df.to_dict(orient="records")])
predictions = pd.DataFrame.from_records([m.predict(x) for x in test_df.to_dict(orient="records")])

# Step 5: Print the predictions for the sampled rows
print(predictions)

# Step 6: Print model description
description = model.describe()
description = m.describe()
print(description.as_text())