Update churn notebook

whoseoyster · whoseoyster · commit 995a3064c5cd · 2023-01-31T19:21:04.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -12,6 +12,7 @@ server-tests.ipynb
 dependencies/
 *.bin
 *.csv
+*.yaml
 
 # Ignore everything in examples/ except the task dirs
 !examples
diff --git a/examples/tabular-classification/sklearn/churn-classifier/churn-classifier-sklearn.ipynb b/examples/tabular-classification/sklearn/churn-classifier/churn-classifier-sklearn.ipynb
@@ -276,7 +276,7 @@
     "openlayer.api.STORAGE = openlayer.api.StorageType.ONPREM\n",
     "openlayer.api.OPENLAYER_ENDPOINT = \"http://localhost:8080/v1\"\n",
     "\n",
-    "client = openlayer.OpenlayerClient(\"YOUR_API_KEY_HERE\")"
+    "client = openlayer.OpenlayerClient(\"YOUR_API_KEY\")"
    ]
   },
   {
@@ -329,6 +329,78 @@
     "training_set['churn'] = y_train.values"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2d842da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "val_preds_df = pd.DataFrame({\"predictions\": sklearn_model.predict_proba(x_val_one_hot).tolist()})\n",
+    "validation_set = validation_set.copy().reset_index(drop=True)\n",
+    "validation_set[\"preds\"] = val_preds_df[\"predictions\"]\n",
+    "validation_set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62969755",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_preds_df = pd.DataFrame({\"predictions\": sklearn_model.predict_proba(x_train_one_hot).tolist()})\n",
+    "training_set = training_set.copy().reset_index(drop=True)\n",
+    "training_set[\"preds\"] = train_preds_df[\"predictions\"]\n",
+    "training_set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03688a2f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "\n",
+    "validation_dataset_config = {\n",
+    "    \"label\": \"validation\",\n",
+    "    \"classNames\": class_names,\n",
+    "    \"categoricalFeatureNames\": [\"Gender\", \"Geography\"],\n",
+    "    \"featureNames\":feature_names,\n",
+    "    \"columnNames\":list(validation_set.columns),\n",
+    "    \"labelColumnName\": \"churn\",\n",
+    "    \"predictionsColumnName\": \"preds\",\n",
+    "}\n",
+    "\n",
+    "with open('validation_dataset_config.yaml', 'w') as dataset_config_file:\n",
+    "    yaml.dump(validation_dataset_config, dataset_config_file, default_flow_style=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e7257a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "\n",
+    "training_dataset_config = {\n",
+    "    \"label\": \"training\",\n",
+    "    \"classNames\": class_names,\n",
+    "    \"categoricalFeatureNames\": [\"Gender\", \"Geography\"],\n",
+    "    \"featureNames\":feature_names,\n",
+    "    \"columnNames\":list(training_set.columns),\n",
+    "    \"labelColumnName\": \"churn\",\n",
+    "    \"predictionsColumnName\": \"preds\",\n",
+    "}\n",
+    "\n",
+    "with open('training_dataset_config.yaml', 'w') as dataset_config_file:\n",
+    "    yaml.dump(training_dataset_config, dataset_config_file, default_flow_style=False)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -341,11 +413,7 @@
     "# Validation set\n",
     "project.add_dataframe(\n",
     "    df=validation_set,\n",
-    "    dataset_type=DatasetType.Validation,\n",
-    "    class_names=class_names,\n",
-    "    label_column_name='churn',\n",
-    "    feature_names=feature_names,\n",
-    "    categorical_feature_names=[\"Gender\", \"Geography\"],\n",
+    "    dataset_config_file_path='validation_dataset_config.yaml',\n",
     ")"
    ]
   },
@@ -359,11 +427,7 @@
     "# Training set\n",
     "project.add_dataframe(\n",
     "    df=training_set,\n",
-    "    dataset_type=DatasetType.Training,\n",
-    "    class_names=class_names,\n",
-    "    label_column_name='churn',\n",
-    "    feature_names=feature_names,\n",
-    "    categorical_feature_names=[\"Gender\", \"Geography\"],\n",
+    "    dataset_config_file_path='training_dataset_config.yaml',\n",
     ")"
    ]
   },
@@ -538,10 +602,13 @@
     "\n",
     "model_config = {\n",
     "    \"name\": \"Churn prediction model\",\n",
-    "    \"model_type\": \"sklearn\",\n",
-    "    \"class_names\": class_names,\n",
-    "    \"categorical_feature_names\": [\"Gender\", \"Geography\"],\n",
-    "    \"feature_names\":feature_names\n",
+    "    \"architectureType\": \"sklearn\",\n",
+    "    \"classNames\": class_names,\n",
+    "    \"categoricalFeatureNames\": [\"Gender\", \"Geography\"],\n",
+    "    \"featureNames\":feature_names,\n",
+    "    \"metadata\": {\n",
+    "        \"test\": \"name\"\n",
+    "    }\n",
     "}\n",
     "\n",
     "with open('model_package/model_config.yaml', 'w') as model_config_file:\n",
@@ -567,7 +634,8 @@
     "\n",
     "model_validator = ModelValidator(\n",
     "    model_package_dir=\"model_package\", \n",
-    "    sample_data = x_val.iloc[:10, :]\n",
+    "    model_config_file_path='model_package/model_config.yaml',\n",
+    "    sample_data = x_val.iloc[:10, :],\n",
     ")\n",
     "model_validator.validate()"
    ]
@@ -589,7 +657,8 @@
    "source": [
     "project.add_model(\n",
     "    model_package_dir=\"model_package\",\n",
-    "    sample_data=x_val.iloc[:10, :]\n",
+    "    model_config_file_path='model_package/model_config.yaml',\n",
+    "    sample_data=x_val.iloc[:10, :],\n",
     ")"
    ]
   },