From d61ea02be1708b8e98856b0a221f17d1ef40db23 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:41:15 +0000 Subject: [PATCH 1/2] Initial plan From 80a14de5017d708559d60d8d429b31c877e84558 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:43:21 +0000 Subject: [PATCH 2/2] Add March Machine Learning Mania 2026 Kaggle submission notebook Co-authored-by: Siddik73 <112853532+Siddik73@users.noreply.github.com> --- ..._2026_Kaggle_Submission_Marktechpost.ipynb | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 March_ML_Mania_2026_Kaggle_Submission_Marktechpost.ipynb diff --git a/March_ML_Mania_2026_Kaggle_Submission_Marktechpost.ipynb b/March_ML_Mania_2026_Kaggle_Submission_Marktechpost.ipynb new file mode 100644 index 0000000..34325e8 --- /dev/null +++ b/March_ML_Mania_2026_Kaggle_Submission_Marktechpost.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# March Machine Learning Mania 2026 — Kaggle Submission Notebook\n", + "\n", + "**Competition:** [March Machine Learning Mania 2026](https://www.kaggle.com/competitions/march-machine-learning-mania-2026)\n", + "\n", + "This notebook generates a valid baseline submission for both the Men's and Women's 2026 NCAA Basketball Tournament predictions.\n", + "\n", + "> **Assumption:** The competition data files are available at `/kaggle/input/march-machine-learning-mania-2026/`. \n", + "> We use `SampleSubmissionStage2.csv` as the template for the current-season submission." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kaggle Notebook Code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "\n", + "# ------------------------------------------------------------------\n", + "# Path to competition data (standard Kaggle input directory)\n", + "# ------------------------------------------------------------------\n", + "DATA_DIR = \"/kaggle/input/march-machine-learning-mania-2026\"\n", + "OUTPUT_DIR = \"/kaggle/working\" # Kaggle saves output files here\n", + "\n", + "# ------------------------------------------------------------------\n", + "# Load the Stage 2 sample submission\n", + "# Stage 2 lists every possible 2026 team matchup that needs a prediction\n", + "# ------------------------------------------------------------------\n", + "sample_sub_path = os.path.join(DATA_DIR, \"SampleSubmissionStage2.csv\")\n", + "submission = pd.read_csv(sample_sub_path)\n", + "\n", + "print(f\"Sample submission shape: {submission.shape}\")\n", + "print(submission.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------\n", + "# Validate the required columns are present\n", + "# The competition requires: ID (string) and Pred (float 0–1)\n", + "# ------------------------------------------------------------------\n", + "assert \"ID\" in submission.columns, \"Missing required column: ID\"\n", + "assert \"Pred\" in submission.columns, \"Missing required column: Pred\"\n", + "\n", + "print(f\"Required columns present: {list(submission.columns)}\")\n", + "print(f\"Total predictions required: {len(submission)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
------------------------------------------------------------------\n", + "# Baseline model: predict 0.5 for every matchup (coin flip)\n", + "# This gives a valid first submission, which scores 0.0 until the tournaments begin\n", + "# Pred = probability that the LOWER TeamID team wins the matchup\n", + "# ------------------------------------------------------------------\n", + "submission[\"Pred\"] = 0.5\n", + "\n", + "print(\"Prediction column set to 0.5 (baseline) for all matchups.\")\n", + "print(submission.head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------\n", + "# Quick sanity checks before saving\n", + "# ------------------------------------------------------------------\n", + "# Check all IDs are exactly SSSS_XXXX_YYYY (fullmatch also rejects trailing characters)\n", + "assert submission[\"ID\"].str.fullmatch(r\"\\d{4}_\\d{4}_\\d{4}\").all(), \\\n", + " \"Some IDs do not match expected format SSSS_XXXX_YYYY\"\n", + "\n", + "# Check predictions are within [0, 1]\n", + "assert submission[\"Pred\"].between(0, 1).all(), \\\n", + " \"Some predictions are outside the valid range [0, 1]\"\n", + "\n", + "# Confirm no duplicate IDs\n", + "assert submission[\"ID\"].is_unique, \"Duplicate IDs found in submission\"\n", + "\n", + "print(\"All sanity checks passed!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------\n", + "# Save the submission file to the Kaggle working directory\n", + "# ------------------------------------------------------------------\n", + "output_path = os.path.join(OUTPUT_DIR, \"submission.csv\")\n", + "submission.to_csv(output_path, index=False)\n", + "\n", + "print(f\"Submission saved to: {output_path}\")\n", + "print(f\"File size: {os.path.getsize(output_path) / 1024:.1f} KB\")\n", + "print(\"\\nFinal submission 
preview:\")\n", + "print(submission.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kaggle Submit Command\n", + "\n", + "After the notebook finishes, submit the output file using the Kaggle CLI:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run this cell to submit directly from the notebook (or paste into a terminal without the leading `!`)\n", + "# Make sure your Kaggle API credentials are configured (~/.kaggle/kaggle.json)\n", + "\n", + "!kaggle competitions submit \\\n", + " -c march-machine-learning-mania-2026 \\\n", + " -f /kaggle/working/submission.csv \\\n", + " -m \"Baseline 0.5 submission\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notes\n", + "\n", + "- **File used:** `SampleSubmissionStage2.csv` — contains all 2026 season matchup IDs (both Men's and Women's).\n", + "- **Baseline:** Every matchup is predicted with probability `0.5` (equal chance for either team). This scores **0.0** before the tournament begins, as stated in the competition rules.\n", + "- **Pred column meaning:** Probability that the team with the **lower** `TeamID` wins. Men's TeamIDs are 1000–1999; Women's TeamIDs are 3000–3999.\n", + "- **Submission deadline:** March 19, 2026, 4 PM UTC. Select your best submission manually before the deadline — do not rely on automatic selection.\n", + "- To improve on the baseline, replace the `submission[\"Pred\"] = 0.5` line with model-predicted probabilities derived from the historical game data provided." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}