# PredictPlot/csv_regression_plot.py (main branch) -- mitanshhh/PredictPlot on GitHub
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


print("Welcome to Data Prediction and Visualizer\n")

# Prompt until a CSV file loads successfully; entering "q" (any case) quits.
while True:
    # Pasted paths frequently carry stray whitespace or surrounding quotes,
    # which would otherwise make an existing file look missing.
    csv_name = input("Please Enter the path of CSV File: ").strip().strip("\"'")
    if csv_name.lower() == "q":
        print("Exitting...")
        # exit() is injected by the `site` module for interactive sessions and
        # is not guaranteed to exist (e.g. `python -S`); raising SystemExit
        # ends the program the same way, with the same exit status.
        raise SystemExit
    try:
        # Only the read itself is guarded; reporting happens on success below.
        df = pd.read_csv(csv_name)
    except Exception as e:
        # Interactive boundary: any read/parse failure is shown and retried.
        print("Enter a valid file path")
        print(f"Error: {e}\n")
    else:
        print(f"Shape of dataset: {df.shape}")
        print(f"Columns in dataset: {df.columns}\n")
        break

# Prompt until the entered target column exists, then build:
#   X - the remaining numeric/boolean columns with NaNs replaced by column means
#   y - the target column exactly as loaded
while True:
    target_col_name = input("Enter the Target column name: ")
    try:
        # Guard only the column lookup, so unrelated failures are not
        # misreported to the user as "Invalid Column Name".
        X = df.drop(target_col_name, axis=1)
    except KeyError as col_name_error:
        print("Invalid Column Name")
        print(f"Error: {col_name_error}\n")
        continue

    # Keep numeric (and boolean) columns rather than dropping only `object`
    # ones: other non-numeric dtypes would otherwise stay in X, where they
    # could break the mean imputation below and would pass the later
    # "valid numeric feature" check.
    X = X.select_dtypes(include=["number", "bool"])
    X = X.fillna(X.mean())

    y = df[target_col_name]
    break


# Show how every numeric column correlates with the chosen target, strongest
# positive correlation first. A non-numeric target only produces a warning.
numeric_df = df.select_dtypes(include=['number'])

print("\n")
if target_col_name in numeric_df.columns:
    print("Correlation with target column:")
    target_correlation = numeric_df.corr()[target_col_name]
    print(target_correlation.sort_values(ascending=False), "\n")
else:
    print(f"⚠ Target column {target_col_name} is not numeric. Correlation cannot be computed.")


# Ask for the single feature to regress on; it must be one of the columns kept
# in X. input() already returns str, so no conversion is needed.
feature_col_name = input("Enter the feature column name: ")
if feature_col_name not in X.columns:
    print(f"⚠ {feature_col_name} is not a valid numeric feature.")
    # exit() comes from the `site` module and is intended for interactive use;
    # raising SystemExit stops the script the same way, with the same status.
    raise SystemExit


# Fit a one-feature linear regression on an 80/20 train/test split and predict
# the held-out rows.
#
# X has its NaNs imputed earlier but y does not, and the estimator rejects NaN
# targets, so rows without a target value are left out of the split. When no
# target is missing the mask is all True and the split is unchanged.
has_target = y.notna()
missing_target_rows = int((~has_target).sum())
if missing_target_rows:
    print(f"Skipping {missing_target_rows} row(s) with missing {target_col_name} values")

X_train, X_test, y_train, y_test = train_test_split(
    X.loc[has_target, [feature_col_name]],
    y[has_target],
    random_state=42,  # fixed seed keeps the split reproducible between runs
    train_size=0.8,
)
model = LinearRegression()
model.fit(X_train, y_train)
# X_test already holds only the selected feature column.
predicted_values = model.predict(X_test)

# Scatter the held-out actual values (blue) and the model's predictions (red)
# against the selected feature. Plotting is best-effort: a failure is reported
# and the script continues to the metrics below.
try:
    feature_values = X_test[feature_col_name]
    plt.scatter(feature_values, y_test, color="blue", label="Actual", s=8)
    plt.scatter(feature_values, predicted_values, color="red", label="Predicted", s=10)
    plt.xlabel(feature_col_name)
    plt.ylabel(target_col_name)
    plt.title(f"Prediction of {target_col_name} using {feature_col_name}")
    plt.legend()
    plt.show()
except Exception as plot_error:
    print("Error occurred while plotting graph")
    # Interpolate the error so the message matches the other "Error: ..." lines
    # (the previous placeholder-less f-string printed a doubled space).
    print(f"Error: {plot_error}")

# Report error metrics for the predictions on the held-out test rows.
print("\n")
mse_value = mean_squared_error(y_test, predicted_values)
print("Mean Squared Error: ", mse_value)
r2_value = r2_score(y_test, predicted_values)
print("R2 score: ", r2_value)


# Optionally write the test-set feature values, actual targets and predictions
# to predictions.csv in the current working directory (answer "y" to save).
save = input("Do you want to save predictions to CSV? (y/n): ")
wants_csv = save.lower() == "y"
if wants_csv:
    output_columns = {
        feature_col_name: X_test[feature_col_name].values,
        'Actual': y_test.values,
        'Predicted': predicted_values,
    }
    result_df = pd.DataFrame(output_columns)
    result_df.to_csv("predictions.csv", index=False)
    print("✅ Predictions saved to predictions.csv")