-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv_regression_plot.py
More file actions
87 lines (68 loc) · 2.83 KB
/
csv_regression_plot.py
File metadata and controls
87 lines (68 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# --- Load the dataset ------------------------------------------------------
# Greets the user, then keeps prompting for a CSV path until pandas manages to
# read one. Typing "q" (any case) quits the program. On success `df` holds the
# loaded DataFrame and its shape and column index are echoed back.
print("Welcome to Data Prediction and Visualizer\n")
while True:
    # Strip surrounding whitespace so a pasted path with a stray trailing
    # space (or " q") is still understood.
    csv_name = input("Please Enter the path of CSV File: ").strip()
    if csv_name.lower() == "q":
        print("Exitting...")
        # Raise SystemExit directly: the bare exit() helper is added by the
        # `site` module for interactive sessions and may be absent otherwise.
        raise SystemExit
    try:
        # Only the read itself is guarded; the success path lives in `else`.
        df = pd.read_csv(csv_name)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable paths; ValueError covers pandas'
        # parser and empty-file errors as well as text decoding failures.
        print("Enter a valid file path")
        print(f"Error: {e}\n")
    else:
        print(f"Shape of dataset: {df.shape}")
        print(f"Columns in dataset: {df.columns}\n")
        break
# --- Choose the target column and build X / y ------------------------------
# Keeps prompting until the user names an existing, numeric column that has at
# least one value. Afterwards:
#   y - the target values, restricted to rows where the target is present
#   X - the remaining numeric/boolean columns for those same rows, with
#       missing feature values replaced by the column mean
while True:
    target_col_name = input("Enter the Target column name: ")
    try:
        # Raises KeyError when the name is not one of df's columns.
        X = df.drop(target_col_name, axis=1)
    except KeyError as col_name_error:
        print("Invalid Column Name")
        print(f"Error: {col_name_error}\n")
        continue

    y = df[target_col_name]

    # A linear regression needs numbers to predict; re-prompt now instead of
    # letting the model fit fail later with a traceback.
    if not pd.api.types.is_numeric_dtype(y):
        print("Invalid Column Name")
        print(f"Error: column {target_col_name} is not numeric\n")
        continue

    # Rows without a target value cannot be used for training or scoring.
    has_target = y.notna()
    if not has_target.any():
        print("Invalid Column Name")
        print(f"Error: column {target_col_name} has no values\n")
        continue
    X = X.loc[has_target]
    y = y.loc[has_target]

    # Keep columns that are positively numeric (or boolean) rather than only
    # discarding `object` ones, so X.mean() below is always well defined.
    X = X.select_dtypes(include=["number", "bool"])
    X = X.fillna(X.mean())
    break
# --- Correlation overview ----------------------------------------------------
# Shows how every numeric column of `df` correlates with the chosen target,
# strongest positive correlation first. When the target is not among the
# numeric columns a warning is printed instead.
numeric_df = df.select_dtypes(include=["number"])
print("\n")
target_is_numeric = target_col_name in numeric_df.columns
if target_is_numeric:
    print("Correlation with target column:")
    correlation_matrix = numeric_df.corr()
    ranked_by_target = correlation_matrix[target_col_name].sort_values(ascending=False)
    print(ranked_by_target, "\n")
else:
    print(f"⚠ Target column {target_col_name} is not numeric. Correlation cannot be computed.")
# --- Choose the single feature column -----------------------------------------
# The feature must be one of the columns of X (the candidate features prepared
# when the target was chosen); anything else aborts the run.
feature_col_name = input("Enter the feature column name: ")  # input() already returns str
if feature_col_name not in X.columns:
    print(f"⚠ {feature_col_name} is not a valid numeric feature.")
    # Exit with a non-zero status so callers can tell the run failed. The bare
    # exit() helper comes from the `site` module and is meant for interactive
    # sessions, so SystemExit is raised directly.
    raise SystemExit(1)
# --- Train / test split and model fit ----------------------------------------
# Uses only the selected feature column. 80% of the rows go to training, and
# the fixed random_state makes the split reproducible between runs.
feature_frame = X[[feature_col_name]]
X_train, X_test, y_train, y_test = train_test_split(
    feature_frame,
    y,
    train_size=0.8,
    random_state=42,
)
model = LinearRegression().fit(X_train, y_train)
# X_test already contains just the feature column, so it is passed as-is.
predicted_values = model.predict(X_test)
# --- Scatter plot: actual vs. predicted ---------------------------------------
# Draws the held-out target values (blue) and the model's predictions (red)
# against the feature. Any plotting failure is reported and the script carries
# on.
try:
    feature_values = X_test[feature_col_name]
    scatter_layers = (
        (y_test, "blue", "Actual", 8),
        (predicted_values, "red", "Predicted", 10),
    )
    for layer_values, layer_color, layer_label, marker_size in scatter_layers:
        plt.scatter(
            feature_values,
            layer_values,
            color=layer_color,
            label=layer_label,
            s=marker_size,
        )
    plt.xlabel(feature_col_name)
    plt.ylabel(target_col_name)
    plt.title(f"Prediction of {target_col_name} using {feature_col_name}")
    plt.legend()
    plt.show()
except Exception as plot_error:
    print("Error occurred while plotting graph")
    print("Error: ", plot_error)
# --- Evaluation metrics --------------------------------------------------------
# Prints a spacer, then scores the predictions against the held-out targets.
print("\n")
metric_table = (
    ("Mean Squared Error: ", mean_squared_error),
    ("R2 score: ", r2_score),
)
for metric_caption, metric_fn in metric_table:
    # Each metric is computed right before it is printed, one after another.
    print(metric_caption, metric_fn(y_test, predicted_values))
# --- Optional export -------------------------------------------------------------
# On a "y" answer (any case) writes the test-set feature values, the actual
# targets and the predictions to predictions.csv in the working directory.
save = input("Do you want to save predictions to CSV? (y/n): ")
wants_export = save.lower() == "y"
if wants_export:
    export_columns = {
        feature_col_name: X_test[feature_col_name].values,
        "Actual": y_test.values,
        "Predicted": predicted_values,
    }
    result_df = pd.DataFrame(export_columns)
    # index=False keeps the row index out of the written file.
    result_df.to_csv("predictions.csv", index=False)
    print("✅ Predictions saved to predictions.csv")