import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
# House-price regression demo: build a tiny synthetic dataset, fit a linear
# model on an 80/20 split, report MAE and R^2, plot actual vs. predicted
# prices, and finally price one unseen house.

# Column names are defined once so the dataset, the feature selection and the
# new-house frame always refer to exactly the same columns.
AREA_COLUMN = 'Area (sqft)'
ROOMS_COLUMN = 'Num Rooms'
PRICE_COLUMN = 'Price ($)'
FEATURE_COLUMNS = [AREA_COLUMN, ROOMS_COLUMN]

# Step 1: Generate a simple synthetic dataset for house prices.
# Features are area (in sqft) and number of rooms; price is the target.
data = {
    AREA_COLUMN: [1500, 1800, 2400, 3000, 3500, 4000, 4500, 5000, 5500, 6000],
    ROOMS_COLUMN: [3, 3, 4, 4, 5, 5, 6, 6, 7, 7],
    PRICE_COLUMN: [
        400000, 450000, 500000, 600000, 650000,
        700000, 750000, 800000, 850000, 900000,
    ],
}

# Create a pandas DataFrame
df = pd.DataFrame(data)

# Step 2: Explore the dataset (optional)
print("First few rows of the dataset:")
print(df.head())

# Step 3: Preprocess the data
X = df[FEATURE_COLUMNS]  # Features: area and number of rooms
y = df[PRICE_COLUMN]  # Target: house price

# Split the data into training (80%) and testing (20%) sets.
# random_state is fixed so the split (and therefore the metrics) is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 4: Train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 5: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 6: Evaluate the model
# NOTE: the dataset has only 10 rows, so the held-out set is tiny and these
# numbers are illustrative rather than a reliable estimate of model quality.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print(f"Mean Absolute Error (MAE): ${mae:.2f}")
print(f"R-squared (R2): {r2:.2f}")

# Step 7: Visualize the results (optional)
# Scatter of actual vs. predicted prices, plus the dashed y = x reference line
# on which a perfect prediction would fall.
lowest_actual = y_test.min()
highest_actual = y_test.max()

plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='blue', marker='o', edgecolor='k')
plt.plot(
    [lowest_actual, highest_actual],
    [lowest_actual, highest_actual],
    color='red',
    linewidth=2,
    linestyle='--',
)
plt.xlabel('Actual Prices ($)')
plt.ylabel('Predicted Prices ($)')
plt.title('Actual vs Predicted House Prices')
plt.show()

# Step 8: Predicting on new data (optional)
# The new house is described once; the same values feed both the model input
# and the printed message so the two cannot disagree.
new_area = 2800
new_rooms = 4
new_data = pd.DataFrame([[new_area, new_rooms]], columns=FEATURE_COLUMNS)
new_price = model.predict(new_data)
print(
    f"\nPredicted price for a house with {new_area} sqft "
    f"and {new_rooms} rooms: ${new_price[0]:.2f}"
)
aW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgbnVtcHkgYXMgbnAKaW1wb3J0IG1hdHBsb3RsaWIucHlwbG90IGFzIHBsdApmcm9tIHNrbGVhcm4ubW9kZWxfc2VsZWN0aW9uIGltcG9ydCB0cmFpbl90ZXN0X3NwbGl0CmZyb20gc2tsZWFybi5saW5lYXJfbW9kZWwgaW1wb3J0IExpbmVhclJlZ3Jlc3Npb24KZnJvbSBza2xlYXJuLm1ldHJpY3MgaW1wb3J0IG1lYW5fYWJzb2x1dGVfZXJyb3IsIHIyX3Njb3JlCgojIFN0ZXAgMTogR2VuZXJhdGUgYSBzaW1wbGUgc3ludGhldGljIGRhdGFzZXQgZm9yIGhvdXNlIHByaWNlcwojIExldCdzIHNpbXVsYXRlIGRhdGEgd2l0aCB0aGUgZm9sbG93aW5nIGZlYXR1cmVzOiBhcmVhIChpbiBzcWZ0KSwgbnVtYmVyIG9mIHJvb21zLCBhbmQgcHJpY2UgKHRhcmdldCB2YXJpYWJsZSkKZGF0YSA9IHsKICAgICdBcmVhIChzcWZ0KSc6IFsxNTAwLCAxODAwLCAyNDAwLCAzMDAwLCAzNTAwLCA0MDAwLCA0NTAwLCA1MDAwLCA1NTAwLCA2MDAwXSwKICAgICdOdW0gUm9vbXMnOiBbMywgMywgNCwgNCwgNSwgNSwgNiwgNiwgNywgN10sCiAgICAnUHJpY2UgKCQpJzogWzQwMDAwMCwgNDUwMDAwLCA1MDAwMDAsIDYwMDAwMCwgNjUwMDAwLCA3MDAwMDAsIDc1MDAwMCwgODAwMDAwLCA4NTAwMDAsIDkwMDAwMF0KfQoKIyBDcmVhdGUgYSBwYW5kYXMgRGF0YUZyYW1lCmRmID0gcGQuRGF0YUZyYW1lKGRhdGEpCgojIFN0ZXAgMjogRXhwbG9yZSB0aGUgZGF0YXNldCAob3B0aW9uYWwpCnByaW50KCJGaXJzdCBmZXcgcm93cyBvZiB0aGUgZGF0YXNldDoiKQpwcmludChkZi5oZWFkKCkpCgojIFN0ZXAgMzogUHJlcHJvY2VzcyB0aGUgZGF0YQpYID0gZGZbWydBcmVhIChzcWZ0KScsICdOdW0gUm9vbXMnXV0gICMgRmVhdHVyZXM6IEFyZWEgYW5kIE51bWJlciBvZiBSb29tcwp5ID0gZGZbJ1ByaWNlICgkKSddICAjIFRhcmdldDogSG91c2UgUHJpY2UKCiMgU3BsaXQgdGhlIGRhdGEgaW50byB0cmFpbmluZyAoODAlKSBhbmQgdGVzdGluZyAoMjAlKSBzZXRzClhfdHJhaW4sIFhfdGVzdCwgeV90cmFpbiwgeV90ZXN0ID0gdHJhaW5fdGVzdF9zcGxpdChYLCB5LCB0ZXN0X3NpemU9MC4yLCByYW5kb21fc3RhdGU9NDIpCgojIFN0ZXAgNDogVHJhaW4gYSBMaW5lYXIgUmVncmVzc2lvbiBtb2RlbAptb2RlbCA9IExpbmVhclJlZ3Jlc3Npb24oKQptb2RlbC5maXQoWF90cmFpbiwgeV90cmFpbikKCiMgU3RlcCA1OiBNYWtlIHByZWRpY3Rpb25zIG9uIHRoZSB0ZXN0IHNldAp5X3ByZWQgPSBtb2RlbC5wcmVkaWN0KFhfdGVzdCkKCiMgU3RlcCA2OiBFdmFsdWF0ZSB0aGUgbW9kZWwKbWFlID0gbWVhbl9hYnNvbHV0ZV9lcnJvcih5X3Rlc3QsIHlfcHJlZCkKcjIgPSByMl9zY29yZSh5X3Rlc3QsIHlfcHJlZCkKCnByaW50KGYiXG5Nb2RlbCBFdmFsdWF0aW9uOiIpCnByaW50KGYiTWVhbiBBYnNvbHV0ZSBFcnJvciAoTUFFKTogJHttYWU6LjJmfSIpCnByaW50KGYiUi1zcXVhcmVkIChSMik6IHtyMjouMmZ9IikKCiMgU3RlcCA3OiBW
aXN1YWxpemUgdGhlIHJlc3VsdHMgKG9wdGlvbmFsKQojIFBsb3R0aW5nIHRoZSBhY3R1YWwgdnMgcHJlZGljdGVkIHByaWNlcwpwbHQuZmlndXJlKGZpZ3NpemU9KDgsIDYpKQpwbHQuc2NhdHRlcih5X3Rlc3QsIHlfcHJlZCwgY29sb3I9J2JsdWUnLCBtYXJrZXI9J28nLCBlZGdlY29sb3I9J2snKQpwbHQucGxvdChbbWluKHlfdGVzdCksIG1heCh5X3Rlc3QpXSwgW21pbih5X3Rlc3QpLCBtYXgoeV90ZXN0KV0sIGNvbG9yPSdyZWQnLCBsaW5ld2lkdGg9MiwgbGluZXN0eWxlPSctLScpCnBsdC54bGFiZWwoJ0FjdHVhbCBQcmljZXMgKCQpJykKcGx0LnlsYWJlbCgnUHJlZGljdGVkIFByaWNlcyAoJCknKQpwbHQudGl0bGUoJ0FjdHVhbCB2cyBQcmVkaWN0ZWQgSG91c2UgUHJpY2VzJykKcGx0LnNob3coKQoKIyBTdGVwIDg6IFByZWRpY3Rpbmcgb24gbmV3IGRhdGEgKG9wdGlvbmFsKQojIExldCdzIHNheSB3ZSBoYXZlIGEgbmV3IGhvdXNlIHdpdGggMjgwMCBzcWZ0IGFuZCA0IHJvb21zCm5ld19kYXRhID0gcGQuRGF0YUZyYW1lKFtbMjgwMCwgNF1dLCBjb2x1bW5zPVsnQXJlYSAoc3FmdCknLCAnTnVtIFJvb21zJ10pCm5ld19wcmljZSA9IG1vZGVsLnByZWRpY3QobmV3X2RhdGEpCnByaW50KGYiXG5QcmVkaWN0ZWQgcHJpY2UgZm9yIGEgaG91c2Ugd2l0aCAyODAwIHNxZnQgYW5kIDQgcm9vbXM6ICR7bmV3X3ByaWNlWzBdOi4yZn0iKQo=