fork download
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.linear_model import LinearRegression
  6. from sklearn.metrics import mean_absolute_error, r2_score
  7.  
  8. # Step 1: Generate a simple synthetic dataset for house prices
  9. # Let's simulate data with the following features: area (in sqft), number of rooms, and price (target variable)
  10. data = {
  11. 'Area (sqft)': [1500, 1800, 2400, 3000, 3500, 4000, 4500, 5000, 5500, 6000],
  12. 'Num Rooms': [3, 3, 4, 4, 5, 5, 6, 6, 7, 7],
  13. 'Price ($)': [400000, 450000, 500000, 600000, 650000, 700000, 750000, 800000, 850000, 900000]
  14. }
  15.  
  16. # Create a pandas DataFrame
  17. df = pd.DataFrame(data)
  18.  
  19. # Step 2: Explore the dataset (optional)
  20. print("First few rows of the dataset:")
  21. print(df.head())
  22.  
  23. # Step 3: Preprocess the data
  24. X = df[['Area (sqft)', 'Num Rooms']] # Features: Area and Number of Rooms
  25. y = df['Price ($)'] # Target: House Price
  26.  
  27. # Split the data into training (80%) and testing (20%) sets
  28. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  29.  
  30. # Step 4: Train a Linear Regression model
  31. model = LinearRegression()
  32. model.fit(X_train, y_train)
  33.  
  34. # Step 5: Make predictions on the test set
  35. y_pred = model.predict(X_test)
  36.  
  37. # Step 6: Evaluate the model
  38. mae = mean_absolute_error(y_test, y_pred)
  39. r2 = r2_score(y_test, y_pred)
  40.  
  41. print(f"\nModel Evaluation:")
  42. print(f"Mean Absolute Error (MAE): ${mae:.2f}")
  43. print(f"R-squared (R2): {r2:.2f}")
  44.  
  45. # Step 7: Visualize the results (optional)
  46. # Plotting the actual vs predicted prices
  47. plt.figure(figsize=(8, 6))
  48. plt.scatter(y_test, y_pred, color='blue', marker='o', edgecolor='k')
  49. plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2, linestyle='--')
  50. plt.xlabel('Actual Prices ($)')
  51. plt.ylabel('Predicted Prices ($)')
  52. plt.title('Actual vs Predicted House Prices')
  53. plt.show()
  54.  
  55. # Step 8: Predicting on new data (optional)
  56. # Let's say we have a new house with 2800 sqft and 4 rooms
  57. new_data = pd.DataFrame([[2800, 4]], columns=['Area (sqft)', 'Num Rooms'])
  58. new_price = model.predict(new_data)
  59. print(f"\nPredicted price for a house with 2800 sqft and 4 rooms: ${new_price[0]:.2f}")
  60.  
Success #stdin #stdout 3.5s 127436KB
stdin
Standard input is empty
stdout
First few rows of the dataset:
   Area (sqft)  Num Rooms  Price ($)
0         1500          3     400000
1         1800          3     450000
2         2400          4     500000
3         3000          4     600000
4         3500          5     650000

Model Evaluation:
Mean Absolute Error (MAE): $1353.50
R-squared (R2): 1.00

Predicted price for a house with 2800 sqft and 4 rooms: $561305.73