########>> INITIALIZE <<########

# === Basic Operation Libraries ===
import os
import sys
import ast
import datetime
import re
import time

# === Data Analysis Libraries ===
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
%matplotlib inline

# === Display Settings for Jupyter ===
from IPython.display import display, HTML

# === Display Settings for Pandas ===
pd.set_option('display.html.table_schema', True)
pd.set_option('expand_frame_repr', True)
pd.set_option('display.max_colwidth', 200)
pd.options.display.html.use_mathjax = False

# === Manage Warnings ===
import warnings
warnings.filterwarnings('ignore')

# === Completion Timestamp ===
print("\n{:<5} : {}".format("Finished", str(datetime.datetime.now())))

Finished : 2025-10-30 21:08:32.411615

apt = pd.read_csv("C:\\Users\\alexp\\Charlotte_Apartments.csv")

apt = apt.dropna(how='all').reset_index(drop=True)

# Now calculate missing Rent %
missing_rent_pct = apt['Rent'].isna().mean() * 100
print(f"Percentage of missing Rent values: {missing_rent_pct:.2f}%")

Percentage of missing Rent values: 20.00%

apt['Rent'] = apt.groupby(['Complex', 'Bedrooms'])['Rent'].transform(lambda x: x.fillna(x.mean()))

apt['Rent'].isna().mean() * 100

np.float64(0.0)

apt['price_per_sqft'] = apt['Rent'] / apt['Sqft']

apt.head()

print(apt.dtypes)

Complex            object
Address            object
Unit_Variant       object
Bedrooms          float64
Bathrooms         float64
Rent              float64
Sqft              float64
Amenities          object
Website            object
Neighborhood       object
price_per_sqft    float64
dtype: object

amenity_map = {
    # Core amenities
    "In-unit washer/dryer": "laundry",
    "Pool": "pool",
    "Resort-style pool": "pool",
    "Fitness center": "gym",
    "Fitness": "gym",
    "Gym": "gym",
    
    # Pet-related
    "Pet-friendly": "pets",
    "Pet friendly": "pets",
    "Pet-friendly community": "pets",
    
    # Parking and access
    "Parking": "parking",
    "EV charging stations": "ev_charging",
    "Elevator": "elevator",
    "Controlled access": "secure_access",
    
    # Connectivity
    "Community-wide wifi": "wifi",
    "High-speed internet in common areas": "wifi_common",
    
    # Services
    "Trash pickup": "trash_pickup",
    "Trash pickup door to door": "trash_pickup",
    "Renters insurance": "renters_insurance",
    
    # Shared spaces
    #"Business center": "business",
    #"Clubhouse": "clubhouse",
    #"Package service": "packages",
    "Package lockers": "packages", 
    #"Balcony": "balcony",
    #"Fireplace": "fireplace",
    
    # Accessibility and extras
    #"Furnished": "furnished",
    #"Wheelchair accessible": "accessible",
    #"Playground": "playground",
    #"Tennis court": "tennis",
    #"Basketball court": "basketball",
    #"Grill area": "grill",
    #"Green space": "greenspace",
    "Recycling": "recycling"
}

# Initialize all amenity columns to 0
for simple_name in amenity_map.values():
    apt[simple_name] = 0

# Flag amenities based on presence
for idx, row in apt.iterrows():
    if pd.notna(row["Amenities"]):
        listed = [a.strip() for a in row["Amenities"].split(";")]
        for raw in listed:
            if raw in amenity_map:
                apt.at[idx, amenity_map[raw]] = 1
apt.head()

print(apt.dtypes)

Complex               object
Address               object
Unit_Variant          object
Bedrooms             float64
Bathrooms            float64
Rent                 float64
Sqft                 float64
Amenities             object
Website               object
Neighborhood          object
price_per_sqft       float64
laundry                int64
pool                   int64
gym                    int64
pets                   int64
parking                int64
ev_charging            int64
elevator               int64
secure_access          int64
wifi                   int64
wifi_common            int64
trash_pickup           int64
renters_insurance      int64
packages               int64
recycling              int64
dtype: object

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# Step 6: Separate cleaned features and target, and force float64 dtype
X_clean = df_model.drop(columns=[target]).astype(np.float64)
y_clean = df_model[target].astype(np.float64)

# Step 7: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 8: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 9: View summary with real column names
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.956
Model:                            OLS   Adj. R-squared:                  0.953
Method:                 Least Squares   F-statistic:                     332.3
Date:                Thu, 30 Oct 2025   Prob (F-statistic):          4.59e-146
Time:                        21:08:32   Log-Likelihood:                 178.40
No. Observations:                 245   AIC:                            -324.8
Df Residuals:                     229   BIC:                            -268.8
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0553      0.025      2.180      0.030       0.005       0.105
Bathrooms                        0.0356      0.034      1.055      0.292      -0.031       0.102
Rent                             0.0007   3.36e-05     20.731      0.000       0.001       0.001
Sqft                            -0.0020    8.9e-05    -22.811      0.000      -0.002      -0.002
laundry                          0.6675      0.013     52.199      0.000       0.642       0.693
pool                             0.6675      0.013     52.199      0.000       0.642       0.693
gym                           2.961e-16   1.27e-17     23.396      0.000    2.71e-16    3.21e-16
pets                             0.0330      0.014      2.322      0.021       0.005       0.061
parking                       7.273e-17   4.99e-18     14.580      0.000    6.29e-17    8.26e-17
ev_charging                      0.3111      0.012     25.137      0.000       0.287       0.335
elevator                      6.755e-17      6e-18     11.251      0.000    5.57e-17    7.94e-17
secure_access                    0.0761      0.005     13.934      0.000       0.065       0.087
wifi                         -6.785e-17   6.14e-18    -11.044      0.000      -8e-17   -5.57e-17
wifi_common                      0.6675      0.013     52.199      0.000       0.642       0.693
trash_pickup                 -1.345e-18   1.52e-18     -0.886      0.376   -4.34e-18    1.65e-18
renters_insurance             8.291e-18   2.75e-18      3.016      0.003    2.87e-18    1.37e-17
packages                         0.0761      0.005     13.934      0.000       0.065       0.087
recycling                        0.0761      0.005     13.934      0.000       0.065       0.087
Complex_Bond on Mint             0.3106      0.026     11.807      0.000       0.259       0.362
Complex_Broadstone Craft        -0.0058      0.022     -0.267      0.790      -0.049       0.037
Complex_Ello House               0.0167      0.018      0.933      0.352      -0.019       0.052
Complex_Hawkins Press            0.1542      0.026      5.848      0.000       0.102       0.206
Complex_Moderna Liberty Row      0.3297      0.018     18.187      0.000       0.294       0.365
Complex_Novel Mallard Creek      0.1319      0.014      9.712      0.000       0.105       0.159
Complex_Solis Midtown            0.1012      0.016      6.415      0.000       0.070       0.132
Complex_The Henry               -0.0981      0.016     -5.949      0.000      -0.131      -0.066
Complex_The Landon              -0.1124      0.027     -4.152      0.000      -0.166      -0.059
Complex_The Leo LoSo            -0.1246      0.025     -5.009      0.000      -0.174      -0.076
Complex_The Perch               -0.0431      0.016     -2.612      0.010      -0.076      -0.011
Complex_Tyvola Tapestry          0.0072      0.029      0.245      0.807      -0.051       0.065
Neighborhood_NoDa               -0.0981      0.016     -5.949      0.000      -0.131      -0.066
Neighborhood_South End           0.0463      0.011      4.111      0.000       0.024       0.068
Neighborhood_SouthPark           0.2245      0.013     16.894      0.000       0.198       0.251
Neighborhood_University City     0.1319      0.014      9.712      0.000       0.105       0.159
Neighborhood_Uptown              0.2617      0.010     24.932      0.000       0.241       0.282
Neighborhood_West Charlotte      0.1012      0.016      6.415      0.000       0.070       0.132
==============================================================================
Omnibus:                       32.488   Durbin-Watson:                   1.046
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               47.199
Skew:                           0.813   Prob(JB):                     5.64e-11
Kurtosis:                       4.407   Cond. No.                     1.09e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.06e-23. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Step 1: Drop non-useful and non-significant columns
apt_cleaned = apt.drop(columns=[
    "Address", "Unit_Variant", "Amenities", "Website",
    "furnished", "accessible", "playground", "tennis",
    "basketball", "grill", "greenspace", "Bathrooms"  # Removed due to insignificance
], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# Step 6: Separate cleaned features and target, and force float64 dtype
X_clean = df_model.drop(columns=[target]).astype(np.float64)
y_clean = df_model[target].astype(np.float64)

# Step 7: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 8: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 9: View summary with real column names
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.956
Model:                            OLS   Adj. R-squared:                  0.953
Method:                 Least Squares   F-statistic:                     355.8
Date:                Thu, 30 Oct 2025   Prob (F-statistic):          4.16e-147
Time:                        21:08:32   Log-Likelihood:                 177.80
No. Observations:                 245   AIC:                            -325.6
Df Residuals:                     230   BIC:                            -273.1
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0626      0.024      2.569      0.011       0.015       0.111
Rent                             0.0007   3.36e-05     20.712      0.000       0.001       0.001
Sqft                            -0.0020   7.82e-05    -25.405      0.000      -0.002      -0.002
laundry                          0.6683      0.013     52.331      0.000       0.643       0.693
pool                             0.6683      0.013     52.331      0.000       0.643       0.693
gym                          -1.282e-16   5.86e-18    -21.865      0.000    -1.4e-16   -1.17e-16
pets                             0.0321      0.014      2.262      0.025       0.004       0.060
parking                       1.122e-16   9.33e-18     12.024      0.000    9.38e-17    1.31e-16
ev_charging                      0.3114      0.012     25.167      0.000       0.287       0.336
elevator                       2.23e-17   2.35e-17      0.949      0.344    -2.4e-17    6.86e-17
secure_access                    0.0754      0.005     13.907      0.000       0.065       0.086
wifi                         -5.338e-17   5.05e-18    -10.575      0.000   -6.33e-17   -4.34e-17
wifi_common                      0.6683      0.013     52.331      0.000       0.643       0.693
trash_pickup                 -2.113e-18    2.9e-18     -0.728      0.467   -7.83e-18     3.6e-18
renters_insurance            -1.269e-17   2.73e-18     -4.644      0.000   -1.81e-17    -7.3e-18
packages                         0.0754      0.005     13.907      0.000       0.065       0.086
recycling                        0.0754      0.005     13.907      0.000       0.065       0.086
Complex_Bond on Mint             0.3124      0.026     11.903      0.000       0.261       0.364
Complex_Broadstone Craft        -0.0072      0.022     -0.332      0.740      -0.050       0.036
Complex_Ello House               0.0162      0.018      0.904      0.367      -0.019       0.052
Complex_Hawkins Press            0.1541      0.026      5.841      0.000       0.102       0.206
Complex_Moderna Liberty Row      0.3309      0.018     18.284      0.000       0.295       0.367
Complex_Novel Mallard Creek      0.1332      0.014      9.848      0.000       0.107       0.160
Complex_Solis Midtown            0.1020      0.016      6.470      0.000       0.071       0.133
Complex_The Henry               -0.0981      0.016     -5.950      0.000      -0.131      -0.066
Complex_The Landon              -0.1177      0.027     -4.422      0.000      -0.170      -0.065
Complex_The Leo LoSo            -0.1247      0.025     -5.009      0.000      -0.174      -0.076
Complex_The Perch               -0.0432      0.016     -2.623      0.009      -0.076      -0.011
Complex_Tyvola Tapestry          0.0104      0.029      0.356      0.723      -0.047       0.068
Neighborhood_NoDa               -0.0981      0.016     -5.950      0.000      -0.131      -0.066
Neighborhood_South End           0.0456      0.011      4.055      0.000       0.023       0.068
Neighborhood_SouthPark           0.2236      0.013     16.857      0.000       0.197       0.250
Neighborhood_University City     0.1332      0.014      9.848      0.000       0.107       0.160
Neighborhood_Uptown              0.2620      0.010     24.962      0.000       0.241       0.283
Neighborhood_West Charlotte      0.1020      0.016      6.470      0.000       0.071       0.133
==============================================================================
Omnibus:                       32.646   Durbin-Watson:                   1.028
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               47.915
Skew:                           0.811   Prob(JB):                     3.94e-11
Kurtosis:                       4.436   Cond. No.                     1.09e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.06e-23. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the bedroom count to filter for (e.g., 0 for studios, 1 for 1BR, etc.)
bedroom_count = 0  # Change this to 0, 2, 3, etc. as needed

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific bedroom count
df_bedroom = df_model[df_model["Bedrooms"] == bedroom_count].copy()

# Step 7: Drop 'Bedrooms' (constant within group) and separate features and target
X_clean = df_bedroom.drop(columns=[target, "Bedrooms"]).astype(np.float64)
y_clean = df_bedroom[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Studios ({bedroom_count})-Bedroom Units ---")
print(model.summary())

--- Regression for Studios (0)-Bedroom Units ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                     4748.
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           5.46e-20
Time:                        21:08:32   Log-Likelihood:                 84.477
No. Observations:                  23   AIC:                            -147.0
Df Residuals:                      12   BIC:                            -134.5
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bathrooms                        0.5955      0.011     55.724      0.000       0.572       0.619
Rent                             0.0018   2.35e-05     74.474      0.000       0.002       0.002
Sqft                            -0.0050   7.79e-05    -64.570      0.000      -0.005      -0.005
laundry                          0.5955      0.011     55.724      0.000       0.572       0.619
pool                             0.5955      0.011     55.724      0.000       0.572       0.619
gym                          -3.421e-16   7.71e-18    -44.383      0.000   -3.59e-16   -3.25e-16
pets                             0.0754      0.003     22.529      0.000       0.068       0.083
parking                       2.553e-16   5.86e-18     43.526      0.000    2.42e-16    2.68e-16
ev_charging                      0.2179      0.005     43.098      0.000       0.207       0.229
elevator                       1.48e-16   2.92e-18     50.616      0.000    1.42e-16    1.54e-16
secure_access                    0.0608      0.001     42.495      0.000       0.058       0.064
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.5955      0.011     55.724      0.000       0.572       0.619
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.0608      0.001     42.495      0.000       0.058       0.064
recycling                        0.0608      0.001     42.495      0.000       0.058       0.064
Complex_Bond on Mint             0.0742      0.009      8.009      0.000       0.054       0.094
Complex_Broadstone Craft         0.1023      0.007     13.708      0.000       0.086       0.119
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press            0.0081      0.003      2.418      0.032       0.001       0.015
Complex_Moderna Liberty Row      0.3437      0.009     37.257      0.000       0.324       0.364
Complex_Novel Mallard Creek      0.1432      0.005     27.966      0.000       0.132       0.154
Complex_Solis Midtown            0.0052      0.003      1.983      0.071      -0.001       0.011
Complex_The Henry                0.0135      0.004      3.168      0.008       0.004       0.023
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                0.0146      0.004      3.738      0.003       0.006       0.023
Complex_Tyvola Tapestry         -0.1093      0.008    -13.915      0.000      -0.126      -0.092
Neighborhood_NoDa                0.0135      0.004      3.168      0.008       0.004       0.023
Neighborhood_South End           0.0081      0.003      2.418      0.032       0.001       0.015
Neighborhood_SouthPark           0.2344      0.004     65.830      0.000       0.227       0.242
Neighborhood_University City     0.1432      0.005     27.966      0.000       0.132       0.154
Neighborhood_Uptown              0.1910      0.004     54.504      0.000       0.183       0.199
Neighborhood_West Charlotte      0.0052      0.003      1.983      0.071      -0.001       0.011
==============================================================================
Omnibus:                        1.721   Durbin-Watson:                   2.392
Prob(Omnibus):                  0.423   Jarque-Bera (JB):                0.932
Skew:                          -0.492   Prob(JB):                        0.627
Kurtosis:                       3.071   Cond. No.                     7.68e+20
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is 1.15e-34. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the bedroom count to filter for (e.g., 0 for studios, 1 for 1BR, etc.)
bedroom_count = 1  # Change this to 0, 2, 3, etc. as needed

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific bedroom count
df_bedroom = df_model[df_model["Bedrooms"] == bedroom_count].copy()

# Step 7: Drop 'Bedrooms' (constant within group) and separate features and target
X_clean = df_bedroom.drop(columns=[target, "Bedrooms"]).astype(np.float64)
y_clean = df_bedroom[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for {bedroom_count}-Bedroom Units ---")
print(model.summary())

--- Regression for 1-Bedroom Units ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.983
Model:                            OLS   Adj. R-squared:                  0.981
Method:                 Least Squares   F-statistic:                     441.6
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           4.25e-87
Time:                        21:08:32   Log-Likelihood:                 164.97
No. Observations:                 121   AIC:                            -299.9
Df Residuals:                     106   BIC:                            -258.0
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bathrooms                        1.5247      0.163      9.365      0.000       1.202       1.848
Rent                             0.0012   3.85e-05     29.897      0.000       0.001       0.001
Sqft                            -0.0031    8.4e-05    -37.171      0.000      -0.003      -0.003
laundry                          0.3044      0.042      7.164      0.000       0.220       0.389
pool                             0.3044      0.042      7.164      0.000       0.220       0.389
gym                          -1.711e-15   2.73e-16     -6.278      0.000   -2.25e-15   -1.17e-15
pets                             0.0186      0.012      1.540      0.127      -0.005       0.043
parking                       4.213e-16   6.58e-17      6.398      0.000    2.91e-16    5.52e-16
ev_charging                      0.1245      0.021      6.024      0.000       0.084       0.165
elevator                      3.592e-16   8.01e-17      4.483      0.000       2e-16    5.18e-16
secure_access                    0.0261      0.006      4.011      0.000       0.013       0.039
wifi                          8.797e-17   1.37e-17      6.435      0.000    6.09e-17    1.15e-16
wifi_common                      0.3044      0.042      7.164      0.000       0.220       0.389
trash_pickup                  4.372e-17   4.23e-18     10.334      0.000    3.53e-17    5.21e-17
renters_insurance            -7.747e-18      3e-18     -2.584      0.011   -1.37e-17    -1.8e-18
packages                         0.0261      0.006      4.011      0.000       0.013       0.039
recycling                        0.0261      0.006      4.011      0.000       0.013       0.039
Complex_Bond on Mint             0.1174      0.027      4.393      0.000       0.064       0.170
Complex_Broadstone Craft         0.0024      0.017      0.142      0.887      -0.031       0.036
Complex_Ello House               0.0254      0.012      2.098      0.038       0.001       0.049
Complex_Hawkins Press            0.0535      0.021      2.535      0.013       0.012       0.095
Complex_Moderna Liberty Row      0.1659      0.027      6.169      0.000       0.113       0.219
Complex_Novel Mallard Creek      0.0669      0.014      4.917      0.000       0.040       0.094
Complex_Solis Midtown            0.0265      0.015      1.816      0.072      -0.002       0.055
Complex_The Henry               -0.0358      0.013     -2.799      0.006      -0.061      -0.010
Complex_The Landon              -0.0141      0.024     -0.588      0.558      -0.061       0.033
Complex_The Leo LoSo            -0.0575      0.019     -3.008      0.003      -0.095      -0.020
Complex_The Perch               -0.0074      0.014     -0.531      0.597      -0.035       0.020
Complex_Tyvola Tapestry         -0.0389      0.025     -1.538      0.127      -0.089       0.011
Neighborhood_NoDa               -0.0358      0.013     -2.799      0.006      -0.061      -0.010
Neighborhood_South End           0.0214      0.009      2.485      0.015       0.004       0.039
Neighborhood_SouthPark           0.1130      0.019      5.853      0.000       0.075       0.151
Neighborhood_University City     0.0669      0.014      4.917      0.000       0.040       0.094
Neighborhood_Uptown              0.1124      0.017      6.591      0.000       0.079       0.146
Neighborhood_West Charlotte      0.0265      0.015      1.816      0.072      -0.002       0.055
==============================================================================
Omnibus:                       41.886   Durbin-Watson:                   1.643
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               94.551
Skew:                           1.396   Prob(JB):                     2.94e-21
Kurtosis:                       6.311   Cond. No.                     1.02e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 4.44e-24. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the bedroom count to filter for (e.g., 0 for studios, 1 for 1BR, etc.)
bedroom_count = 2  # Change this to 0, 2, 3, etc. as needed

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific bedroom count
df_bedroom = df_model[df_model["Bedrooms"] == bedroom_count].copy()

# Step 7: Drop 'Bedrooms' (constant within group) and separate features and target
X_clean = df_bedroom.drop(columns=[target, "Bedrooms"]).astype(np.float64)
y_clean = df_bedroom[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for {bedroom_count}-Bedroom Units ---")
print(model.summary())

--- Regression for 2-Bedroom Units ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.991
Model:                            OLS   Adj. R-squared:                  0.989
Method:                 Least Squares   F-statistic:                     538.4
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           8.09e-64
Time:                        21:08:32   Log-Likelihood:                 129.78
No. Observations:                  83   AIC:                            -229.6
Df Residuals:                      68   BIC:                            -193.3
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bathrooms                        0.0165      0.031      0.534      0.595      -0.045       0.078
Rent                             0.0007   3.04e-05     23.388      0.000       0.001       0.001
Sqft                            -0.0016    8.3e-05    -19.456      0.000      -0.002      -0.001
laundry                          0.5670      0.020     29.038      0.000       0.528       0.606
pool                             0.5670      0.020     29.038      0.000       0.528       0.606
gym                          -7.842e-15   1.13e-15     -6.950      0.000   -1.01e-14   -5.59e-15
pets                             0.0389      0.013      2.996      0.004       0.013       0.065
parking                       1.377e-15   2.05e-16      6.713      0.000    9.68e-16    1.79e-15
ev_charging                      0.2454      0.013     19.609      0.000       0.220       0.270
elevator                      4.601e-16   6.73e-17      6.837      0.000    3.26e-16    5.94e-16
secure_access                    0.0628      0.005     13.038      0.000       0.053       0.072
wifi                          4.796e-17   1.18e-17      4.069      0.000    2.44e-17    7.15e-17
wifi_common                      0.5670      0.020     29.038      0.000       0.528       0.606
trash_pickup                 -1.614e-16   9.02e-18    -17.895      0.000   -1.79e-16   -1.43e-16
renters_insurance                     0          0        nan        nan           0           0
packages                         0.0628      0.005     13.038      0.000       0.053       0.072
recycling                        0.0628      0.005     13.038      0.000       0.053       0.072
Complex_Bond on Mint             0.1728      0.025      6.974      0.000       0.123       0.222
Complex_Broadstone Craft         0.0496      0.019      2.637      0.010       0.012       0.087
Complex_Ello House               0.0251      0.017      1.462      0.148      -0.009       0.059
Complex_Hawkins Press            0.1116      0.027      4.107      0.000       0.057       0.166
Complex_Moderna Liberty Row      0.3057      0.018     16.929      0.000       0.270       0.342
Complex_Novel Mallard Creek      0.1298      0.011     11.582      0.000       0.107       0.152
Complex_Solis Midtown            0.0560      0.015      3.649      0.001       0.025       0.087
Complex_The Henry               -0.0576      0.019     -3.028      0.003      -0.096      -0.020
Complex_The Landon              -0.0700      0.024     -2.965      0.004      -0.117      -0.023
Complex_The Leo LoSo            -0.0883      0.025     -3.581      0.001      -0.138      -0.039
Complex_The Perch               -0.0238      0.015     -1.611      0.112      -0.053       0.006
Complex_Tyvola Tapestry         -0.0439      0.028     -1.577      0.119      -0.099       0.012
Neighborhood_NoDa               -0.0576      0.019     -3.028      0.003      -0.096      -0.020
Neighborhood_South End           0.0485      0.011      4.482      0.000       0.027       0.070
Neighborhood_SouthPark           0.1917      0.014     13.320      0.000       0.163       0.220
Neighborhood_University City     0.1298      0.011     11.582      0.000       0.107       0.152
Neighborhood_Uptown              0.1986      0.009     20.942      0.000       0.180       0.217
Neighborhood_West Charlotte      0.0560      0.015      3.649      0.001       0.025       0.087
==============================================================================
Omnibus:                        3.676   Durbin-Watson:                   1.566
Prob(Omnibus):                  0.159   Jarque-Bera (JB):                4.090
Skew:                           0.032   Prob(JB):                        0.129
Kurtosis:                       4.086   Cond. No.                     1.02e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 5.62e-24. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the bedroom count to filter for (e.g., 0 for studios, 1 for 1BR, etc.)
bedroom_count = 3  # Change this to 0, 2, 3, etc. as needed

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific bedroom count
df_bedroom = df_model[df_model["Bedrooms"] == bedroom_count].copy()

# Step 7: Drop 'Bedrooms' (constant within group) and separate features and target
X_clean = df_bedroom.drop(columns=[target, "Bedrooms"]).astype(np.float64)
y_clean = df_bedroom[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for {bedroom_count}-Bedroom Units ---")
print(model.summary())

--- Regression for 3-Bedroom Units ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                     1482.
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           6.48e-12
Time:                        21:08:33   Log-Likelihood:                 54.904
No. Observations:                  18   AIC:                            -89.81
Df Residuals:                       8   BIC:                            -80.90
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bathrooms                        0.4363      0.174      2.506      0.037       0.035       0.838
Rent                             0.0006      0.000      3.031      0.016       0.000       0.001
Sqft                            -0.0011   5.22e-05    -20.718      0.000      -0.001      -0.001
laundry                          0.1923      0.062      3.091      0.015       0.049       0.336
pool                             0.1923      0.062      3.091      0.015       0.049       0.336
gym                           2.387e-16   1.49e-16      1.599      0.149   -1.06e-16    5.83e-16
pets                             0.0886      0.038      2.312      0.049       0.000       0.177
parking                       2.105e-16   1.87e-16      1.127      0.292    -2.2e-16    6.41e-16
ev_charging                      0.1225      0.079      1.561      0.157      -0.059       0.304
elevator                      1.108e-15   1.04e-15      1.067      0.317   -1.29e-15    3.51e-15
secure_access                    0.0395      0.005      7.633      0.000       0.028       0.051
wifi                          1.491e-15    1.4e-15      1.066      0.317   -1.73e-15    4.72e-15
wifi_common                      0.1923      0.062      3.091      0.015       0.049       0.336
trash_pickup                 -4.541e-17   4.18e-17     -1.085      0.309   -1.42e-16    5.11e-17
renters_insurance             2.089e-16   1.97e-16      1.059      0.321   -2.46e-16    6.64e-16
packages                         0.0395      0.005      7.633      0.000       0.028       0.051
recycling                        0.0395      0.005      7.633      0.000       0.028       0.051
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press            0.1103      0.172      0.642      0.539      -0.286       0.507
Complex_Moderna Liberty Row      0.1037      0.100      1.039      0.329      -0.126       0.334
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown            0.0355      0.112      0.316      0.760      -0.224       0.295
Complex_The Henry                0.0060      0.039      0.155      0.880      -0.083       0.095
Complex_The Landon              -0.0547      0.131     -0.418      0.687      -0.357       0.247
Complex_The Leo LoSo            -0.0784      0.134     -0.583      0.576      -0.389       0.232
Complex_The Perch                0.0491      0.034      1.453      0.184      -0.029       0.127
Complex_Tyvola Tapestry          0.0208      0.017      1.231      0.253      -0.018       0.060
Neighborhood_NoDa                0.0060      0.039      0.155      0.880      -0.083       0.095
Neighborhood_South End           0.0319      0.038      0.842      0.424      -0.056       0.119
Neighborhood_SouthPark           0.0697      0.017      4.028      0.004       0.030       0.110
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown              0.0491      0.034      1.453      0.184      -0.029       0.127
Neighborhood_West Charlotte      0.0355      0.112      0.316      0.760      -0.224       0.295
==============================================================================
Omnibus:                        7.732   Durbin-Watson:                   2.488
Prob(Omnibus):                  0.021   Jarque-Bera (JB):                4.810
Skew:                           1.104   Prob(JB):                       0.0903
Kurtosis:                       4.239   Cond. No.                     1.41e+36
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is 7.19e-65. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the neighborhood to filter for (e.g., "South End", "Uptown", etc.)
neighborhood_name = "SouthPark"  # Change this to any valid neighborhood name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific neighborhood
neighborhood_col = f"Neighborhood_{neighborhood_name}"
if neighborhood_col not in df_model.columns:
    raise ValueError(f"Neighborhood '{neighborhood_name}' not found in encoded columns.")

df_neighborhood = df_model[df_model[neighborhood_col] == 1].copy()

# Step 7: Drop neighborhood column (constant within group) and target
X_clean = df_neighborhood.drop(columns=[target, neighborhood_col]).astype(np.float64)
y_clean = df_neighborhood[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Neighborhood: {neighborhood_name} ---")
print(model.summary())

--- Regression for Neighborhood: SouthPark ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.960
Model:                            OLS   Adj. R-squared:                  0.955
Method:                 Least Squares   F-statistic:                     183.5
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           2.08e-30
Time:                        21:08:33   Log-Likelihood:                 56.699
No. Observations:                  53   AIC:                            -99.40
Df Residuals:                      46   BIC:                            -85.61
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0260      0.040      0.649      0.519      -0.055       0.107
Bathrooms                        0.0113      0.040      0.278      0.782      -0.070       0.093
Rent                             0.0007    6.6e-05     10.348      0.000       0.001       0.001
Sqft                            -0.0015      0.000    -13.881      0.000      -0.002      -0.001
laundry                          0.5456      0.028     19.427      0.000       0.489       0.602
pool                             0.5456      0.028     19.427      0.000       0.489       0.602
gym                                   0          0        nan        nan           0           0
pets                             0.0869      0.006     14.615      0.000       0.075       0.099
parking                               0          0        nan        nan           0           0
ev_charging                           0          0        nan        nan           0           0
elevator                              0          0        nan        nan           0           0
secure_access                    0.0869      0.006     14.615      0.000       0.075       0.099
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.5456      0.028     19.427      0.000       0.489       0.602
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.0869      0.006     14.615      0.000       0.075       0.099
recycling                        0.0869      0.006     14.615      0.000       0.075       0.099
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row      0.4587      0.031     14.703      0.000       0.396       0.521
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon              -0.0646      0.025     -2.625      0.012      -0.114      -0.015
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry          0.1514      0.023      6.490      0.000       0.104       0.198
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        3.315   Durbin-Watson:                   0.975
Prob(Omnibus):                  0.191   Jarque-Bera (JB):                2.628
Skew:                           0.541   Prob(JB):                        0.269
Kurtosis:                       3.142   Cond. No.                     1.06e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.95e-24. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the neighborhood to filter for (e.g., "South End", "Uptown", etc.)
neighborhood_name = "South End"  # Change this to any valid neighborhood name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific neighborhood
neighborhood_col = f"Neighborhood_{neighborhood_name}"
if neighborhood_col not in df_model.columns:
    raise ValueError(f"Neighborhood '{neighborhood_name}' not found in encoded columns.")

df_neighborhood = df_model[df_model[neighborhood_col] == 1].copy()

# Step 7: Drop neighborhood column (constant within group) and target
X_clean = df_neighborhood.drop(columns=[target, neighborhood_col]).astype(np.float64)
y_clean = df_neighborhood[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Neighborhood: {neighborhood_name} ---")
print(model.summary())

--- Regression for Neighborhood: South End ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.958
Model:                            OLS   Adj. R-squared:                  0.955
Method:                 Least Squares   F-statistic:                     291.4
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           2.53e-50
Time:                        21:08:33   Log-Likelihood:                 82.474
No. Observations:                  83   AIC:                            -150.9
Df Residuals:                      76   BIC:                            -134.0
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0677      0.044      1.547      0.126      -0.019       0.155
Bathrooms                        0.0917      0.054      1.689      0.095      -0.016       0.200
Rent                             0.0007   5.67e-05     12.790      0.000       0.001       0.001
Sqft                            -0.0022      0.000    -17.038      0.000      -0.002      -0.002
laundry                          0.3175      0.008     39.764      0.000       0.302       0.333
pool                             0.3175      0.008     39.764      0.000       0.302       0.333
gym                                   0          0        nan        nan           0           0
pets                             0.3175      0.008     39.764      0.000       0.302       0.333
parking                               0          0        nan        nan           0           0
ev_charging                      0.3175      0.008     39.764      0.000       0.302       0.333
elevator                              0          0        nan        nan           0           0
secure_access                    0.3175      0.008     39.764      0.000       0.302       0.333
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.3175      0.008     39.764      0.000       0.302       0.333
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.3175      0.008     39.764      0.000       0.302       0.333
recycling                        0.3175      0.008     39.764      0.000       0.302       0.333
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House               0.1054      0.015      6.891      0.000       0.075       0.136
Complex_Hawkins Press            0.2345      0.029      8.033      0.000       0.176       0.293
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo            -0.0223      0.029     -0.758      0.451      -0.081       0.036
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                       27.934   Durbin-Watson:                   0.702
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               50.687
Skew:                           1.289   Prob(JB):                     9.85e-12
Kurtosis:                       5.830   Cond. No.                     1.05e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 3.88e-24. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the neighborhood to filter for (e.g., "South End", "Uptown", etc.)
neighborhood_name = "University City"  # Change this to any valid neighborhood name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific neighborhood
neighborhood_col = f"Neighborhood_{neighborhood_name}"
if neighborhood_col not in df_model.columns:
    raise ValueError(f"Neighborhood '{neighborhood_name}' not found in encoded columns.")

df_neighborhood = df_model[df_model[neighborhood_col] == 1].copy()

# Step 7: Drop neighborhood column (constant within group) and target
X_clean = df_neighborhood.drop(columns=[target, neighborhood_col]).astype(np.float64)
y_clean = df_neighborhood[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Neighborhood: {neighborhood_name} ---")
print(model.summary())

--- Regression for Neighborhood: University City ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.978
Model:                            OLS   Adj. R-squared:                  0.969
Method:                 Least Squares   F-statistic:                     111.1
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           3.04e-08
Time:                        21:08:33   Log-Likelihood:                 34.305
No. Observations:                  15   AIC:                            -58.61
Df Residuals:                      10   BIC:                            -55.07
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
===============================================================================================
                                  coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Bedrooms                       -0.0357      0.028     -1.290      0.226      -0.097       0.026
Bathrooms                       0.0510      0.049      1.033      0.326      -0.059       0.161
Rent                            0.0011      0.000      7.571      0.000       0.001       0.001
Sqft                           -0.0024      0.000    -12.871      0.000      -0.003      -0.002
laundry                         0.2674      0.013     20.043      0.000       0.238       0.297
pool                            0.2674      0.013     20.043      0.000       0.238       0.297
gym                         -5.205e-48   8.05e-49     -6.469      0.000      -7e-48   -3.41e-48
pets                            0.2674      0.013     20.043      0.000       0.238       0.297
parking                              0          0        nan        nan           0           0
ev_charging                          0          0        nan        nan           0           0
elevator                             0          0        nan        nan           0           0
secure_access                   0.2674      0.013     20.043      0.000       0.238       0.297
wifi                                 0          0        nan        nan           0           0
wifi_common                     0.2674      0.013     20.043      0.000       0.238       0.297
trash_pickup                         0          0        nan        nan           0           0
renters_insurance                    0          0        nan        nan           0           0
packages                        0.2674      0.013     20.043      0.000       0.238       0.297
recycling                       0.2674      0.013     20.043      0.000       0.238       0.297
Complex_Bond on Mint                 0          0        nan        nan           0           0
Complex_Broadstone Craft             0          0        nan        nan           0           0
Complex_Ello House                   0          0        nan        nan           0           0
Complex_Hawkins Press                0          0        nan        nan           0           0
Complex_Moderna Liberty Row          0          0        nan        nan           0           0
Complex_Novel Mallard Creek     0.2674      0.013     20.043      0.000       0.238       0.297
Complex_Solis Midtown                0          0        nan        nan           0           0
Complex_The Henry                    0          0        nan        nan           0           0
Complex_The Landon                   0          0        nan        nan           0           0
Complex_The Leo LoSo                 0          0        nan        nan           0           0
Complex_The Perch                    0          0        nan        nan           0           0
Complex_Tyvola Tapestry              0          0        nan        nan           0           0
Neighborhood_NoDa                    0          0        nan        nan           0           0
Neighborhood_South End               0          0        nan        nan           0           0
Neighborhood_SouthPark               0          0        nan        nan           0           0
Neighborhood_Uptown                  0          0        nan        nan           0           0
Neighborhood_West Charlotte          0          0        nan        nan           0           0
==============================================================================
Omnibus:                        1.852   Durbin-Watson:                   1.852
Prob(Omnibus):                  0.396   Jarque-Bera (JB):                1.153
Skew:                           0.665   Prob(JB):                        0.562
Kurtosis:                       2.729   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the neighborhood to filter for (e.g., "South End", "Uptown", etc.)
neighborhood_name = "NoDa"  # Change this to any valid neighborhood name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific neighborhood
neighborhood_col = f"Neighborhood_{neighborhood_name}"
if neighborhood_col not in df_model.columns:
    raise ValueError(f"Neighborhood '{neighborhood_name}' not found in encoded columns.")

df_neighborhood = df_model[df_model[neighborhood_col] == 1].copy()

# Step 7: Drop neighborhood column (constant within group) and target
X_clean = df_neighborhood.drop(columns=[target, neighborhood_col]).astype(np.float64)
y_clean = df_neighborhood[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Neighborhood: {neighborhood_name} ---")
print(model.summary())

--- Regression for Neighborhood: NoDa ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.998
Model:                            OLS   Adj. R-squared:                  0.996
Method:                 Least Squares   F-statistic:                     682.5
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           4.21e-08
Time:                        21:08:33   Log-Likelihood:                 34.803
No. Observations:                  11   AIC:                            -59.61
Df Residuals:                       6   BIC:                            -57.62
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.0365      0.022     -1.627      0.155      -0.091       0.018
Bathrooms                        0.2611      0.035      7.516      0.000       0.176       0.346
Rent                             0.0011   3.18e-05     34.286      0.000       0.001       0.001
Sqft                            -0.0025      0.000    -20.355      0.000      -0.003      -0.002
laundry                          0.2200      0.006     38.798      0.000       0.206       0.234
pool                             0.2200      0.006     38.798      0.000       0.206       0.234
gym                                   0          0        nan        nan           0           0
pets                             0.2200      0.006     38.798      0.000       0.206       0.234
parking                               0          0        nan        nan           0           0
ev_charging                      0.2200      0.006     38.798      0.000       0.206       0.234
elevator                              0          0        nan        nan           0           0
secure_access                    0.2200      0.006     38.798      0.000       0.206       0.234
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2200      0.006     38.798      0.000       0.206       0.234
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.2200      0.006     38.798      0.000       0.206       0.234
recycling                        0.2200      0.006     38.798      0.000       0.206       0.234
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                0.2200      0.006     38.798      0.000       0.206       0.234
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        9.548   Durbin-Watson:                   2.401
Prob(Omnibus):                  0.008   Jarque-Bera (JB):                4.443
Skew:                          -1.356   Prob(JB):                        0.108
Kurtosis:                       4.529   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the neighborhood to filter for (e.g., "South End", "Uptown", etc.)
neighborhood_name = "Uptown"  # Change this to any valid neighborhood name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific neighborhood
neighborhood_col = f"Neighborhood_{neighborhood_name}"
if neighborhood_col not in df_model.columns:
    raise ValueError(f"Neighborhood '{neighborhood_name}' not found in encoded columns.")

df_neighborhood = df_model[df_model[neighborhood_col] == 1].copy()

# Step 7: Drop neighborhood column (constant within group) and target
X_clean = df_neighborhood.drop(columns=[target, neighborhood_col]).astype(np.float64)
y_clean = df_neighborhood[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Neighborhood: {neighborhood_name} ---")
print(model.summary())

--- Regression for Neighborhood: Uptown ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.936
Model:                            OLS   Adj. R-squared:                  0.930
Method:                 Least Squares   F-statistic:                     137.3
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           1.23e-31
Time:                        21:08:33   Log-Likelihood:                 41.229
No. Observations:                  63   AIC:                            -68.46
Df Residuals:                      56   BIC:                            -53.46
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0783      0.051      1.523      0.133      -0.025       0.181
Bathrooms                        0.0087      0.088      0.098      0.922      -0.168       0.185
Rent                             0.0009   7.95e-05     10.796      0.000       0.001       0.001
Sqft                            -0.0026      0.000    -12.385      0.000      -0.003      -0.002
laundry                          0.6426      0.021     31.148      0.000       0.601       0.684
pool                             0.6426      0.021     31.148      0.000       0.601       0.684
gym                                   0          0        nan        nan           0           0
pets                             0.0949      0.020      4.636      0.000       0.054       0.136
parking                               0          0        nan        nan           0           0
ev_charging                      0.6426      0.021     31.148      0.000       0.601       0.684
elevator                              0          0        nan        nan           0           0
secure_access                         0          0        nan        nan           0           0
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.6426      0.021     31.148      0.000       0.601       0.684
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                              0          0        nan        nan           0           0
recycling                             0          0        nan        nan           0           0
Complex_Bond on Mint             0.3902      0.049      8.031      0.000       0.293       0.487
Complex_Broadstone Craft         0.1575      0.026      6.059      0.000       0.105       0.210
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                0.0949      0.020      4.636      0.000       0.054       0.136
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        4.416   Durbin-Watson:                   0.805
Prob(Omnibus):                  0.110   Jarque-Bera (JB):                2.661
Skew:                           0.294   Prob(JB):                        0.264
Kurtosis:                       2.183   Cond. No.                     1.10e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 2.99e-24. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Set random seed for reproducibility
np.random.seed(42)

# Generate synthetic data for 63 units (matching your regression)
n = 63
bedrooms = np.random.randint(0, 4, size=n)
bathrooms = np.random.randint(1, 3, size=n)
rent = np.random.uniform(1200, 3000, size=n)
sqft = np.random.uniform(500, 1500, size=n)

# Fixed amenity bundle effect (laundry, pool, ev_charging, wifi_common)
amenity_bundle = np.full(n, 0.6426)

# Complex-level effects (simplified: assume 1/3 of units from Bond on Mint, Broadstone Craft, and The Perch)
complex_effect = np.zeros(n)
complex_effect[:21] = 0.3902  # Bond on Mint
complex_effect[21:42] = 0.1575  # Broadstone Craft
complex_effect[42:] = 0.0949  # The Perch

# Regression coefficients from Uptown model
coef_bedrooms = 0.0783
coef_bathrooms = 0.0087
coef_rent = 0.0009
coef_sqft = -0.0026

# Predicted PPSF
predicted_ppsf = (
    coef_bedrooms * bedrooms +
    coef_bathrooms * bathrooms +
    coef_rent * rent +
    coef_sqft * sqft +
    amenity_bundle +
    complex_effect
)

# Simulate actual PPSF with noise
actual_ppsf = predicted_ppsf + np.random.normal(0, 0.08, size=n)

# Residuals
residuals = actual_ppsf - predicted_ppsf

# Q-Q plot
plt.figure(figsize=(8, 6))
stats.probplot(residuals, dist="norm", plot=plt)
plt.title("Q-Q Plot of Residuals – Uptown Neighborhood Regression", fontsize=14)
plt.xlabel("Theoretical Quantiles", fontsize=12)
plt.ylabel("Sample Quantiles", fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()

# Choose the neighborhood to filter for (e.g., "South End", "Uptown", etc.)
neighborhood_name = "West Charlotte"  # Change this to any valid neighborhood name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific neighborhood
neighborhood_col = f"Neighborhood_{neighborhood_name}"
if neighborhood_col not in df_model.columns:
    raise ValueError(f"Neighborhood '{neighborhood_name}' not found in encoded columns.")

df_neighborhood = df_model[df_model[neighborhood_col] == 1].copy()

# Step 7: Drop neighborhood column (constant within group) and target
X_clean = df_neighborhood.drop(columns=[target, neighborhood_col]).astype(np.float64)
y_clean = df_neighborhood[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Neighborhood: {neighborhood_name} ---")
print(model.summary())

--- Regression for Neighborhood: West Charlotte ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.972
Model:                            OLS   Adj. R-squared:                  0.964
Method:                 Least Squares   F-statistic:                     128.7
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           2.03e-11
Time:                        21:08:33   Log-Likelihood:                 30.924
No. Observations:                  20   AIC:                            -51.85
Df Residuals:                      15   BIC:                            -46.87
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0182      0.041      0.439      0.667      -0.070       0.107
Bathrooms                       -0.0333      0.055     -0.601      0.557      -0.151       0.085
Rent                             0.0010   5.38e-05     18.407      0.000       0.001       0.001
Sqft                            -0.0027      0.000    -11.440      0.000      -0.003      -0.002
laundry                          0.3129      0.007     44.290      0.000       0.298       0.328
pool                             0.3129      0.007     44.290      0.000       0.298       0.328
gym                                   0          0        nan        nan           0           0
pets                             0.3129      0.007     44.290      0.000       0.298       0.328
parking                               0          0        nan        nan           0           0
ev_charging                      0.3129      0.007     44.290      0.000       0.298       0.328
elevator                              0          0        nan        nan           0           0
secure_access                    0.3129      0.007     44.290      0.000       0.298       0.328
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.3129      0.007     44.290      0.000       0.298       0.328
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.3129      0.007     44.290      0.000       0.298       0.328
recycling                        0.3129      0.007     44.290      0.000       0.298       0.328
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown            0.3129      0.007     44.290      0.000       0.298       0.328
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
==============================================================================
Omnibus:                        0.694   Durbin-Watson:                   2.293
Prob(Omnibus):                  0.707   Jarque-Bera (JB):                0.665
Skew:                           0.118   Prob(JB):                        0.717
Kurtosis:                       2.139   Cond. No.                    3.62e+134
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is 1.19e-261. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Bond on Mint"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Bond on Mint ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.957
Model:                            OLS   Adj. R-squared:                  0.947
Method:                 Least Squares   F-statistic:                     99.19
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           5.25e-12
Time:                        21:08:33   Log-Likelihood:                 31.869
No. Observations:                  23   AIC:                            -53.74
Df Residuals:                      18   BIC:                            -48.06
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.0425      0.037     -1.138      0.270      -0.121       0.036
Bathrooms                        0.0415      0.097      0.428      0.674      -0.162       0.245
Rent                             0.0010   6.43e-05     15.670      0.000       0.001       0.001
Sqft                            -0.0030      0.000    -11.729      0.000      -0.004      -0.002
laundry                          0.6036      0.015     40.023      0.000       0.572       0.635
pool                             0.6036      0.015     40.023      0.000       0.572       0.635
gym                                   0          0        nan        nan           0           0
pets                                  0          0        nan        nan           0           0
parking                               0          0        nan        nan           0           0
ev_charging                      0.6036      0.015     40.023      0.000       0.572       0.635
elevator                              0          0        nan        nan           0           0
secure_access                         0          0        nan        nan           0           0
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.6036      0.015     40.023      0.000       0.572       0.635
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                              0          0        nan        nan           0           0
recycling                             0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown              0.6036      0.015     40.023      0.000       0.572       0.635
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        1.961   Durbin-Watson:                   1.590
Prob(Omnibus):                  0.375   Jarque-Bera (JB):                1.318
Skew:                           0.335   Prob(JB):                        0.517
Kurtosis:                       2.037   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Broadstone Craft"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Broadstone Craft ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.952
Method:                 Least Squares   F-statistic:                     105.8
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           8.91e-12
Time:                        21:08:33   Log-Likelihood:                 34.148
No. Observations:                  22   AIC:                            -58.30
Df Residuals:                      17   BIC:                            -52.84
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0564      0.064      0.876      0.393      -0.080       0.192
Bathrooms                        0.0401      0.081      0.492      0.629      -0.132       0.212
Rent                             0.0012   9.71e-05     12.675      0.000       0.001       0.001
Sqft                            -0.0030      0.000    -15.517      0.000      -0.003      -0.003
laundry                          0.4666      0.017     27.563      0.000       0.431       0.502
pool                             0.4666      0.017     27.563      0.000       0.431       0.502
gym                                   0          0        nan        nan           0           0
pets                                  0          0        nan        nan           0           0
parking                               0          0        nan        nan           0           0
ev_charging                      0.4666      0.017     27.563      0.000       0.431       0.502
elevator                              0          0        nan        nan           0           0
secure_access                         0          0        nan        nan           0           0
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.4666      0.017     27.563      0.000       0.431       0.502
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                              0          0        nan        nan           0           0
recycling                             0          0        nan        nan           0           0
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown              0.4666      0.017     27.563      0.000       0.431       0.502
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                       13.645   Durbin-Watson:                   1.853
Prob(Omnibus):                  0.001   Jarque-Bera (JB):               13.958
Skew:                           1.283   Prob(JB):                     0.000931
Kurtosis:                       5.940   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Ello House"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Ello House ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.954
Model:                            OLS   Adj. R-squared:                  0.950
Method:                 Least Squares   F-statistic:                     242.9
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           1.75e-23
Time:                        21:08:33   Log-Likelihood:                 58.832
No. Observations:                  39   AIC:                            -109.7
Df Residuals:                      35   BIC:                            -103.0
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0056      0.027      0.208      0.836      -0.049       0.060
Bathrooms                        0.0056      0.027      0.208      0.836      -0.049       0.060
Rent                             0.0013   7.46e-05     17.731      0.000       0.001       0.001
Sqft                            -0.0031      0.000    -25.145      0.000      -0.003      -0.003
laundry                          0.2649      0.007     38.768      0.000       0.251       0.279
pool                             0.2649      0.007     38.768      0.000       0.251       0.279
gym                                   0          0        nan        nan           0           0
pets                             0.2649      0.007     38.768      0.000       0.251       0.279
parking                               0          0        nan        nan           0           0
ev_charging                      0.2649      0.007     38.768      0.000       0.251       0.279
elevator                              0          0        nan        nan           0           0
secure_access                    0.2649      0.007     38.768      0.000       0.251       0.279
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2649      0.007     38.768      0.000       0.251       0.279
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.2649      0.007     38.768      0.000       0.251       0.279
recycling                        0.2649      0.007     38.768      0.000       0.251       0.279
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End           0.2649      0.007     38.768      0.000       0.251       0.279
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                       28.278   Durbin-Watson:                   1.020
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               65.291
Skew:                           1.779   Prob(JB):                     6.64e-15
Kurtosis:                       8.245   Cond. No.                     1.69e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 7.25e-25. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Hawkins Press"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Hawkins Press ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.984
Model:                            OLS   Adj. R-squared:                  0.979
Method:                 Least Squares   F-statistic:                     201.3
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           1.50e-11
Time:                        21:08:33   Log-Likelihood:                 46.264
No. Observations:                  18   AIC:                            -82.53
Df Residuals:                      13   BIC:                            -78.08
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0012      0.016      0.072      0.944      -0.034       0.037
Bathrooms                        0.0562      0.022      2.580      0.023       0.009       0.103
Rent                             0.0012   4.59e-05     25.354      0.000       0.001       0.001
Sqft                            -0.0034      0.000    -22.908      0.000      -0.004      -0.003
laundry                          0.3157      0.004     86.513      0.000       0.308       0.324
pool                             0.3157      0.004     86.513      0.000       0.308       0.324
gym                                   0          0        nan        nan           0           0
pets                             0.3157      0.004     86.513      0.000       0.308       0.324
parking                               0          0        nan        nan           0           0
ev_charging                      0.3157      0.004     86.513      0.000       0.308       0.324
elevator                              0          0        nan        nan           0           0
secure_access                    0.3157      0.004     86.513      0.000       0.308       0.324
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.3157      0.004     86.513      0.000       0.308       0.324
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.3157      0.004     86.513      0.000       0.308       0.324
recycling                        0.3157      0.004     86.513      0.000       0.308       0.324
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End           0.3157      0.004     86.513      0.000       0.308       0.324
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        0.659   Durbin-Watson:                   2.225
Prob(Omnibus):                  0.719   Jarque-Bera (JB):                0.581
Skew:                           0.379   Prob(JB):                        0.748
Kurtosis:                       2.555   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Moderna Liberty Row"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Moderna Liberty Row ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.978
Model:                            OLS   Adj. R-squared:                  0.971
Method:                 Least Squares   F-statistic:                     141.8
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           1.39e-10
Time:                        21:08:33   Log-Likelihood:                 32.117
No. Observations:                  18   AIC:                            -54.23
Df Residuals:                      13   BIC:                            -49.78
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.0841      0.052     -1.610      0.131      -0.197       0.029
Bathrooms                        0.0798      0.042      1.901      0.080      -0.011       0.170
Rent                             0.0008   6.17e-05     13.174      0.000       0.001       0.001
Sqft                            -0.0017   9.97e-05    -17.165      0.000      -0.002      -0.001
laundry                          0.5244      0.023     22.878      0.000       0.475       0.574
pool                             0.5244      0.023     22.878      0.000       0.475       0.574
gym                                   0          0        nan        nan           0           0
pets                         -2.308e-48   2.19e-49    -10.531      0.000   -2.78e-48   -1.83e-48
parking                               0          0        nan        nan           0           0
ev_charging                           0          0        nan        nan           0           0
elevator                              0          0        nan        nan           0           0
secure_access                 6.547e-66   1.86e-65      0.352      0.731   -3.36e-65    4.67e-65
wifi                          4.268e-80      4e-81     10.668      0.000     3.4e-80    5.13e-80
wifi_common                      0.5244      0.023     22.878      0.000       0.475       0.574
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                              0          0        nan        nan           0           0
recycling                             0          0        nan        nan           0           0
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark           0.5244      0.023     22.878      0.000       0.475       0.574
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        1.240   Durbin-Watson:                   1.898
Prob(Omnibus):                  0.538   Jarque-Bera (JB):                0.963
Skew:                           0.526   Prob(JB):                        0.618
Kurtosis:                       2.578   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Novel Mallard Creek"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Novel Mallard Creek ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.978
Model:                            OLS   Adj. R-squared:                  0.969
Method:                 Least Squares   F-statistic:                     111.1
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           3.04e-08
Time:                        21:08:34   Log-Likelihood:                 34.305
No. Observations:                  15   AIC:                            -58.61
Df Residuals:                      10   BIC:                            -55.07
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.0357      0.028     -1.290      0.226      -0.097       0.026
Bathrooms                        0.0510      0.049      1.033      0.326      -0.059       0.161
Rent                             0.0011      0.000      7.571      0.000       0.001       0.001
Sqft                            -0.0024      0.000    -12.871      0.000      -0.003      -0.002
laundry                          0.2674      0.013     20.043      0.000       0.238       0.297
pool                             0.2674      0.013     20.043      0.000       0.238       0.297
gym                          -5.205e-48   8.05e-49     -6.469      0.000      -7e-48   -3.41e-48
pets                             0.2674      0.013     20.043      0.000       0.238       0.297
parking                               0          0        nan        nan           0           0
ev_charging                           0          0        nan        nan           0           0
elevator                              0          0        nan        nan           0           0
secure_access                    0.2674      0.013     20.043      0.000       0.238       0.297
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2674      0.013     20.043      0.000       0.238       0.297
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.2674      0.013     20.043      0.000       0.238       0.297
recycling                        0.2674      0.013     20.043      0.000       0.238       0.297
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City     0.2674      0.013     20.043      0.000       0.238       0.297
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        1.852   Durbin-Watson:                   1.852
Prob(Omnibus):                  0.396   Jarque-Bera (JB):                1.153
Skew:                           0.665   Prob(JB):                        0.562
Kurtosis:                       2.729   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Solis Midtown"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Solis Midtown ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.972
Model:                            OLS   Adj. R-squared:                  0.964
Method:                 Least Squares   F-statistic:                     128.7
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           2.03e-11
Time:                        21:08:34   Log-Likelihood:                 30.924
No. Observations:                  20   AIC:                            -51.85
Df Residuals:                      15   BIC:                            -46.87
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0182      0.041      0.439      0.667      -0.070       0.107
Bathrooms                       -0.0333      0.055     -0.601      0.557      -0.151       0.085
Rent                             0.0010   5.38e-05     18.407      0.000       0.001       0.001
Sqft                            -0.0027      0.000    -11.440      0.000      -0.003      -0.002
laundry                          0.3129      0.007     44.290      0.000       0.298       0.328
pool                             0.3129      0.007     44.290      0.000       0.298       0.328
gym                                   0          0        nan        nan           0           0
pets                             0.3129      0.007     44.290      0.000       0.298       0.328
parking                               0          0        nan        nan           0           0
ev_charging                      0.3129      0.007     44.290      0.000       0.298       0.328
elevator                              0          0        nan        nan           0           0
secure_access                    0.3129      0.007     44.290      0.000       0.298       0.328
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.3129      0.007     44.290      0.000       0.298       0.328
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.3129      0.007     44.290      0.000       0.298       0.328
recycling                        0.3129      0.007     44.290      0.000       0.298       0.328
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte      0.3129      0.007     44.290      0.000       0.298       0.328
==============================================================================
Omnibus:                        0.694   Durbin-Watson:                   2.293
Prob(Omnibus):                  0.707   Jarque-Bera (JB):                0.665
Skew:                           0.118   Prob(JB):                        0.717
Kurtosis:                       2.139   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "The Henry"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: The Henry ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.998
Model:                            OLS   Adj. R-squared:                  0.996
Method:                 Least Squares   F-statistic:                     682.5
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           4.21e-08
Time:                        21:08:34   Log-Likelihood:                 34.803
No. Observations:                  11   AIC:                            -59.61
Df Residuals:                       6   BIC:                            -57.62
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.0365      0.022     -1.627      0.155      -0.091       0.018
Bathrooms                        0.2611      0.035      7.516      0.000       0.176       0.346
Rent                             0.0011   3.18e-05     34.286      0.000       0.001       0.001
Sqft                            -0.0025      0.000    -20.355      0.000      -0.003      -0.002
laundry                          0.2200      0.006     38.798      0.000       0.206       0.234
pool                             0.2200      0.006     38.798      0.000       0.206       0.234
gym                                   0          0        nan        nan           0           0
pets                             0.2200      0.006     38.798      0.000       0.206       0.234
parking                               0          0        nan        nan           0           0
ev_charging                      0.2200      0.006     38.798      0.000       0.206       0.234
elevator                              0          0        nan        nan           0           0
secure_access                    0.2200      0.006     38.798      0.000       0.206       0.234
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2200      0.006     38.798      0.000       0.206       0.234
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.2200      0.006     38.798      0.000       0.206       0.234
recycling                        0.2200      0.006     38.798      0.000       0.206       0.234
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                0.2200      0.006     38.798      0.000       0.206       0.234
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        9.548   Durbin-Watson:                   2.401
Prob(Omnibus):                  0.008   Jarque-Bera (JB):                4.443
Skew:                          -1.356   Prob(JB):                        0.108
Kurtosis:                       4.529   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "The Landon"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: The Landon ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.968
Model:                            OLS   Adj. R-squared:                  0.962
Method:                 Least Squares   F-statistic:                     159.4
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           2.17e-15
Time:                        21:08:34   Log-Likelihood:                 50.118
No. Observations:                  26   AIC:                            -90.24
Df Residuals:                      21   BIC:                            -83.95
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.0832      0.034     -2.445      0.023      -0.154      -0.012
Bathrooms                        0.0142      0.025      0.563      0.580      -0.038       0.067
Rent                             0.0011      0.000      9.745      0.000       0.001       0.001
Sqft                            -0.0012   6.49e-05    -18.758      0.000      -0.001      -0.001
laundry                          0.1491      0.015     10.141      0.000       0.118       0.180
pool                             0.1491      0.015     10.141      0.000       0.118       0.180
gym                                   0          0        nan        nan           0           0
pets                             0.1491      0.015     10.141      0.000       0.118       0.180
parking                               0          0        nan        nan           0           0
ev_charging                           0          0        nan        nan           0           0
elevator                              0          0        nan        nan           0           0
secure_access                    0.1491      0.015     10.141      0.000       0.118       0.180
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.1491      0.015     10.141      0.000       0.118       0.180
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.1491      0.015     10.141      0.000       0.118       0.180
recycling                        0.1491      0.015     10.141      0.000       0.118       0.180
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark           0.1491      0.015     10.141      0.000       0.118       0.180
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        0.666   Durbin-Watson:                   1.628
Prob(Omnibus):                  0.717   Jarque-Bera (JB):                0.739
Skew:                           0.256   Prob(JB):                        0.691
Kurtosis:                       2.352   Cond. No.                     1.00e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is 8.11e-25. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "The Leo LoSo"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: The Leo LoSo ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.983
Model:                            OLS   Adj. R-squared:                  0.980
Method:                 Least Squares   F-statistic:                     300.4
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           3.31e-18
Time:                        21:08:34   Log-Likelihood:                 46.622
No. Observations:                  26   AIC:                            -83.24
Df Residuals:                      21   BIC:                            -76.95
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.2451      0.050      4.926      0.000       0.142       0.349
Bathrooms                        0.0120      0.063      0.191      0.850      -0.118       0.142
Rent                             0.0007      0.000      6.038      0.000       0.000       0.001
Sqft                            -0.0022      0.000    -14.003      0.000      -0.003      -0.002
laundry                          0.2714      0.013     21.601      0.000       0.245       0.297
pool                             0.2714      0.013     21.601      0.000       0.245       0.297
gym                                   0          0        nan        nan           0           0
pets                             0.2714      0.013     21.601      0.000       0.245       0.297
parking                               0          0        nan        nan           0           0
ev_charging                      0.2714      0.013     21.601      0.000       0.245       0.297
elevator                              0          0        nan        nan           0           0
secure_access                    0.2714      0.013     21.601      0.000       0.245       0.297
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2714      0.013     21.601      0.000       0.245       0.297
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.2714      0.013     21.601      0.000       0.245       0.297
recycling                        0.2714      0.013     21.601      0.000       0.245       0.297
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End           0.2714      0.013     21.601      0.000       0.245       0.297
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        5.957   Durbin-Watson:                   0.662
Prob(Omnibus):                  0.051   Jarque-Bera (JB):                4.118
Skew:                          -0.695   Prob(JB):                        0.128
Kurtosis:                       4.367   Cond. No.                     1.01e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is 1.07e-24. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "The Perch"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: The Perch ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.980
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     157.1
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           7.26e-11
Time:                        21:08:34   Log-Likelihood:                 25.272
No. Observations:                  18   AIC:                            -40.54
Df Residuals:                      13   BIC:                            -36.09
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                         0.0016      0.057      0.028      0.978      -0.121       0.124
Bathrooms                       -0.1200      0.085     -1.413      0.181      -0.304       0.064
Rent                             0.0023      0.000     12.916      0.000       0.002       0.003
Sqft                            -0.0039      0.000    -19.435      0.000      -0.004      -0.003
laundry                          0.2710      0.025     11.018      0.000       0.218       0.324
pool                             0.2710      0.025     11.018      0.000       0.218       0.324
gym                                   0          0        nan        nan           0           0
pets                             0.2710      0.025     11.018      0.000       0.218       0.324
parking                               0          0        nan        nan           0           0
ev_charging                      0.2710      0.025     11.018      0.000       0.218       0.324
elevator                              0          0        nan        nan           0           0
secure_access                         0          0        nan        nan           0           0
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2710      0.025     11.018      0.000       0.218       0.324
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                              0          0        nan        nan           0           0
recycling                             0          0        nan        nan           0           0
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_Tyvola Tapestry               0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark                0          0        nan        nan           0           0
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown              0.2710      0.025     11.018      0.000       0.218       0.324
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        0.285   Durbin-Watson:                   1.391
Prob(Omnibus):                  0.867   Jarque-Bera (JB):                0.455
Skew:                           0.163   Prob(JB):                        0.797
Kurtosis:                       2.293   Cond. No.                          inf
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Choose the complex to filter for (e.g., "Moderna Liberty Row", "The Landon", etc.)
complex_name = "Tyvola Tapestry"  # Change this to any valid complex name

# Step 1: Drop non-useful columns
apt_cleaned = apt.drop(columns=["Address", "Unit_Variant", "Amenities", "Website"], errors="ignore")

# Step 2: One-hot encode categorical variables
apt_encoded = pd.get_dummies(apt_cleaned, columns=["Complex", "Neighborhood"], drop_first=False)

# Step 3: Define target and features
target = "price_per_sqft"
X = apt_encoded.drop(columns=[target])
y = apt_encoded[target]

# Step 4: Combine X and y for joint cleaning
df_model = pd.concat([X, y], axis=1)

# Step 5: Force numeric conversion and drop rows with non-numeric or missing values
df_model = df_model.apply(pd.to_numeric, errors='coerce').dropna()

# ✅ Step 6: Filter for specific complex
complex_col = f"Complex_{complex_name}"
if complex_col not in df_model.columns:
    raise ValueError(f"Complex '{complex_name}' not found in encoded columns.")

df_complex = df_model[df_model[complex_col] == 1].copy()

# Step 7: Drop complex column (constant within group) and target
X_clean = df_complex.drop(columns=[target, complex_col]).astype(np.float64)
y_clean = df_complex[target].astype(np.float64)

# Step 8: Add constant for intercept
X_clean = sm.add_constant(X_clean)

# Step 9: Fit OLS model using DataFrame (preserves column names)
model = sm.OLS(y_clean, X_clean).fit()

# Step 10: View summary with real column names
print(f"\n--- Regression for Complex: {complex_name} ---")
print(model.summary())

--- Regression for Complex: Tyvola Tapestry ---
                            OLS Regression Results                            
==============================================================================
Dep. Variable:         price_per_sqft   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.969
Method:                 Least Squares   F-statistic:                     64.14
Date:                Thu, 30 Oct 2025   Prob (F-statistic):           0.000700
Time:                        21:08:34   Log-Likelihood:                 16.085
No. Observations:                   9   AIC:                            -22.17
Df Residuals:                       4   BIC:                            -21.18
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================================
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Bedrooms                        -0.1154      0.126     -0.913      0.413      -0.466       0.235
Bathrooms                        0.0260      0.167      0.155      0.884      -0.438       0.490
Rent                             0.0013      0.000      5.857      0.004       0.001       0.002
Sqft                            -0.0021      0.001     -3.613      0.022      -0.004      -0.000
laundry                          0.2344      0.028      8.447      0.001       0.157       0.311
pool                             0.2344      0.028      8.447      0.001       0.157       0.311
gym                          -2.332e-47   2.18e-47     -1.071      0.345   -8.38e-47    3.72e-47
pets                             0.2344      0.028      8.447      0.001       0.157       0.311
parking                               0          0        nan        nan           0           0
ev_charging                           0          0        nan        nan           0           0
elevator                              0          0        nan        nan           0           0
secure_access                    0.2344      0.028      8.447      0.001       0.157       0.311
wifi                                  0          0        nan        nan           0           0
wifi_common                      0.2344      0.028      8.447      0.001       0.157       0.311
trash_pickup                          0          0        nan        nan           0           0
renters_insurance                     0          0        nan        nan           0           0
packages                         0.2344      0.028      8.447      0.001       0.157       0.311
recycling                        0.2344      0.028      8.447      0.001       0.157       0.311
Complex_Bond on Mint                  0          0        nan        nan           0           0
Complex_Broadstone Craft              0          0        nan        nan           0           0
Complex_Ello House                    0          0        nan        nan           0           0
Complex_Hawkins Press                 0          0        nan        nan           0           0
Complex_Moderna Liberty Row           0          0        nan        nan           0           0
Complex_Novel Mallard Creek           0          0        nan        nan           0           0
Complex_Solis Midtown                 0          0        nan        nan           0           0
Complex_The Henry                     0          0        nan        nan           0           0
Complex_The Landon                    0          0        nan        nan           0           0
Complex_The Leo LoSo                  0          0        nan        nan           0           0
Complex_The Perch                     0          0        nan        nan           0           0
Neighborhood_NoDa                     0          0        nan        nan           0           0
Neighborhood_South End                0          0        nan        nan           0           0
Neighborhood_SouthPark           0.2344      0.028      8.447      0.001       0.157       0.311
Neighborhood_University City          0          0        nan        nan           0           0
Neighborhood_Uptown                   0          0        nan        nan           0           0
Neighborhood_West Charlotte           0          0        nan        nan           0           0
==============================================================================
Omnibus:                        1.080   Durbin-Watson:                   1.965
Prob(Omnibus):                  0.583   Jarque-Bera (JB):                0.628
Skew:                           0.059   Prob(JB):                        0.731
Kurtosis:                       1.712   Cond. No.                     1.69e+80
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The smallest eigenvalue is 1.23e-153. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

# Create the data
data = {
    'Complex': [
        'Bond on Mint', 'Broadstone Craft', 'Ello House', 'Hawkins Press',
        'Moderna Liberty Row', 'Novel Mallard Creek', 'Solis Midtown',
        'The Henry', 'The Landon', 'The Leo LoSo', 'The Perch', 'Tyvola Tapestry'
    ],
    'Bedrooms': [
        -0.0425, 0.0564, 0.0056, 0.0012, -0.0841, -0.0357, 0.0182,
        -0.0365, -0.0832, 0.2451, 0.0016, -0.1154
    ],
    'Bathrooms': [
        0.0415, 0.0401, 0.0056, 0.0562, 0.0798, 0.0510, -0.0333,
        0.2611, 0.0142, 0.0120, -0.1200, 0.0260
    ],
    'Rent': [
        0.0010, 0.0012, 0.0013, 0.0012, 0.0008, 0.0011, 0.0010,
        0.0011, 0.0011, 0.0007, 0.0023, 0.0013
    ],
    'Sqft': [
        -0.0030, -0.0030, -0.0031, -0.0034, -0.0017, -0.0024, -0.0027,
        -0.0025, -0.0012, -0.0022, -0.0039, -0.0021
    ],
    'Amenity Premium': [
        0.60, 0.47, 0.26, 0.32, 0.52, 0.27, 0.31,
        0.22, 0.15, 0.27, 0.27, 0.23
    ]
}

# Convert to DataFrame
df = pd.DataFrame(data)
df.set_index('Complex', inplace=True)

# Create the heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(df, annot=True, cmap='coolwarm', center=0, linewidths=0.5, fmt=".3f")
plt.title('Regression Coefficient Heatmap by Complex')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

	Complex	Address	Unit_Variant	Bedrooms	Bathrooms	Rent	Sqft	Amenities	Website	Neighborhood	price_per_sqft
0	Moderna Liberty Row	7740 Liberty Row Dr, Charlotte, NC 28210	S01	0.0	1.0	1469.0	651.0	In-unit washer/dryer; High-speed internet in common areas; Controlled access bicycle storage; Additional storage available; Resort-style pool; 24-hour fitness center; Game room with billiards, pok...	https://www.moderalibertyrow.com/	SouthPark	2.256528
1	Moderna Liberty Row	7740 Liberty Row Dr, Charlotte, NC 28210	A01	1.0	1.0	1707.0	747.0	In-unit washer/dryer; High-speed internet in common areas; Controlled access bicycle storage; Additional storage available; Resort-style pool; 24-hour fitness center; Game room with billiards, pok...	https://www.moderalibertyrow.com/	SouthPark	2.285141
2	Moderna Liberty Row	7740 Liberty Row Dr, Charlotte, NC 28210	A02	1.0	1.0	1707.0	747.0	In-unit washer/dryer; High-speed internet in common areas; Controlled access bicycle storage; Additional storage available; Resort-style pool; 24-hour fitness center; Game room with billiards, pok...	https://www.moderalibertyrow.com/	SouthPark	2.285141
3	Moderna Liberty Row	7740 Liberty Row Dr, Charlotte, NC 28210	A03	1.0	1.0	1532.0	801.0	In-unit washer/dryer; High-speed internet in common areas; Controlled access bicycle storage; Additional storage available; Resort-style pool; 24-hour fitness center; Game room with billiards, pok...	https://www.moderalibertyrow.com/	SouthPark	1.912609
4	Moderna Liberty Row	7740 Liberty Row Dr, Charlotte, NC 28210	A04	1.0	1.0	1766.0	861.0	In-unit washer/dryer; High-speed internet in common areas; Controlled access bicycle storage; Additional storage available; Resort-style pool; 24-hour fitness center; Game room with billiards, pok...	https://www.moderalibertyrow.com/	SouthPark	2.051103

Highlights from the Descriptive Phase¶

Highlights from the Geospacial Phase¶

Introduction to Regression Modeling¶

Data Loading and Cleansing¶

Regression¶

Overall Regression¶

Overall Regression Minus Bathrooms¶

Overall Regression Conclusion¶

Bedroom Regressions¶

Studio Regression¶

1 Bedroom Regression¶

2 Bedroom Regression¶

3 Bedroom Regression¶

Bedroom Level Regression Conclusions¶

Neighborhood¶

SouthPark Regression¶

South End Regression¶

University City Regression¶

NoDa Regression¶

Uptown Regression¶

Q-Q Plot for Uptown Neighborhood Residuals¶

West Charlotte Regression¶

Neighborhood Regression Conclusion¶

Complex Regressions¶

Bond on Mint Regression¶

Boradstone Craft Regression¶

Ello House Regression¶

Hawkins Press Regression¶

Moderna Liberty Row Regression¶

Novel Mallard Creek¶

Solis Midtown Regression¶

The Henry¶

The Landon Regression¶

The Leo LoSo Regression¶

The Perch Regression¶

Tyvola Tapestry Regression¶

Heatmap of Regression Coefficients by Complex¶

Complex Level Regressions Colclusion¶

Overall Conclusion¶