In [1]:
import numpy as np
np.random.seed(7654567)

# price = 500 * sqft + 1 * medprice + 200,000 + noise(0, 100,000)

N = 10000 # num data points
noise = np.random.normal(0, 100000, size = N)

# A is comprised of the following 3 columns: [sqfts, medprices, 1's]
sqfts = np.random.uniform(1000, 5000, size = N)
medprices = np.random.normal(1000000, 250000, size = N)
ones = np.ones_like(medprices)
A = np.column_stack((sqfts, medprices, ones))

# b_star is what the data points would be if there was no noise
b_star = A @ [500, 1, 200000]

# b is the actual vector of data points
b = b_star + noise

# now figure out the true coefficients [500, 1, 200000] from A and b
A_t = np.transpose(A)
x = np.linalg.inv(A_t @ A) @ A_t @ b

print("sqft coefficient:", x[0])
print("medprice coefficient:", x[1])
print("constant coefficient:", x[2])
sqft coefficient: 501.2402778789989
medprice coefficient: 0.9962719434433466
constant coefficient: 201161.40486165255