import numpy as np
np.random.seed(7654567)
# price = 500 * sqft + 1 * medprice + 200,000 + noise(0, 100,000)
N = 10000 # num data points
noise = np.random.normal(0, 100000, size = N)
# A is comprised of the following 3 columns: [sqfts, medprices, 1's]
sqfts = np.random.uniform(1000, 5000, size = N)
medprices = np.random.normal(1000000, 250000, size = N)
ones = np.ones_like(medprices)
A = np.column_stack((sqfts, medprices, ones))
# b_star is what the data points would be if there was no noise
b_star = A @ [500, 1, 200000]
# b is the actual vector of data points
b = b_star + noise
# now figure out the true coefficients [500, 1, 200000] from A and b
A_t = np.transpose(A)
x = np.linalg.inv(A_t @ A) @ A_t @ b
print("sqft coefficient:", x[0])
print("medprice coefficient:", x[1])
print("constant coefficient:", x[2])