
Commit c03f00c

feat: add Ridge Regression with L2 regularization
1 parent 607630b commit c03f00c

File tree

1 file changed: +127, -0 lines changed


machine_learning/ridge_regression.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
"""
Ridge Regression using Gradient Descent.

This script implements Ridge Regression (L2 regularization) using gradient descent.
It predicts Average Damage per Round (ADR) using player ratings.

Author: Nitin Pratap Singh
"""

import numpy as np
import httpx


def collect_dataset():
    """
    Collects CSGO dataset from a remote CSV file.

    The CSV contains ADR vs Rating of players.

    :return: Numpy array of shape (n_samples, 2)

    >>> data = collect_dataset()
    >>> data.shape[1]
    2
    """
    response = httpx.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    lines = response.text.splitlines()
    data = [line.split(",") for line in lines]
    data.pop(0)  # Remove header
    dataset = np.array(data, dtype=float)
    return dataset


def ridge_cost_function(X, y, theta, lam):
    """
    Computes the cost for Ridge Regression (L2 regularization).

    :param X: Feature matrix (n_samples, n_features)
    :param y: Target vector (n_samples,)
    :param theta: Coefficients (n_features,)
    :param lam: Regularization strength (lambda)
    :return: Cost value (float)

    >>> X = np.array([[1, 1], [1, 2]])
    >>> y = np.array([1, 2])
    >>> theta = np.zeros(2)
    >>> round(ridge_cost_function(X, y, theta, 0.1), 2)
    1.25
    """
    m = len(y)
    predictions = X @ theta
    error = predictions - y
    cost = (1 / (2 * m)) * np.dot(error, error)
    reg_cost = (lam / (2 * m)) * np.dot(theta[1:], theta[1:])
    return cost + reg_cost
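
# The value computed above is the standard ridge objective:
#     J(theta) = (1 / (2m)) * ||X @ theta - y||^2 + (lam / (2m)) * ||theta[1:]||^2
# where the bias weight theta[0] is excluded from the L2 penalty.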


def ridge_gradient_descent(X, y, theta, alpha, iterations, lam, verbose=True):
    """
    Performs gradient descent with L2 regularization.

    :param X: Feature matrix (n_samples, n_features)
    :param y: Target values (n_samples,)
    :param theta: Initial weights (n_features,)
    :param alpha: Learning rate (float)
    :param iterations: Number of iterations (int)
    :param lam: Regularization strength (lambda)
    :param verbose: Print cost every 10,000 steps if True
    :return: Optimized weights (n_features,)

    >>> X = np.array([[1, 1], [1, 2]])
    >>> y = np.array([1, 2])
    >>> theta = np.zeros(2)
    >>> final_theta = ridge_gradient_descent(X, y, theta, 0.1, 10, 0.01, verbose=False)
    >>> len(final_theta)
    2
    """
    m = len(y)
    for i in range(iterations):
        predictions = X @ theta
        error = predictions - y
        gradient = (1 / m) * (X.T @ error)
        reg_term = (lam / m) * theta
        reg_term[0] = 0  # Do not regularize the bias term
        theta = theta - alpha * (gradient + reg_term)

        if i % 10000 == 0 and verbose:
            cost = ridge_cost_function(X, y, theta, lam)
            print(f"Iteration {i}: Cost = {cost:.5f}")

    return theta
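
# Each pass of the loop above applies the regularized gradient step
#     theta <- theta - alpha * ((1 / m) * X.T @ (X @ theta - y) + (lam / m) * theta)
# with the bias component of the penalty zeroed out so the intercept is not shrunk.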


def main():
    """
    Driver function for running Ridge Regression
    """
    data = collect_dataset()

    # Normalize feature column to avoid overflow
    feature = data[:, 0]
    feature = (feature - feature.mean()) / feature.std()

    X = np.c_[np.ones(data.shape[0]), feature]  # Add bias term
    y = data[:, 1]

    theta = np.zeros(X.shape[1])
    alpha = 0.001  # Lowered learning rate
    iterations = 100000
    lam = 0.1  # Regularization strength

    final_theta = ridge_gradient_descent(X, y, theta, alpha, iterations, lam)

    print("\nOptimized weights (theta):")
    for i, value in enumerate(final_theta):
        print(f"θ{i}: {value:.5f}")


if __name__ == "__main__":
    import doctest
    doctest.testmod()
    main()
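
# Running the file directly (python ridge_regression.py) first executes the doctests
# (the collect_dataset example needs network access to fetch the CSV) and then fits
# the model on the ADR vs Rating data, printing the optimized theta values.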
