defmodule Learnx.LinearRegression do
@moduledoc """
Ordinary least squares Linear Regression.
LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
to minimize the residual sum of squares between the observed targets in
the dataset, and the targets predicted by the linear approximation.
"""
import Learnx.Preprocessing
import Nx
import Nx.LinAlg
import Nx.Defn
alias __MODULE__, as: Linear
@typedoc """
Linear regressor.
- coef : tensor of shape {n_features, }.
Estimated coefficients for the linear regression problem.
- intercept : float.
Independent term in the linear model. Set to nil if fit_intercept = False.
- n_features : int.
Number of features seen during fit.
"""
defstruct [:coef, :intercept, :n_features]
@type regressor :: %Linear{
coef: tensor,
intercept: number,
n_features: number
}
@type tensor :: Nx.Tensor.t()
@doc """
Fits linear model.
## Parameters
- x : list or tensor of shape {n_samples, n_features} or {n_samples,}.
Training data.
- y : list or tensor of shape {n_samples,}.
Target values. Will be cast to X's type if necessary.
- intercept : boolean.
Wether or not to use intercept in calculations. Simple linear regression
will always use intercept.
## Returns
A linear regressor containing the coefficients, intercept (if chosen) and
number of features.
## Examples
With lists as inputs:
```elixir
iex> x = [-1, 1, 3, 5]
iex> y = [6, 8, 10, 12]
iex> reg = Learnx.LinearRegression.fit(x, y)
iex> reg.coef
#Nx.Tensor<
f32
1.0
>
iex> reg.intercept
7.0
iex> reg.n_features
1
```
With tensors:
```elixir
iex> x = Nx.tensor([-1, 1, 3, 5])
iex> y = Nx.tensor([6, 8, 10, 12])
iex> reg = Learnx.LinearRegression.fit(x, y)
iex> reg.coef
#Nx.Tensor<
f32
1.0
>
iex> reg.intercept
7.0
iex> reg.n_features
1
```
With multiple features as the input:
```elixir
iex> x = [[1, 1], [1, 2], [2, 2], [2, 3]]
iex> y = [6, 8, 9, 11]
iex> reg = Learnx.LinearRegression.fit(x, y)
iex> reg.coef
#Nx.Tensor<
f32[2]
[1.0, 2.0]
>
iex> reg.intercept
2.999993324279785
iex> reg.n_features
2
```
"""
@spec fit(list | tensor, list | tensor, keyword) :: regressor
def fit(x, y, opts \\ []) do
default = [intercept: true]
opts = Keyword.merge(default, opts)
validate_data(x, y)
|> compute(opts[:intercept])
end
@spec compute({tensor, tensor}, boolean) :: regressor
defp compute({x, y}, intercept) do
{_n_samples, n_features} = shape(x)
case {n_features == 1, intercept} do
{true, _} ->
coef = solve_simple(x, y)
%Linear{
coef: coef[1],
intercept: coef[0] |> to_number(),
n_features: n_features
}
{false, true} ->
coef = solve_multiple(x, y, true)
%Linear{
coef: coef[1..n_features],
intercept: coef[0] |> to_number(),
n_features: n_features
}
{false, false} ->
coef = solve_multiple(x, y, false)
%Linear{
coef: coef,
n_features: n_features,
intercept: nil
}
end
end
@spec solve_simple(tensor, tensor) :: regressor
defnp solve_simple(x, y) do
x = x |> squeeze()
b_1 = ss(x, y) / ss(x, x)
b_0 = mean(y) - b_1 * mean(x)
stack([b_0, b_1])
end
@spec ss(tensor, tensor) :: tensor
defnp ss(x, y) do
sum((x - mean(x)) * (y - mean(y)))
end
@spec solve_multiple(tensor, tensor, boolean) :: regressor
defp solve_multiple(x, y, false) do
x
|> transpose()
|> dot(x)
|> invert()
|> dot(transpose(x))
|> dot(y)
end
defp solve_multiple(x, y, true) do
{n_samples, _n_features} = shape(x)
[
for(_ <- 1..n_samples, do: 1) |> tensor() |> reshape({1, :auto})
| x |> transpose() |> to_batched(1) |> Enum.to_list()
]
|> stack()
|> squeeze()
|> transpose()
|> solve_multiple(y, false)
end
@doc """
Predicts using the linear model.
## Parameters
- regressor : trained regressor to use for the predictions.
- x : number, list or tensor of shape {n_observations, n_features}.
Observations to predict.
## Returns
Predicted value or values. Returns the prediction(s) in the type of the
input, i. e. if x is a tensor, it returns a tensor; and if x is a number,
it returns a number.
## Examples
Single feature regression:
```elixir
iex> x = [-1, 1, 3, 5]
iex> y = [6, 8, 10, 12]
iex> reg = Learnx.LinearRegression.fit(x, y)
iex> reg |> Learnx.LinearRegression.predict(2)
9.0
iex> reg |> Learnx.LinearRegression.predict([4, 6])
[11.0, 13.0]
```
Multiple features regression:
```elixir
iex> x = [[1, 1], [1, 2], [2, 2], [2, 3]]
iex> y = [6, 8, 10, 12]
iex> reg = Learnx.LinearRegression.fit(x, y)
iex> reg |> Learnx.LinearRegression.predict([3, 3])
[13.999992370605469]
```
"""
@spec predict(regressor, number | list | tensor) :: number | list | tensor
def predict(regressor = %Linear{n_features: 1}, x) when is_number(x),
do: predict(regressor, tensor(x)) |> to_number()
def predict(regressor = %Linear{n_features: 1}, x) when is_list(x),
do: predict(regressor, tensor(x)) |> to_flat_list()
def predict(%Linear{coef: coef, intercept: intercept, n_features: 1}, x) do
x |> multiply(coef) |> add(intercept)
end
def predict(regressor = %Linear{n_features: n}, x) when is_list(x),
do: predict(regressor, tensor(x) |> reshape({:auto, n})) |> to_flat_list()
def predict(%Linear{coef: coef, intercept: intercept}, x) do
x
|> multiply(coef)
|> sum(axes: [1])
|> add(intercept)
end
end