defmodule URL do
@moduledoc """
Functions for parsing URLs.

This module provides functions for parsing URLs. It is modelled on
Elixir's `URI` module but will also parse scheme-specific URIs such
as [geo](https://tools.ietf.org/rfc/rfc5870), [data](https://tools.ietf.org/html/rfc2397),
[tel](https://tools.ietf.org/html/rfc3966), [mailto](https://tools.ietf.org/html/rfc6068),
and [uuid](https://tools.ietf.org/html/draft-kindel-uuid-uri-00).
"""
# Union of the scheme-specific structs that can be stored under the
# `parsed_path` key. `nil` is the value used when no scheme-specific
# parser applies to the URL.
@type uri_type :: nil | URL.Data.t() | URL.Geo.t() | URL.Tel.t() | URL.UUID.t() | URL.Mailto.t()
# The struct is documented as having the same shape as Elixir's `URI`
# struct, with the addition of `parsed_path`. Every field defaults to `nil`.
defstruct scheme: nil,
path: nil,
query: nil,
fragment: nil,
authority: nil,
userinfo: nil,
host: nil,
port: nil,
parsed_path: nil
# Every field is optional (`nil`); `parsed_path` carries the
# scheme-specific parse result described by `uri_type/0`.
@type t() :: %__MODULE__{
authority: nil | binary(),
fragment: nil | binary(),
host: nil | binary(),
path: nil | binary(),
port: nil | :inet.port_number(),
query: nil | binary(),
scheme: nil | binary(),
userinfo: nil | binary(),
parsed_path: uri_type()
}
# Maps a URI scheme string to the module whose `parse/1` handles it.
# Used at compile time to generate one `parse_scheme/1` clause per entry.
# Note that both "uuid" and "urn" are routed to `URL.UUID`.
@supported_schemes %{
"tel" => URL.Tel,
"data" => URL.Data,
"geo" => URL.Geo,
"mailto" => URL.Mailto,
"uuid" => URL.UUID,
"urn" => URL.UUID
}
import URL.ParseHelpers.Core, only: [structify: 2]
import NimbleParsec
import URL.ParseHelpers.{Core, Mailto, Params, Unwrap}
@doc """
Parses a URL string into a `t:URL.t/0` struct.

The returned struct has the same shape as Elixir's `t:URI.t/0` with
the addition of the `parsed_path` key. `parsed_path` holds the result
of the scheme-specific parser when the URL's scheme is one of the
supported schemes, and is `nil` otherwise.

### Arguments

* `url` is a binary representation of a URL.

### Returns

* `{:ok, URL.t()}` or

* `{:error, {exception, reason}}`.

### Example

    iex> URL.new("geo:48.198634,-16.371648,3.4;crs=wgs84;u=40.0")
    {:ok,
      %URL{
        authority: nil,
        fragment: nil,
        host: nil,
        parsed_path: %URL.Geo{
          alt: 3.4,
          lat: 48.198634,
          lng: -16.371648,
          params: %{"crs" => "wgs84", "u" => 40.0}
        },
        path: "48.198634,-16.371648,3.4;crs=wgs84;u=40.0",
        port: nil,
        query: nil,
        scheme: "geo",
        userinfo: nil
      }
    }

    iex> URL.new("geo:48.198634,--16.371648,3.4;crs=wgs84;u=40.0")
    {:error,
      {URL.Parser.ParseError,
        "expected an string of digits while processing lat inside alt inside geo data. Detected on line 1 at \\"-16.371648,3.4;crs=w\\" <> ..."}}

    iex> URL.new "/invalid_greater_than_in_path/>"
    {:error,
      {URI.Error,
        "cannot parse due to reason invalid_uri: \\">\\""}}

"""
@spec new(url :: binary()) :: {:ok, __MODULE__.t()} | {:error, {module(), String.t()}}
def new(url) when is_binary(url) do
  # Generic URI parsing first, then the scheme-specific parser (if any).
  # A non-matching step is returned to the caller unchanged.
  with {:ok, generic_uri} <- uri_new(url),
       {:ok, parsed_path} <- parse_scheme(generic_uri) do
    {:ok, merge_uri(generic_uri, parsed_path)}
  end
end
@doc """
Parses a URL string and returns a `t:URL.t/0` struct, or raises.

This is the raising counterpart of `new/1`. The returned struct has
the same shape as Elixir's `t:URI.t/0` with the addition of the
`parsed_path` key.

### Arguments

* `url` is a binary representation of a URL.

### Returns

* `t:URL.t/0` or

* raises an exception.

### Raises

* `URL.Parser.ParseError` when `new/1` reports a scheme-specific
  parse error.

* `URI.Error` when `new/1` reports that `url` is not a valid URI.

### Example

    iex> URL.new!("geo:48.198634,-16.371648,3.4;crs=wgs84;u=40.0")
    %URL{
      authority: nil,
      fragment: nil,
      host: nil,
      parsed_path: %URL.Geo{
        alt: 3.4,
        lat: 48.198634,
        lng: -16.371648,
        params: %{"crs" => "wgs84", "u" => 40.0}
      },
      path: "48.198634,-16.371648,3.4;crs=wgs84;u=40.0",
      port: nil,
      query: nil,
      scheme: "geo",
      userinfo: nil
    }

    iex> URL.new!("/invalid_greater_than_in_path/>")
    ** (URI.Error) cannot parse due to reason invalid_uri: ">"

"""
@spec new!(url :: binary()) :: __MODULE__.t() | no_return()
def new!(url) when is_binary(url) do
  case new(url) do
    {:ok, parsed} ->
      parsed

    {:error, {URL.Parser.ParseError = exception, reason}} ->
      raise(exception, reason)

    {:error, {URI.Error = exception, message}} ->
      # `new/1` returns an already formatted `URI.Error` message. Passing
      # that message as `:part` would wrap it a second time, so recover the
      # offending part from `URI.new/1` instead. If `URI.new/1` succeeds,
      # the error came from somewhere else and the reported message is used
      # as the part, as before.
      part =
        case URI.new(url) do
          {:error, invalid_part} -> invalid_part
          {:ok, _uri} -> message
        end

      raise(exception, action: "parse", reason: "invalid_uri", part: part)
  end
end
@doc """
Returns the string representation of the given `t:URL.t/0` struct.

The work is delegated to `URI.to_string/1`.

### Arguments

* `url` is any `t:URL.t/0`.

### Returns

* a string representation of the URL.

### Examples

    iex> {:ok, geo_url} = URL.new("geo:48.198634,-16.371648,3.4;crs=wgs84;u=40.0")
    iex> URL.to_string(geo_url)
    "geo:48.198634,-16.371648,3.4;crs=wgs84;u=40.0"

"""
# Dialyzer warnings are suppressed for this function: a `%URL{}` rather
# than a `%URI{}` is handed to `URI.to_string/1`.
@dialyzer {:nowarn_function, {:to_string, 1}}
@spec to_string(t()) :: String.t()
def to_string(%URL{} = url), do: URI.to_string(url)
# Deprecated alias kept for backwards compatibility; forwards to `new/1`.
@doc false
@deprecated "Use new/1 instead"
@spec parse(url :: binary()) :: {:ok, __MODULE__.t()} | {:error, {module(), String.t()}}
def parse(url) when is_binary(url), do: new(url)
@doc """
Parse and percent decode a URL query string.

### Arguments

* `query` is either a query string, a map or struct with a `:query`
  key (such as a `t:URL.t/0`), or an `{:error, {exception, reason}}`
  tuple, which is returned unchanged.

### Returns

* Either a map of query params or

* an `{:error, {URL.Parser.ParseError, reason}}` tuple.

A map or struct whose `:query` is `nil` yields an empty map.

### Examples

    iex> URL.parse_query_string("url=http%3a%2f%2ffonzi.com%2f&name=Fonzi&mood=happy&coat=leather")
    %{
      "coat" => "leather",
      "mood" => "happy",
      "name" => "Fonzi",
      "url" => "http://fonzi.com/"
    }

    iex> mailto = "mailto:user@%E7%B4%8D%E8%B1%86.example.org?subject=Test&body=NATTO"
    iex> URL.new!(mailto) |> URL.parse_query_string()
    %{"body" => "NATTO", "subject" => "Test"}

    iex> URL.parse_query_string(%{query: nil})
    %{}

"""
@spec parse_query_string(String.t() | map() | {:error, {module(), binary()}}) ::
        map() | {:error, {module(), binary()}}
def parse_query_string(query) when is_binary(query) do
  # Anything other than a single params map is returned unchanged.
  with {:ok, [params]} <- unwrap(parse_query(query)) do
    params
  end
end

def parse_query_string({:error, {_, _}} = error) do
  error
end

# A URL without a query component has `query: nil`. Without this clause
# the recursive call below would raise `FunctionClauseError` for `nil`.
def parse_query_string(%{query: nil}) do
  %{}
end

def parse_query_string(%{query: query}) do
  parse_query_string(query)
end
# Clause for a missing (`nil`) query string: returns a hand-built success
# tuple whose result list holds a single empty map and whose remainder is
# the empty string.
# NOTE(review): the 6-tuple appears to mimic the
# `{:ok, acc, rest, context, line, offset}` shape returned by NimbleParsec
# generated parsers - confirm against the NimbleParsec documentation.
@doc false
def parse_query(nil) do
{:ok, [%{}], "", %{}, {0, 0}, 0}
end
# Generates the `parse_query` parser for binary input from the `hfields/0`
# combinator, wrapped in `optional/1`.
# `hfields/0` is not defined in this file; presumably it comes from one of
# the imported `URL.ParseHelpers` modules - verify.
@doc false
defparsec :parse_query,
optional(hfields())
# Compile-time generation of one `parse_scheme/1` clause per entry in
# `@supported_schemes`. Each clause matches the literal scheme string and
# hands the `%URI{}` to that scheme module's `parse/1`.
for {scheme_name, parser} <- @supported_schemes do
  defp parse_scheme(%URI{scheme: unquote(scheme_name)} = uri) do
    unquote(parser).parse(uri)
  end
end

# Fallback for every other scheme: there is no scheme-specific result.
defp parse_scheme(%URI{}), do: {:ok, nil}
# Builds the final result from a parsed `uri` and the scheme-specific
# `parsed_path`: every binary field of `uri` is whitespace-trimmed, the
# fields are handed to `structify/2` with this module, and `parsed_path`
# is stored on the result.
defp merge_uri(uri, parsed_path) do
  # `Map.to_list/1` on a struct also yields the `:__struct__` entry; its
  # value is an atom, so `trim/1` passes it through untouched.
  trimmed_fields =
    for field <- Map.to_list(uri) do
      __MODULE__.trim(field)
    end

  trimmed_fields
  |> structify(__MODULE__)
  |> add_parsed_path(parsed_path)
end
# Stores the scheme-specific parse result under `:parsed_path`.
# The update syntax (rather than `Map.put/3`) asserts that the key already
# exists, so a value that is not shaped like a `%URL{}` fails loudly
# instead of silently gaining a new key.
defp add_parsed_path(url, parsed_path) do
  %{url | parsed_path: parsed_path}
end
# Trims leading and trailing whitespace from the value of a `{key, value}`
# pair when the value is a binary. Any other term is returned unchanged.
@doc false
def trim({key, value}) when is_binary(value), do: {key, String.trim(value)}
def trim(untouched), do: untouched
# Wraps `URI.new/1`: an `{:ok, uri}` result is passed through as is, while
# an `{:error, part}` result is converted to the
# `{:error, {URI.Error, message}}` shape via `uri_error/1`.
defp uri_new(uri) do
  with {:error, invalid_part} <- URI.new(uri) do
    {:error, uri_error(invalid_part)}
  end
end
# Builds the `{URI.Error, message}` pair for an invalid URI, where
# `message` is the text `URI.Error` produces for the offending `part`.
defp uri_error(part) do
  error = %URI.Error{action: "parse", reason: "invalid_uri", part: part}
  {URI.Error, URI.Error.message(error)}
end
end