# lib/crawler/linker/path_builder.ex

defmodule Crawler.Linker.PathBuilder do
  @moduledoc """
  Builds a path for a link (can be a URL itself or a relative link) based on
  the input string which is a URL with or without its protocol.
  """

  alias Crawler.Linker.{PathFinder, PathExpander}

  @doc """
  Builds a path for a link (can be a URL itself or a relative link) based on
  the input string which is a URL with or without its protocol.

  `current_url` is the URL of the page the `link` was found on. A `link` is
  treated as an absolute URL only when it *starts* with a scheme followed by
  `://` (for example `https://`); such a link is used on its own. Any other
  link is joined onto a base derived from `current_url`: links beginning with
  `/` are joined onto the result of `PathFinder.find_domain/2`, all others
  onto the result of `PathFinder.find_base_path/2`. A `://` that appears later
  in the link (for instance inside a query string such as
  `page?next=http://other.host/`) does not make the link absolute.

  `safe` is passed through unchanged to the `PathFinder` functions.

  ## Examples

      iex> PathBuilder.build_path(
      iex>   "https://cool.beans:7777/dir/page1",
      iex>   "https://hello.world:8888/remote/page2"
      iex> )
      "hello.world-8888/remote/page2"

      iex> PathBuilder.build_path(
      iex>   "https://cool.beans:7777/dir/page1",
      iex>   "local/page2"
      iex> )
      "cool.beans-7777/dir/local/page2"

      iex> PathBuilder.build_path(
      iex>   "https://cool.beans:7777/dir/page1",
      iex>   "/local/page2"
      iex> )
      "cool.beans-7777/local/page2"

      iex> PathBuilder.build_path(
      iex>   "https://cool.beans:7777/parent/dir/page1",
      iex>   "../local/page2"
      iex> )
      "cool.beans-7777/parent/local/page2"

      iex> PathBuilder.build_path(
      iex>   "https://cool.beans:7777/parent/dir/page1",
      iex>   "../../local/page2"
      iex> )
      "cool.beans-7777/local/page2"
  """
  def build_path(current_url, link, safe \\ true) do
    current_url
    |> base_path(link, safe)
    |> build(link, safe)
  end

  # Picks the base that a non-absolute link is joined onto: links starting
  # with "/" use the domain of `url`, every other link uses its base path.
  defp base_path(url, "/" <> _link, safe), do: PathFinder.find_domain(url, safe)
  defp base_path(url, _link, safe), do: PathFinder.find_base_path(url, safe)

  # Joins the link with its base when needed, then hands the result to
  # `PathFinder.find_path/2` and `PathExpander.expand_dot/1`.
  defp build(path, link, safe) do
    link
    |> normalise(path)
    |> PathFinder.find_path(safe)
    |> PathExpander.expand_dot()
  end

  # Absolute URLs are returned untouched; anything else is joined onto `path`.
  defp normalise(link, path) do
    if absolute_url?(link), do: link, else: Path.join(path, link)
  end

  # True only when the link begins with a URI scheme (a letter followed by
  # letters, digits, "+", "." or "-") and "://". Anchoring at the start keeps
  # relative links that merely contain "://" further along (e.g. in a query
  # string) from being mistaken for absolute URLs. Leading whitespace is
  # tolerated so that untrimmed absolute hrefs are still classified as absolute.
  defp absolute_url?(link) do
    Regex.match?(~r{\A\s*[a-zA-Z][a-zA-Z0-9+.-]*://}, link)
  end
end