lib/omni/tools/web_fetch/fetcher.ex

defmodule Omni.Tools.WebFetch.Fetcher do
  @moduledoc false

  alias Omni.Tools.WebFetch.Strategy

  @doc """
  Retrieves the given URLs and returns their extracted content as one string.

  A one-element list yields that URL's content as-is. Any other list is
  fetched concurrently (at most three requests in flight) and rendered as one
  section per URL, in input order, each introduced by a `## <url>` heading and
  separated from the next by a `---` divider.

  Raises a `RuntimeError` naming the URL when a fetch reports an error. For a
  batch, every fetch finishes first and the earliest failing URL in input
  order is the one reported.
  """
  @spec fetch([String.t()], [{module(), keyword()}], keyword()) :: String.t()
  def fetch([url], strategies, state) do
    {_url, content} = url |> fetch_one(strategies, state) |> unwrap_result!()
    content
  end

  def fetch(urls, strategies, state) do
    urls
    |> Task.async_stream(
      fn url -> fetch_one(url, strategies, state) end,
      ordered: true,
      max_concurrency: 3,
      timeout: :infinity
    )
    # Collect eagerly so that all fetches complete before any error is raised.
    |> Enum.map(fn {:ok, outcome} -> outcome end)
    |> Enum.map(&unwrap_result!/1)
    |> assemble_batch()
  end

  # Turns a `{url, result}` pair into `{url, content}`, raising on an error result.
  defp unwrap_result!({url, {:ok, content}}), do: {url, content}

  defp unwrap_result!({url, {:error, exception}}) do
    raise "#{url}: #{Exception.message(exception)}"
  end

  # ── Per-URL fetch ────────────────────────────────────────────────

  # Fetches a single URL and pairs the outcome with the URL it belongs to.
  #
  # Returns `{url, {:ok, content}}` or `{url, {:error, exception}}`.
  #
  # An unsupported or missing scheme, the absence of a matching strategy, and
  # a non-2xx response are all reported as `{:ok, message}` so that they show
  # up as readable text in the output. Only an `{:error, exception}` returned
  # by the request itself is propagated as an error.
  defp fetch_one(url, strategies, state) do
    uri = URI.parse(url)

    # `find_strategy/2` signals failure with `{:error, message}`, which is
    # itself a 2-tuple. Without the guard it would be bound as `{mod, opts}`
    # and the request would be sent with `:error` as the strategy module.
    result =
      with :ok <- validate_scheme(uri),
           {mod, opts} when mod != :error <- find_strategy(strategies, uri) do
        case execute_request(uri, url, mod, opts, state) do
          {:ok, %Req.Response{} = response} ->
            {:ok, extract_content(response, mod, opts, state)}

          {:ok, message} when is_binary(message) ->
            {:ok, message}

          {:error, exception} ->
            {:error, exception}
        end
      else
        {:error, message} -> {:ok, message}
      end

    {url, result}
  end

  # Accepts only `http` and `https` URIs.
  #
  # Returns `:ok` for a supported scheme, otherwise `{:error, message}` where
  # the message distinguishes a missing scheme from an unsupported one.
  defp validate_scheme(%URI{} = uri) do
    case uri.scheme do
      supported when supported in ["http", "https"] ->
        :ok

      nil ->
        {:error, "Invalid URL (missing scheme): #{URI.to_string(uri)}"}

      other ->
        {:error, "Unsupported scheme #{inspect(other)}: #{URI.to_string(uri)}"}
    end
  end

  # Looks up the strategy for `uri` via `Strategy.find/2`.
  #
  # Returns whatever `Strategy.find/2` returns when it is not `nil`
  # (presumably a `{module, opts}` pair; confirm against `Strategy`), or
  # `{:error, message}` when nothing matches.
  defp find_strategy(strategies, uri) do
    with nil <- Strategy.find(strategies, uri) do
      {:error, "No matching strategy for #{URI.to_string(uri)}"}
    end
  end

  # Builds and performs the HTTP request for `url`.
  #
  # Returns:
  #   * `{:ok, response}` for a 2xx response,
  #   * `{:ok, message}` (a string naming the status and URL) for any other
  #     status, so the caller can surface it as readable output,
  #   * `{:error, exception}` when `Req.request/1` itself fails.
  #
  # The first argument (the parsed URI) is currently unused.
  defp execute_request(_uri, url, mod, opts, state) do
    req = build_req(url, mod, opts, state)

    case Req.request(req) do
      {:ok, %{status: status} = response} when status in 200..299 ->
        {:ok, response}

      {:ok, %{status: status}} ->
        # Keep the status and the URL visually separate in the message.
        {:ok, "HTTP #{status}: #{url}"}

      {:error, exception} ->
        {:error, exception}
    end
  end

  # Produces the final text for a successful response.
  #
  # The body is first made UTF-8 safe, then handed to the strategy's
  # `extract/2`, and the result is capped at the `:max_output` value from
  # `state` (a required key).
  defp extract_content(response, mod, opts, state) do
    extracted =
      response
      |> ensure_utf8_body()
      |> mod.extract(opts)

    truncate(extracted, Keyword.fetch!(state, :max_output))
  end

  # ── Request building ─────────────────────────────────────────────

  # Builds the request for `url` from the base request stored in `state`.
  #
  # Reads `:req` and `:timeout` from `state` (both required), then sets the
  # URL and receive timeout and disables body decoding and retries. When the
  # strategy module exports `request/2`, it is given the merged request and
  # its return value is used instead.
  defp build_req(url, mod, opts, state) do
    base_req = Keyword.fetch!(state, :req)
    timeout = Keyword.fetch!(state, :timeout)

    req =
      Req.merge(base_req, url: url, receive_timeout: timeout, decode_body: false, retry: false)

    # `function_exported?/3` does not load modules. Ensure the strategy is
    # loaded first, otherwise an optional `request/2` could be silently
    # skipped when the module has not been loaded yet.
    if Code.ensure_loaded?(mod) and function_exported?(mod, :request, 2) do
      mod.request(req, opts)
    else
      req
    end
  end

  # ── Batch assembly ───────────────────────────────────────────────

  # Renders `{url, content}` pairs as one document: each pair becomes a
  # `## <url>` heading followed by its content, and consecutive sections are
  # separated by a `---` divider. An empty list yields an empty string.
  defp assemble_batch(results) do
    Enum.map_join(results, "\n\n---\n\n", fn {url, content} ->
      "## #{url}\n\n#{content}"
    end)
  end

  # ── UTF-8 safety ─────────────────────────────────────────────────

  # Guarantees that a binary response body is valid UTF-8.
  #
  # A body that is already valid is returned untouched. Otherwise every byte
  # that cannot be decoded is replaced with U+FFFD (the Unicode replacement
  # character) and the rest of the body is kept, so a single stray byte does
  # not discard the remainder of the document. Responses whose body is not a
  # binary are returned unchanged.
  defp ensure_utf8_body(%{body: body} = response) when is_binary(body) do
    if String.valid?(body) do
      response
    else
      %{response | body: scrub_invalid_utf8(body, <<>>)}
    end
  end

  defp ensure_utf8_body(response), do: response

  # Copies valid codepoints into `acc`, substituting U+FFFD for each invalid byte.
  defp scrub_invalid_utf8(<<char::utf8, rest::binary>>, acc) do
    scrub_invalid_utf8(rest, <<acc::binary, char::utf8>>)
  end

  defp scrub_invalid_utf8(<<_invalid, rest::binary>>, acc) do
    scrub_invalid_utf8(rest, <<acc::binary, "\uFFFD">>)
  end

  defp scrub_invalid_utf8(<<>>, acc), do: acc

  # ── Truncation ───────────────────────────────────────────────────

  # Caps `content` at `max` bytes.
  #
  # `:infinity` disables the cap, and content that already fits is returned
  # unchanged. Otherwise the first `max` bytes are taken, snapped back to a
  # line boundary via `snap_to_last_newline/1`, and followed by a notice
  # stating how much of the original is shown.
  defp truncate(content, :infinity), do: content
  defp truncate(content, max) when byte_size(content) <= max, do: content

  defp truncate(content, max) do
    snapped =
      content
      |> binary_part(0, max)
      |> drop_partial_codepoint()
      |> snap_to_last_newline()

    total = byte_size(content)

    "#{snapped}\n...(truncated, showing first #{format_bytes(byte_size(snapped))} of #{format_bytes(total)})"
  end

  # A byte-based cut can land inside a multi-byte UTF-8 character. Drop such a
  # trailing fragment so the output stays valid UTF-8 even when there is no
  # newline to snap back to. Input that is invalid elsewhere is left as-is.
  defp drop_partial_codepoint(binary) do
    case :unicode.characters_to_binary(binary) do
      {:incomplete, complete, _fragment} -> complete
      _valid_or_error -> binary
    end
  end

  # Cuts `binary` just before its last newline so truncated output ends on a
  # complete line. A binary without any newline is returned unchanged.
  #
  # `:binary.match/3` always reports the first occurrence, even when given a
  # negative-length scope, so the last position is taken from
  # `:binary.matches/2` instead.
  defp snap_to_last_newline(binary) do
    case :binary.matches(binary, "\n") do
      [] ->
        binary

      positions ->
        {pos, 1} = List.last(positions)
        binary_part(binary, 0, pos)
    end
  end

  # Formats a byte count for display: whole bytes below 1 KiB (`"512B"`),
  # otherwise kilobytes or megabytes rounded to one decimal place
  # (`"1.5KB"`, `"2.5MB"`).
  defp format_bytes(bytes) do
    cond do
      bytes < 1_024 -> "#{bytes}B"
      bytes < 1_048_576 -> "#{Float.round(bytes / 1_024, 1)}KB"
      true -> "#{Float.round(bytes / 1_048_576, 1)}MB"
    end
  end
end