defmodule Kagi.Search do
@moduledoc """
Search response returned by `Kagi.search/1..3`.
Contains parsed result rows and related-search suggestions.
## Fields
* `:results` - search result rows, truncated to the requested `:limit`.
* `:related` - related-search suggestions; empty when Kagi returns none.
"""
alias Kagi.Client
alias Kagi.Error
alias Kagi.HTTP
alias Kagi.SearchResult
# Option value types. When the query parameters are built, each atom below is
# translated to a numeric string code by the matching `*_value/1` helper
# (`:lens` -> `l`, `:sort` -> `order`, `:time` -> `dr`).
@typedoc "Search lens passed via the `:lens` option."
@type lens :: :default | :programming | :forums | :pdfs | :non_commercial | :world_news
@typedoc "Result ordering passed via the `:sort` option."
@type sort :: :recency | :website | :ad_trackers
# `:time` is rejected by option validation when combined with `:from` or `:to`.
@typedoc "Relative time window passed via the `:time` option."
@type time_range :: :day | :week | :month | :year
@typedoc "A parsed Kagi search response."
@type t :: %__MODULE__{results: [SearchResult.t()], related: [String.t()]}
# Both fields default to empty lists.
defstruct results: [], related: []
# Endpoint that `request/3` fetches with a GET request.
@url "https://kagi.com/html/search"
@doc false
# Runs a search: builds the query parameters from `query` and `options`,
# fetches the HTML results page with the client's session cookie, and parses
# it into a `%Kagi.Search{}`.
#
# Returns `{:ok, search}` or the first `{:error, %Kagi.Error{}}` produced by
# option validation, the HTTP layer, body normalization, or parsing.
@spec request(Client.t(), String.t() | [String.t()], keyword()) ::
        {:ok, t()} | {:error, Error.t()}
def request(%Client{} = client, query, options) when is_list(options) do
  with {:ok, params} <- query_params(query, options),
       {:ok, %{body: body}} <-
         HTTP.get(client, @url,
           params: params,
           headers: [{"cookie", "kagi_session=#{client.session_token}"}]
         ),
       {:ok, html} <- normalize_html(body) do
    # `validate_limit/1` accepts an explicit `limit: nil`, so treat it like a
    # missing key. `Keyword.get/3` would pass `nil` straight to `parse/2` and
    # raise a FunctionClauseError. `0` is truthy in Elixir and is preserved.
    parse(html, options[:limit] || 10)
  end
end
@doc false
# Parses a Kagi results page into a `%Kagi.Search{}` holding at most `limit`
# results. Standard result rows come first; grouped rows only fill whatever
# room is left. Any error from `detect_challenge/2` is returned unchanged.
@spec parse(String.t(), non_neg_integer()) :: {:ok, t()} | {:error, Error.t()}
def parse(html, limit) when is_binary(html) and is_integer(limit) and limit >= 0 do
  document = LazyHTML.from_document(html)

  case detect_challenge(document, html) do
    :ok ->
      standard = parse_standard_results(document, limit)
      remaining = max(limit - length(standard), 0)
      grouped = parse_grouped_results(document, remaining)

      {:ok,
       %__MODULE__{
         results: Enum.take(standard ++ grouped, limit),
         related: parse_related(document)
       }}

    failure ->
      failure
  end
end
# Ensures the HTTP response body is a binary before it is handed to the
# parser; any other term becomes a `:parse_error`.
@spec normalize_html(term()) :: {:ok, String.t()} | {:error, Error.t()}
defp normalize_html(body) do
  if is_binary(body) do
    {:ok, body}
  else
    message = "expected search response body to be a string, got: #{inspect(body)}"
    {:error, Error.new(:parse_error, message)}
  end
end
# Builds the keyword list of HTTP query parameters. The query text is built
# first, then the options are validated, then each optional parameter is added
# only when it has a value. Stops at the first invalid mapped option.
@spec query_params(String.t() | [String.t()], keyword()) ::
        {:ok, keyword()} | {:error, Error.t()}
defp query_params(query, options) do
  with {:ok, query_text} <- build_query(query, options),
       {:ok, checked} <- validate_options(options) do
    verbatim_flag = if(checked[:verbatim], do: "1")

    param_specs = [
      {:plain, :r, checked[:region]},
      {:mapped, :l, checked[:lens], &lens_value/1},
      {:mapped, :order, checked[:sort], &sort_value/1},
      {:mapped, :dr, checked[:time], &time_value/1},
      {:plain, :from_date, checked[:from]},
      {:plain, :to_date, checked[:to]},
      {:plain, :verbatim, verbatim_flag}
    ]

    Enum.reduce_while(param_specs, {:ok, [q: query_text]}, &put_query_param/2)
  end
end
# Runs the option checks in order (time/date exclusivity, limit, from, to) and
# returns the options untouched when all pass, or the first failure as-is.
@spec validate_options(keyword()) :: {:ok, keyword()} | {:error, Error.t()}
defp validate_options(options) do
  # Checks are wrapped in functions so later ones are not evaluated once an
  # earlier one has failed.
  checks = [
    fn -> validate_time_range(options) end,
    fn -> validate_limit(options[:limit]) end,
    fn -> validate_date(:from, options[:from]) end,
    fn -> validate_date(:to, options[:to]) end
  ]

  Enum.reduce_while(checks, {:ok, options}, fn check, passed ->
    case check.() do
      :ok -> {:cont, passed}
      failure -> {:halt, failure}
    end
  end)
end
# Rejects option sets that mix the relative `:time` window with an absolute
# `:from`/`:to` bound.
@spec validate_time_range(keyword()) :: :ok | {:error, Error.t()}
defp validate_time_range(options) do
  window = options[:time]
  bound = options[:from] || options[:to]

  cond do
    !window -> :ok
    !bound -> :ok
    true -> {:error, Error.new(:invalid_option, ":time cannot be combined with :from or :to")}
  end
end
# Accepts a missing (`nil`) limit or any non-negative integer.
@spec validate_limit(term()) :: :ok | {:error, Error.t()}
defp validate_limit(limit) do
  cond do
    is_nil(limit) -> :ok
    is_integer(limit) and limit >= 0 -> :ok
    true -> {:error, Error.new(:invalid_option, ":limit must be a non-negative integer")}
  end
end
# Validates the `:from` / `:to` option identified by `key`.
#
# `nil` (option not given) is accepted. Any other value must be a string of
# exactly the form YYYY-MM-DD; only the shape is checked, not whether the date
# exists on the calendar.
@spec validate_date(:from | :to, term()) :: :ok | {:error, Error.t()}
defp validate_date(_key, nil), do: :ok

defp validate_date(key, date) when is_binary(date) do
  # `\A` / `\z` anchor the whole string. With `^` / `$` a value ending in a
  # newline ("2024-01-01\n") also matches, because `$` matches before a
  # trailing newline, and the malformed value would pass validation.
  if Regex.match?(~r/\A\d{4}-\d{2}-\d{2}\z/, date) do
    :ok
  else
    invalid_date_error(key)
  end
end

defp validate_date(key, _date), do: invalid_date_error(key)

# Builds the shared error for a malformed `:from` / `:to` value.
@spec invalid_date_error(:from | :to) :: {:error, Error.t()}
defp invalid_date_error(key) do
  {:error, Error.new(:invalid_option, ":#{key} must use YYYY-MM-DD")}
end
# Joins the query term(s) with spaces, trims the result, and appends the
# optional `site:` and `filetype:` filters. A blank query is an error.
@spec build_query(String.t() | [String.t()], keyword()) ::
        {:ok, String.t()} | {:error, Error.t()}
defp build_query(query, options) do
  terms = for term <- List.wrap(query), do: to_string(term)

  case terms |> Enum.join(" ") |> String.trim() do
    "" ->
      {:error, Error.new(:invalid_option, "query must not be empty")}

    text ->
      with_site = append_filter(text, "site", options[:site])
      {:ok, append_filter(with_site, "filetype", options[:filetype])}
  end
end
# Appends a ` name:value` filter to the query, or returns the query unchanged
# when the value is `nil`.
@spec append_filter(String.t(), String.t(), String.t() | nil) :: String.t()
defp append_filter(query, name, value) do
  if is_nil(value) do
    query
  else
    filter = "#{name}:#{value}"
    query <> " " <> filter
  end
end
# Reducer for `Enum.reduce_while/3`: adds one parameter spec to the
# accumulated params. `nil` values are skipped. `:mapped` specs run the value
# through their mapper first and halt the reduction with its error on failure.
@spec put_query_param(tuple(), {:ok, keyword()}) ::
        {:cont, {:ok, keyword()}} | {:halt, {:error, Error.t()}}
defp put_query_param({:plain, key, value}, {:ok, params} = acc) do
  if is_nil(value) do
    {:cont, acc}
  else
    {:cont, {:ok, Keyword.put(params, key, value)}}
  end
end

defp put_query_param({:mapped, key, value, mapper}, {:ok, params} = acc) do
  if is_nil(value) do
    {:cont, acc}
  else
    case mapper.(value) do
      {:error, %Error{} = reason} -> {:halt, {:error, reason}}
      {:ok, mapped} -> {:cont, {:ok, Keyword.put(params, key, mapped)}}
    end
  end
end
# Translates a `:lens` option into the string code sent as the `l` parameter.
@spec lens_value(term()) :: {:ok, String.t()} | {:error, Error.t()}
defp lens_value(lens) do
  case lens do
    :default -> {:ok, "0"}
    :programming -> {:ok, "1"}
    :forums -> {:ok, "2"}
    :pdfs -> {:ok, "3"}
    :non_commercial -> {:ok, "4"}
    :world_news -> {:ok, "5"}
    other -> {:error, Error.new(:invalid_option, "invalid lens: #{inspect(other)}")}
  end
end
# Translates a `:sort` option into the string code sent as the `order` parameter.
@spec sort_value(term()) :: {:ok, String.t()} | {:error, Error.t()}
defp sort_value(sort) do
  case sort do
    :recency -> {:ok, "2"}
    :website -> {:ok, "3"}
    :ad_trackers -> {:ok, "4"}
    other -> {:error, Error.new(:invalid_option, "invalid sort: #{inspect(other)}")}
  end
end
# Translates a `:time` option into the string code sent as the `dr` parameter.
@spec time_value(term()) :: {:ok, String.t()} | {:error, Error.t()}
defp time_value(window) do
  case window do
    :day -> {:ok, "1"}
    :week -> {:ok, "2"}
    :month -> {:ok, "3"}
    :year -> {:ok, "4"}
    other -> {:error, Error.new(:invalid_option, "invalid time: #{inspect(other)}")}
  end
end
# Classifies a response page before results are extracted.
#
# Returns `:ok` when the document contains any known results container,
# `{:error, :blocked ...}` when it contains none but the raw HTML mentions a
# CAPTCHA/challenge marker, and a `:parse_error` otherwise.
@spec detect_challenge(LazyHTML.t(), String.t()) :: :ok | {:error, Error.t()}
defp detect_challenge(document, html) do
  # `cond` evaluates lazily, so the full-document challenge scan only runs
  # when no results structure was found; the common success path skips it.
  cond do
    results_structure?(document) ->
      :ok

    challenge_page?(html) ->
      {:error, Error.new(:blocked, "Blocked by CAPTCHA/challenge")}

    true ->
      {:error, Error.new(:parse_error, "search response had no recognizable results structure")}
  end
end

# True when the document has at least one element matching a results selector.
@spec results_structure?(LazyHTML.t()) :: boolean()
defp results_structure?(document) do
  matches = LazyHTML.query(document, "#search-app, .search-result, .sr-group .__srgi")
  not Enum.empty?(matches)
end

# True when the raw HTML contains any challenge marker, case-insensitively.
@spec challenge_page?(String.t()) :: boolean()
defp challenge_page?(html) do
  html
  |> String.downcase()
  |> String.contains?(["cf-challenge", "captcha", "challenge-platform", "just a moment"])
end
# Extracts up to `limit` results from the `.search-result` rows, skipping rows
# that `parse_result/2` cannot turn into a result.
@spec parse_standard_results(LazyHTML.t(), non_neg_integer()) :: [SearchResult.t()]
defp parse_standard_results(document, limit) do
  rows =
    for element <- LazyHTML.query(document, ".search-result"),
        row <- parse_result(element, ".__sri_title_link"),
        do: row

  Enum.take(rows, limit)
end
# Extracts up to `limit` results from grouped rows (`.sr-group .__srgi`).
# A limit of 0 returns immediately without querying the document.
@spec parse_grouped_results(LazyHTML.t(), non_neg_integer()) :: [SearchResult.t()]
defp parse_grouped_results(document, limit) do
  # Strict comparison keeps the match integer-only, like a literal `0` pattern.
  if limit === 0 do
    []
  else
    grouped = LazyHTML.query(document, ".sr-group .__srgi")
    rows = Enum.flat_map(grouped, fn element -> parse_result(element, ".__srgi-title a") end)
    Enum.take(rows, limit)
  end
end
# Converts one result element into a single-item list holding a
# `%SearchResult{}`, or `[]` when the title link or its `href` is missing.
# Returning a list lets callers use it directly with `Enum.flat_map/2`.
@spec parse_result(LazyHTML.t(), String.t()) :: [SearchResult.t()]
defp parse_result(element, link_selector) do
  link = element |> LazyHTML.query(link_selector) |> Enum.at(0)
  hrefs = if is_nil(link), do: [], else: LazyHTML.attribute(link, "href")

  case hrefs do
    [href | _] ->
      title = link |> LazyHTML.text() |> String.trim()
      snippet = element |> LazyHTML.query(".__sri-desc") |> LazyHTML.text() |> String.trim()
      [%SearchResult{url: href, title: title, snippet: snippet}]

    _no_href ->
      []
  end
end
# Collects the trimmed, non-empty text of each related-search suggestion.
@spec parse_related(LazyHTML.t()) :: [String.t()]
defp parse_related(document) do
  for span <- LazyHTML.query(document, ".related-searches a span"),
      label = span |> LazyHTML.text() |> String.trim(),
      label != "",
      do: label
end
end