# lib/exograph.ex

defmodule Exograph do
  @moduledoc """
  Local CodeQL-style code search for Elixir, backed by Postgres and ExAST.

  ## Quick start

      {:ok, index} = Exograph.index("lib", repo: MyApp.Repo, migrate?: true)
      {:ok, hits} = Exograph.search(index, "Repo.get!(_, _)")

  ## DSL queries

      import Exograph.DSL
      query = from(f in Fragment, where: matches(f, "def _ do ... end"))
      {:ok, hits} = Exograph.all(index, query)

  ## Call graph

      {:ok, callers} = Exograph.search_callers(index, "Repo.transaction/1")
      {:ok, callees} = Exograph.search_callees(index, "MyApp.create_user/1")
  """

  alias Exograph.{
    CommentHit,
    DefinitionHit,
    DSL,
    Hit,
    Index,
    Query,
    ReferenceHit,
    Scope,
    Similarity,
    Text,
    TextHit
  }

  alias Exograph.Extractor.ExAST, as: ExASTExtractor
  alias Exograph.FragmentStore.Postgres, as: PostgresFragmentStore
  alias Exograph.InvertedIndex.Postgres, as: PostgresInvertedIndex
  alias Exograph.TreeStore.Postgres, as: PostgresTreeStore

  @doc """
  Indexes the source files under `paths` into the Postgres-backed stores.

  Returns `{:ok, %Index{}}` on success, or the first `{:error, reason}`
  produced while creating a store or writing a batch.
  """
  @spec index(String.t() | [String.t()], keyword()) :: {:ok, Index.t()} | {:error, term()}
  def index(paths, opts \\ []) do
    opts = normalize_backend(opts)
    extraction_opts = extractor_opts(opts)
    storage_opts = store_opts(opts)
    # Only the first store honours :migrate?; the others reuse the same schema.
    storage_opts_no_migrate = Keyword.put(storage_opts, :migrate?, false)
    chunk = Keyword.get(opts, :index_batch_size, 2_000)

    with {:ok, inverted} <- PostgresInvertedIndex.new(storage_opts),
         {:ok, fragments} <- PostgresFragmentStore.new(storage_opts_no_migrate),
         {:ok, trees} <- PostgresTreeStore.new(storage_opts_no_migrate),
         {:ok, {inverted, fragments, trees}} <-
           put_fragment_stream(
             ExASTExtractor.stream_paths(paths, extraction_opts),
             chunk,
             inverted,
             fragments,
             trees
           ) do
      built = %Index{
        inverted_backend: PostgresInvertedIndex,
        inverted: inverted,
        fragment_store_backend: PostgresFragmentStore,
        fragment_store: fragments,
        tree_store_backend: PostgresTreeStore,
        tree_store: trees
      }

      {:ok, built}
    end
  end

  @doc """
  Searches the index for fragments structurally matching `pattern_or_selector`.

  Accepts `:limit` in `opts` (default 50). Returns `{:ok, hits}` or
  `{:error, :invalid_index}` when the first argument is not an `Index`.
  """
  @spec search(Index.t() | term(), ExAST.Pattern.pattern() | ExAST.Selector.t(), keyword()) ::
          {:ok, [map()]} | {:error, term()}
  def search(index, pattern_or_selector, opts \\ [])

  def search(%Index{} = index, pattern_or_selector, opts) do
    query = compile(pattern_or_selector)
    max_hits = Keyword.get(opts, :limit, 50)

    hits =
      index
      |> DSL.Executor.stream_structural(query, opts)
      |> Stream.flat_map(&verified_hits(query, &1))
      |> Enum.take(max_hits)

    {:ok, hits}
  end

  def search(_index, _pattern_or_selector, _opts), do: {:error, :invalid_index}

  # One scored hit per verified match; fragments that fail verification yield [].
  defp verified_hits(query, fragment) do
    case Query.verify(query, fragment) do
      {:ok, matches} ->
        for match <- matches do
          Hit.with_match(Hit.new(fragment: fragment, score: 1.0), match)
        end

      :error ->
        []
    end
  end

  @doc """
  Runs a compiled `Exograph.DSL` query against the index.
  """
  @spec all(Index.t(), DSL.Query.t(), keyword()) :: {:ok, [map()]} | {:error, term()}
  def all(%Index{} = index, %DSL.Query{} = query, opts \\ []) do
    # The former `source: :fragment` clause was redundant: both clauses
    # delegated identically, so a single %DSL.Query{} match covers every source.
    DSL.Executor.all(index, query, opts)
  end

  @doc """
  Returns call edges whose callee matches `callee` (e.g. `"Repo.transaction/1"`).
  """
  @spec search_callers(Index.t(), String.t(), keyword()) :: {:ok, [Exograph.CallEdge.t()]}
  def search_callers(%Index{} = index, callee, opts \\ []) when is_binary(callee) do
    # Dispatch through the index's configured backend — consistent with
    # search_text/3 and friends — instead of hard-coding PostgresInvertedIndex.
    index.inverted_backend.search_callers(index.inverted, callee, opts)
  end

  @doc """
  Returns call edges whose caller matches `caller` (e.g. `"MyApp.create_user/1"`).
  """
  @spec search_callees(Index.t(), String.t(), keyword()) :: {:ok, [Exograph.CallEdge.t()]}
  def search_callees(%Index{} = index, caller, opts \\ []) when is_binary(caller) do
    # Dispatch through the index's configured backend — consistent with
    # search_text/3 and friends — instead of hard-coding PostgresInvertedIndex.
    index.inverted_backend.search_callees(index.inverted, caller, opts)
  end

  @doc false
  # Thin delegation to the similarity engine; kept here for API symmetry.
  @spec similar(Index.t(), String.t() | Macro.t(), keyword()) :: {:ok, [map()]} | {:error, term()}
  def similar(%Index{} = index, source_or_ast, opts \\ []),
    do: Similarity.search(index, source_or_ast, opts)

  @doc false
  @spec search_text(Index.t(), String.t() | Regex.t(), keyword()) :: {:ok, [TextHit.t()]}
  def search_text(%Index{} = index, literal_or_regex, opts \\ []) do
    backend = index.inverted_backend

    # The indexed fast path only handles binary literals; regex queries (and
    # backends without search_text/3) always take the sequential scan.
    indexed? = is_binary(literal_or_regex) and function_exported?(backend, :search_text, 3)

    if indexed? do
      case backend.search_text(index.inverted, literal_or_regex, opts) do
        {:ok, candidates} ->
          # Re-check candidates against the raw source to drop non-matches.
          candidates
          |> Enum.filter(fn hit -> text_match?(hit.fragment.source || "", literal_or_regex) end)
          |> typed_hits(TextHit)

        {:error, _reason} ->
          search_text_seq(index, literal_or_regex, opts)
      end
    else
      search_text_seq(index, literal_or_regex, opts)
    end
  end

  @doc false
  @spec search_comments(Index.t(), String.t(), keyword()) :: {:ok, [CommentHit.t()]}
  def search_comments(%Index{} = index, literal, opts \\ []) when is_binary(literal) do
    backend = index.inverted_backend

    if function_exported?(backend, :search_comments, 3) do
      case backend.search_comments(index.inverted, literal, opts) do
        {:ok, candidates} ->
          # Confirm the literal really occurs in the fragment's comment text.
          candidates
          |> Enum.filter(fn hit -> text_match?(comments_text(hit.fragment.source), literal) end)
          |> typed_hits(CommentHit)

        {:error, _reason} ->
          search_comments_seq(index, literal, opts)
      end
    else
      search_comments_seq(index, literal, opts)
    end
  end

  @doc false
  @spec search_definitions(Index.t(), String.t(), keyword()) :: {:ok, [DefinitionHit.t()]}
  def search_definitions(%Index{} = index, partial_name, opts \\ [])
      when is_binary(partial_name) do
    backend = index.inverted_backend

    with true <- function_exported?(backend, :search_definitions, 3),
         {:ok, hits} <- backend.search_definitions(index.inverted, partial_name, opts) do
      typed_hits(hits, DefinitionHit)
    else
      # Backend lacks the optimized lookup, or it errored: fall back to a scan.
      _fallback -> search_definitions_seq(index, partial_name, opts)
    end
  end

  @doc false
  @spec search_references(Index.t(), String.t(), keyword()) :: {:ok, [ReferenceHit.t()]}
  def search_references(%Index{} = index, partial_name, opts \\ [])
      when is_binary(partial_name) do
    backend = index.inverted_backend

    with true <- function_exported?(backend, :search_references, 3),
         {:ok, hits} <- backend.search_references(index.inverted, partial_name, opts) do
      typed_hits(hits, ReferenceHit)
    else
      # NOTE(review): the sequential fallback never receives `partial_name`,
      # so it returns every in-scope fragment — confirm this is intentional.
      _fallback -> search_references_seq(index, opts)
    end
  end

  @doc false
  # Selectors compile via Query.selector/1; anything else is treated as a pattern.
  @spec compile(ExAST.Pattern.pattern() | ExAST.Selector.t()) :: Query.t()
  def compile(pattern_or_selector) do
    case pattern_or_selector do
      %ExAST.Selector{} = selector -> Query.selector(selector)
      pattern -> Query.pattern(pattern)
    end
  end

  @doc false
  # Fetches the stored tree nodes for one fragment via the configured tree store.
  @spec tree_nodes(Index.t(), Exograph.Fragment.id()) :: [Exograph.Tree.Node.t()]
  def tree_nodes(%Index{} = index, fragment_id),
    do: index.tree_store_backend.nodes(index.tree_store, fragment_id)

  # Writes extracted fragments into all three stores in batches, threading the
  # (possibly updated) store handles through the fold and halting on first error.
  defp put_fragment_stream(fragments, batch_size, inverted, fragment_store, tree_store) do
    fragments
    |> Stream.chunk_every(batch_size)
    |> Enum.reduce_while({:ok, {inverted, fragment_store, tree_store}}, &store_batch/2)
  end

  # A batch must land in every store; any store error aborts the whole run.
  defp store_batch(batch, {:ok, {inverted, fragment_store, tree_store}}) do
    with {:ok, inverted} <- PostgresInvertedIndex.add(inverted, batch),
         {:ok, fragment_store} <- PostgresFragmentStore.put(fragment_store, batch),
         {:ok, tree_store} <- PostgresTreeStore.put_fragments(tree_store, batch) do
      {:cont, {:ok, {inverted, fragment_store, tree_store}}}
    else
      {:error, reason} -> {:halt, {:error, reason}}
    end
  end

  # Only the Postgres backend exists today; accept the string spelling too.
  defp normalize_backend(opts) do
    backend = Keyword.get(opts, :backend, :postgres)

    cond do
      backend == :postgres -> opts
      backend == "postgres" -> Keyword.put(opts, :backend, :postgres)
      true -> raise ArgumentError, "unsupported backend #{inspect(backend)}; use :postgres"
    end
  end

  # Strips store/backend configuration so only extractor options reach ExAST.
  defp extractor_opts(opts) do
    store_keys = [:backend, :repo, :prefix, :migrate?, :bm25?, :index_batch_size, :extractors]
    Keyword.drop(opts, store_keys)
  end

  # Keeps only the options the Postgres stores understand.
  defp store_opts(opts) do
    Keyword.take(opts, [:repo, :prefix, :migrate?, :bm25?, :package, :package_version, :extractors])
  end

  # Sequential full scan used when the backend has no text index (or it errored).
  defp search_text_seq(%Index{} = index, literal_or_regex, opts) do
    max_hits = Keyword.get(opts, :limit, 50)

    # Trigram prefilter applies only to literal queries; regexes get an empty
    # set, which every source trivially satisfies.
    needle =
      if is_binary(literal_or_regex), do: Text.trigrams(literal_or_regex), else: MapSet.new()

    fragments = index.fragment_store_backend.all(index.fragment_store)

    hits =
      for fragment <- fragments,
          source = fragment.source || "",
          candidate? =
            MapSet.size(needle) == 0 or MapSet.subset?(needle, Text.trigrams(source)),
          Scope.fragment?(fragment, opts) and candidate? and
            text_match?(source, literal_or_regex) do
        TextHit.new(fragment: fragment, score: 1.0)
      end

    {:ok, Enum.take(hits, max_hits)}
  end

  # Sequential comment scan: extracts comment text per fragment and matches it.
  defp search_comments_seq(%Index{} = index, literal, opts) do
    max_hits = Keyword.get(opts, :limit, 50)
    fragments = index.fragment_store_backend.all(index.fragment_store)

    hits =
      for fragment <- fragments,
          Scope.fragment?(fragment, opts) and
            text_match?(comments_text(fragment.source), literal) do
        CommentHit.new(fragment: fragment, score: 1.0)
      end

    {:ok, Enum.take(hits, max_hits)}
  end

  # Sequential definition scan: case-insensitive substring match on the name of
  # def/defp/defmacro/defmacrop fragments.
  defp search_definitions_seq(%Index{} = index, partial_name, opts) do
    needle = String.downcase(partial_name)
    max_hits = Keyword.get(opts, :limit, 50)
    fragments = index.fragment_store_backend.all(index.fragment_store)

    hits =
      for fragment <- fragments,
          Scope.fragment?(fragment, opts) and
            fragment.kind in [:def, :defp, :defmacro, :defmacrop] and
            fragment.name != nil and
            String.contains?(String.downcase(fragment.name), needle) do
        DefinitionHit.new(fragment: fragment, score: 1.0)
      end

    {:ok, Enum.take(hits, max_hits)}
  end

  # Sequential reference fallback.
  #
  # NOTE(review): this clause never sees the queried name, so it returns every
  # fragment in scope (up to :limit) rather than actual references — unlike
  # search_definitions_seq/3, which filters by name. Confirm this is intended.
  defp search_references_seq(%Index{} = index, opts) do
    max_hits = Keyword.get(opts, :limit, 50)
    fragments = index.fragment_store_backend.all(index.fragment_store)

    hits =
      fragments
      |> Enum.filter(&Scope.fragment?(&1, opts))
      |> Enum.map(fn fragment -> ReferenceHit.new(fragment: fragment, score: 1.0) end)
      |> Enum.take(max_hits)

    {:ok, hits}
  end

  # Coerces backend hits into the given hit struct, passing through hits that
  # already carry the right struct type.
  defp typed_hits(hits, module) do
    coerced =
      Enum.map(hits, fn hit ->
        if is_struct(hit, module) do
          hit
        else
          module.new(fragment: hit.fragment, score: hit.score, match: hit.match)
        end
      end)

    {:ok, coerced}
  end

  defp ok(results), do: {:ok, results}

  # Dispatch text matching on query type: plain binaries vs compiled regexes.
  defp text_match?(source, literal) when is_binary(literal) do
    Text.literal_match?(source, literal)
  end

  defp text_match?(source, %Regex{} = regex) do
    Text.regex_match?(source, regex)
  end

  # Fragments without stored (binary) source contribute no comment text.
  defp comments_text(source) when is_binary(source), do: Exograph.File.comments_text(source)
  defp comments_text(_source), do: ""
end