# lib/helix/modules/agent_browser_module.ex

defmodule Helix.Modules.AgentBrowserModule do

  use Helix.Modules.Module
  alias Readability
  alias Playwright
  alias HTTPoison
  alias Floki
  alias URI
  import Helix.Modules.GPTUtils

  # Handles the `{:convey, event}` cast: builds a DuckDuckGo HTML search URL
  # from `state.goal`, crawls it starting at depth 0 via `process_url/3`, and
  # replies `{:noreply, new_state}` where `new_state` is whatever
  # `convey_alt("Finished.", state, :Finished)` returns.
  #
  # The crawl runs synchronously inside this callback.
  def handle_cast({:convey, _event}, state) do
    ui_event(state)

    # Debugging aids kept from development:
    # url = "https://html.duckduckgo.com/html/?q=technology%20venture%20capitalists%20based%20in%20norway"
    # url = "https://www.stratel.no/"
    # url = "https://www.stratel.no/contact-us"

    # extract_information_if_possible(url, state)
    # links = url_to_links(url, state)
    # link_menu = links_to_menu(links)
    # links_to_follow = chatgpt_choose_from_link_menu(link_menu, state)
    # next_links = links_from_choices(links, links_to_follow)

    # The goal is a query-string *value*, so it must be form-encoded.
    # URI.encode/1 leaves reserved characters such as "&", "=", "#" and "+"
    # untouched, which would truncate or corrupt the `q` parameter.
    url = "https://html.duckduckgo.com/html/?q=" <> URI.encode_www_form(state.goal)
    process_url(0, url, state)

    {:noreply, convey_alt("Finished.", state, :Finished)}
  end

  @doc """
  Crawls `url` and, while `depth <= state.depth`, the links chosen from it.

  For every page visited this runs `extract_information_if_possible/2`. When
  the depth limit still allows following links, the page's links are fetched,
  rendered as a menu, and the chat model picks which ones to follow. Links
  whose URL has already been visited, or contains `"pitchbook.com"`, are not
  followed.

  Always returns `:ok`; results are reported through the side effects of the
  functions it calls.
  """
  def process_url(depth, url, state) do
    do_process_url(depth, url, state)
    :ok
  end

  # Visits one page and returns the state with every URL visited so far in
  # `:visited`, so that sibling branches can skip pages already crawled.
  defp do_process_url(depth, url, state) do
    state = Map.update(state, :visited, [url], &(&1 ++ [url]))

    # Only the visited list is logged: the full state also carries the
    # OpenAI API key, which must not end up in stdout/log output.
    IO.inspect(state.visited, label: "* Visited")

    IO.inspect("* Processing URL #{url}")
    IO.inspect("* Extraction")
    extract_information_if_possible(url, state)

    # Checking the depth limit first avoids an HTTP fetch and a chat
    # completion whose result would be thrown away.
    if depth <= state.depth do
      IO.inspect("* Getting Links")
      links = url_to_links(url, state)
      IO.inspect("* Links: #{Kernel.inspect(links)}")
      follow_links(depth, links, state)
    else
      state
    end
  end

  # Asks the model which of `links` to follow and crawls each choice in turn,
  # threading the accumulated state (and thus `:visited`) through the siblings.
  defp follow_links(_depth, [], state), do: state

  defp follow_links(depth, links, state) do
    link_menu = links_to_menu(links)
    links_to_follow = chatgpt_choose_from_link_menu(link_menu, state)
    next_links = links_from_choices(links, links_to_follow)
    IO.inspect(next_links)

    Enum.reduce(next_links, state, fn
      %{url: next_url}, acc ->
        cond do
          next_url in acc.visited ->
            IO.inspect("* Skipping #{next_url}")
            acc

          String.contains?(next_url, "pitchbook.com") ->
            acc

          true ->
            do_process_url(depth + 1, next_url, acc)
        end

      # Out-of-range choices come back as nil; ignore them.
      _missing, acc ->
        acc
    end)
  end

  @doc """
  Fetches `url` and returns the distinct links found on the page.

  Each link is a map `%{url: absolute_url, text: anchor_text}`. Anchors with
  no text or no `href` are skipped, as are targets that are not `http`/`https`
  (for example `mailto:` or `javascript:`). DuckDuckGo redirect links are
  unwrapped to their real target, relative hrefs are resolved against `url`,
  and fragments are dropped so that `page` and `page#section` count as one.

  Returns `[]` when the request fails or the status code is not 200. The
  second argument is accepted for interface compatibility and is not used.
  """
  def url_to_links(url, _state) do
    case HTTPoison.get(url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        links =
          body
          |> Floki.parse_document!()
          |> Floki.find("a")
          |> Enum.flat_map(&anchor_to_link(&1, url))
          |> Enum.uniq_by(& &1.url)

        IO.inspect("Links")

        links

      {:ok, q} ->
        IO.inspect("Q: #{Kernel.inspect(q)}")
        []

      {:error, e} ->
        IO.inspect("URL to links error, #{Kernel.inspect(e)}")
        []
    end
  end

  # Converts one parsed <a> node into a zero- or one-element list of link
  # maps, so the caller can flat_map without building the list by appending.
  defp anchor_to_link(anchor, base_url) do
    href = anchor |> Floki.attribute("href") |> List.first()
    text = anchor |> Floki.text() |> String.trim()

    if text != "" and href != nil do
      case resolve_href(href, base_url) do
        nil -> []
        target -> [%{url: target, text: text}]
      end
    else
      []
    end
  end

  # Turns a raw href into an absolute http(s) URL string, or nil when the
  # href is malformed or points at a non-web scheme.
  defp resolve_href(href, base_url) do
    target = href |> unwrap_ddg_redirect() |> String.trim()

    # URI.merge/2 handles absolute, protocol-relative ("//host/x"),
    # root-relative ("/x") and document-relative ("x", "../x") references.
    case URI.merge(base_url, target) do
      %URI{scheme: scheme} = uri when scheme in ["http", "https"] ->
        URI.to_string(%{uri | fragment: nil})

      _other ->
        nil
    end
  rescue
    # Scraped hrefs are untrusted input: a malformed percent-escape or an
    # unmergeable reference should drop that one link, not abort the crawl.
    ArgumentError -> nil
  end

  # DuckDuckGo wraps results as "//duckduckgo.com/l/?uddg=<encoded>&rut=...".
  # The tracking parameters are cut off *before* decoding so that an encoded
  # "&" inside the real target survives. Other URLs are returned untouched.
  defp unwrap_ddg_redirect(href) do
    case String.split(href, "//duckduckgo.com/l/?uddg=", parts: 2) do
      [_prefix, encoded] ->
        encoded
        |> String.split("&", parts: 2)
        |> hd()
        |> URI.decode()

      _not_a_redirect ->
        href
    end
  end

  # Renders `links` as a numbered plain-text menu (index, URL and title per
  # entry), keeps the slice `0..3500` of it, prints the menu and returns it.
  defp links_to_menu(links) do
    menu =
      links
      |> Enum.with_index()
      |> Enum.map_join(fn {%{text: title, url: href}, position} ->
        "#{position}:\n - URL: #{href}\n - Title: #{title}\n\n"
      end)
      |> String.slice(0..3500)

    IO.inspect("Menu:")
    IO.puts(menu)

    menu
  end

  # Maps the indexes chosen by the model back onto `links`.
  #
  # `choices` comes from decoded model output and is therefore untrusted:
  # anything that is not a list yields `[]`, and entries that are not
  # non-negative integers, are out of range, or are repeated are dropped.
  # (Negative integers are rejected on purpose: `Enum.at/2` would otherwise
  # count them from the end of the list.)
  defp links_from_choices(links, choices) when is_list(choices) do
    choices
    |> Enum.filter(&(is_integer(&1) and &1 >= 0))
    |> Enum.uniq()
    |> Enum.map(&Enum.at(links, &1))
    |> Enum.reject(&is_nil/1)
  end

  defp links_from_choices(_links, _choices), do: []

  # Best-effort extraction for a single page: summarises `url` with
  # Readability, asks the chat model to extract information from the title
  # and article text, and conveys the result unless it is an empty map.
  #
  # Any exception raised along the way is logged and turned into `nil`, so a
  # single bad page does not abort the crawl.
  #
  # TODO: extraction could be richer (mailto links, text of span/div/pre/p).
  defp extract_information_if_possible(url, state) do
    # NOTE: Readability does not expose a way to configure the fetch timeout.
    summary = Readability.summarize(url)
    info = chatgpt_extract_information(summary.title, summary.article_text, state)

    if info != %{} do
      convey(Kernel.inspect(info), state)
    end
  rescue
    e ->
      # Still best-effort, but no longer silent: report what went wrong.
      IO.inspect("* Extraction failed for #{url}: #{Kernel.inspect(e)}")
      nil
  end

  # Asks the chat model which entries of `link_menu` (the numbered text menu
  # built from a page's links) are worth following to reach `state.goal`.
  #
  # Returns a list of at most 5 integers. Returns `[]` when the request
  # fails, times out, or the reply is not a JSON list; request errors are
  # also reported through `broadcast_error/2`.
  defp chatgpt_choose_from_link_menu(link_menu, state) do
    prompt = """
    You are a robot which communicates only in JSON. You do NOT ever output natural language.
    Of these indexed links, which would you choose to find '#{state.goal}'.
    Answer as a JSON list and ONLY a JSON list of numbers. Do not provide any additional friendlyness. Answer only as a JSON list of numbers. Include a maximum of 5 numbers in the list.
    If cannot answer or have not been provided with any information, that's okay, reply with "[]".

    Good examples:
    [1, 2, 3]
    [6, 22]
    []
    """

    custom_config = %{
      api_key: Map.get(state, :OAI_API_KEY, "oai_REPLACE_ME")
    }

    messages = [
      %{role: "system", content: prompt},
      %{role: "user", content: link_menu}
    ]

    completion =
      OpenAI.chat_completion(
        custom_config,
        model: Map.get(state, :model, "gpt-3.5-turbo"),
        messages: messages,
        max_tokens: get_state(state, :max_tokens, "1024"),
        temperature: get_state(state, :temperature, "0.1"),
        stop: get_state(state, :stop, "")
      )

    case completion do
      {:ok, res} ->
        res_json = extract_chat_result(res)

        case JSON.decode(res_json) do
          {:ok, choices} when is_list(choices) ->
            # Model output is untrusted: keep only integer indexes and
            # enforce the 5-item cap the prompt asks for.
            choices
            |> Enum.filter(&is_integer/1)
            |> Enum.take(5)

          {:ok, other} ->
            IO.inspect("JSON error, expected a list: " <> Kernel.inspect(other))
            []

          {:error, _} ->
            IO.inspect("JSON error.")
            []
        end

      {:error, :timeout} ->
        IO.inspect("Agent Browser Timeout")
        broadcast_error(state, Kernel.inspect("OpenAI API Timeout"))
        []

      {:error, e} ->
        IO.inspect("Unexpcected error: " <> Kernel.inspect(e))
        IO.inspect(create_error_event(e, state.id))
        broadcast_error(state, Kernel.inspect(e))
        []
    end
  end

  # Asks the chat model to extract `state.extract` related to `state.goal`
  # from a page, given its `title` and body `text`.
  #
  # Returns the decoded JSON reply (the prompt requests a JSON object).
  # Returns `%{}` when the request fails, times out, or the reply is not
  # valid JSON; request errors are also reported through `broadcast_error/2`.
  #
  # NOTE(review): `text` is sent in full; very long pages may exceed the
  # model's context window — confirm whether truncation is needed here.
  defp chatgpt_extract_information(title, text, state) do
    prompt = """
    You are a robot which communicates only in JSON. You do NOT ever output natural language.
    Given this raw text extract and format any #{state.extract} related to #{state.goal}.
    Answer as a JSON object and ONLY as a JSON object. Do not provide any additional friendlyness. Answer only as a JSON object.
    If you cannot find any #{state.extract}, that's okay, reply with "{}".
    """

    content = "Page Title:\n#{title}\nPage Body:\n #{text}"

    custom_config = %{
      api_key: Map.get(state, :OAI_API_KEY, "oai_REPLACE_ME")
    }

    messages = [
      %{role: "system", content: prompt},
      %{role: "user", content: content}
    ]

    completion =
      OpenAI.chat_completion(
        custom_config,
        model: Map.get(state, :model, "gpt-3.5-turbo"),
        messages: messages,
        max_tokens: get_state(state, :max_tokens, "1024"),
        temperature: get_state(state, :temperature, "0.1"),
        stop: get_state(state, :stop, "")
      )

    case completion do
      {:ok, res} ->
        res_json = extract_chat_result(res)

        case JSON.decode(res_json) do
          {:ok, extracted} ->
            extracted

          {:error, _} ->
            IO.inspect("JSON error.")
            %{}
        end

      {:error, :timeout} ->
        IO.inspect("Agent Browser Timeout")
        broadcast_error(state, Kernel.inspect("OpenAI API Timeout"))
        %{}

      {:error, e} ->
        IO.inspect("Unexpcected error: " <> Kernel.inspect(e))
        IO.inspect(create_error_event(e, state.id))
        broadcast_error(state, Kernel.inspect(e))
        %{}
    end
  end

  # defp playwright() do
        # {:ok, browser} = Playwright.launch(:chromium, %{headless: true})
    # page =
    #   browser |> Playwright.Browser.new_page()

    # page
    #   |> Playwright.Page.goto("https://html.duckduckgo.com/html/?q=example")

    # x = page
    #   |> Playwright.Page.text_content("body")

    # require IEx;
    # IEx.pry()

    # browser
    #   |> Playwright.Browser.close()
  # end

end