lib/elixpose.ex

defmodule Elixpose do
  @moduledoc """
  `Elixpose` helps you to scrap web pages.
    It shows you a lot of information about the page.
  """

  @doc """
    Count the CSS referenced

    ## Examples

        iex> Elixpose.count_css("https://pt.stackoverflow.com/")
        3
  """
  def count_css(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("link[rel=stylesheet]")
      |> Enum.count()
      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @doc """
    Count the JS referenced

    ## Examples

        iex> Elixpose.count_js("https://pt.stackoverflow.com/")
        19
  """
  def count_js(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("script")
      |> Enum.count()
      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @doc """
    Count the HTML Elements

    ## Examples

        iex> Elixpose.count_html_elements("https://pt.stackoverflow.com/")
        1
  """
  def count_html_elements(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("html")
      |> Enum.count()
      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

   @doc """
    Count the HTML Elements

    ## Examples

        iex> Elixpose.count_meta_tags("https://pt.stackoverflow.com/")
        12
  """
  def count_meta_tags(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("meta")
      |> Enum.count()
      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

   @doc """
    Count the OnClick events

    ## Examples

        iex> Elixpose.count_onclick_events("https://pt.stackoverflow.com/")
        15
  """
  def count_onclick_events(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("body")
      |> Floki.find("[onclick]")
      |> Enum.count()

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @doc """
    Count the amount of Forms in the HTML

    ## Examples

        iex> Elixpose.count_forms("https://pt.stackoverflow.com/")
        1
  """
  def count_forms(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("form")
      |> Enum.count()

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @doc """
    Get the text content from <script> Html Tag
  """
  def get_js_content(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("script:not([src])")
      |> Enum.map(fn {_chave, _valor1, valor2} -> valor2 end)

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @doc """
    Get the text content from <style> Html Tag
  """
  def get_css_content(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("style")
      |> Enum.map(fn {_chave, _valor1, valor2} -> valor2 end)

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @doc """
    Return information about the form tag
  """
  def get_forms_info(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("form")
      |> Enum.map(fn {_key, value1, _value2} -> value1 end)

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

   @doc """
    Returns the page size in kb

    ## Examples

        iex> Elixpose.get_page_size("https://pt.stackoverflow.com/")
        101
  """
  def get_page_size(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code, headers: content_header}} -> {code, content_header, raw_body}
      #html = raw_body
      page_size = div(String.length(raw_body), 1024)

      page_size

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

  @spec get_onclick_values(binary, any) :: list | {:error, any}
  @doc """
    Returns the OnClick values
  """
  def get_onclick_values(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      document
      |> Floki.find("body")
      |> Floki.find("[onclick]")
      |> Enum.map(fn {_key, value1, _value2} -> value1 end)

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

   @doc """
    Check if some JS file has an Ajax call

    ## Examples

        iex> Elixpose.has_ajax_call?("https://pt.stackoverflow.com/")
        true
  """
  def has_ajax_call?(url, headers \\ []) do
    case HTTPoison.get(url, headers) do
      {:ok, %{body: raw_body, status_code: code}} -> {code, raw_body}
      html = raw_body
      {:ok, document} = Floki.parse_document(html)

      total= document
      |> Floki.find("head")
      |> Floki.find("script[src*='ajax']")
      |> Enum.count()

      if total > 0 do
        true
      else
        false
      end

      {:error, %{reason: reason}} -> {:error, reason}
    end
  end

   @doc """
    Get the JSON with the amount of elements
  """
  def get_json_report(url, headers \\ []) do

    %Report{id: make_ref(),
              amount_css: count_css(url, headers),
              amount_js: count_js(url, headers),
              amount_html_elements: count_html_elements(url, headers),
              amount_meta: count_meta_tags(url, headers),
              amount_js_events: count_onclick_events(url, headers),
              amount_forms: count_forms(url, headers),
              amount_forms_info: get_forms_info(url, headers) |> length(),
              amount_js_content: get_js_content(url, headers) |> length(),
              amount_onclick_events: get_onclick_values(url, headers) |> length(),
              form_info: get_forms_info(url, headers),
              onclick_values: get_onclick_values(url, headers),
              has_ajax_call: has_ajax_call?(url, headers)}
    |> JSON.encode()

  end

end