# lib/open_graph.ex

defmodule OpenGraph do
  @moduledoc """
  Parse websites to extract Open Graph meta tags.

  The example below shows how to fetch the GitHub Open Graph rich objects.

  ```
  Mix.install([
    {:opengraph_parser, "~> 0.4.4"},
    {:req, "~> 0.4.14"}
  ])

  Req.get!("https://github.com").body
  |> OpenGraph.parse()

  %OpenGraph{description: "GitHub is where people build software. More than 15 million...",
  image: "https://assets-cdn.github.com/images/modules/open_graph/github-octocat.png",
  site_name: "GitHub", title: "Build software better, together", type: nil,
  url: "https://github.com"}
  ```
  """

  # Single source of truth for the struct fields. The string -> atom lookup
  # table below is derived from it, so adding a field here is enough to have
  # the parser pick it up.
  @fields [
    # Basic fields
    :title,
    :type,
    :image,
    :url,
    # Optional fields
    :description,
    :audio,
    :determiner,
    :locale,
    :site_name,
    :video,
    # Image fields
    :"image:secure_url",
    :"image:type",
    :"image:width",
    :"image:height",
    :"image:alt",
    # Video fields
    :"video:secure_url",
    :"video:type",
    :"video:width",
    :"video:height",
    :"video:alt",
    # Audio fields
    :"audio:secure_url",
    :"audio:type",
    # Book fields
    :"book:author",
    :"book:isbn",
    :"book:release_date",
    :"book:tag",
    # Price fields
    :"price:amount",
    :"price:currency",
    # Product fields
    :"product:price:amount",
    :"product:price:currency",
    # Other meta tags (matched on the `name` attribute, not `property`)
    :"fediverse:creator"
  ]

  defstruct @fields

  # Compile-time table mapping a normalized tag name to its struct field.
  # Looking keys up here both whitelists them and avoids creating atoms from
  # document content at runtime.
  @field_lookup Map.new(@fields, fn field -> {Atom.to_string(field), field} end)

  # Fields that collect every occurrence of the tag instead of a single value.
  @list_fields [:"book:tag", :"book:author"]

  @type t :: %__MODULE__{
          title: String.t() | nil,
          type: String.t() | nil,
          image: String.t() | nil,
          url: String.t() | nil,
          description: String.t() | nil,
          audio: String.t() | nil,
          determiner: String.t() | nil,
          locale: String.t() | nil,
          site_name: String.t() | nil,
          video: String.t() | nil,
          "image:secure_url": String.t() | nil,
          "image:type": String.t() | nil,
          "image:width": String.t() | nil,
          "image:height": String.t() | nil,
          "image:alt": String.t() | nil,
          "video:secure_url": String.t() | nil,
          "video:type": String.t() | nil,
          "video:width": String.t() | nil,
          "video:height": String.t() | nil,
          "video:alt": String.t() | nil,
          "audio:secure_url": String.t() | nil,
          "audio:type": String.t() | nil,
          "book:author": list(String.t()) | nil,
          "book:isbn": String.t() | nil,
          "book:release_date": String.t() | nil,
          "book:tag": list(String.t()) | nil,
          "price:amount": String.t() | nil,
          "price:currency": String.t() | nil,
          "product:price:amount": String.t() | nil,
          "product:price:currency": String.t() | nil,
          "fediverse:creator": String.t() | nil
        }

  @doc """
  Parses the given HTML to extract the Open Graph objects.

  Args:
    * `html` - raw HTML as a binary string or char list

  Only `<meta>` tags are inspected:

    * tags whose `property` starts with `og:`, `book:`, `books:` or `product:`
      (a leading `og:` is dropped and `books:` is treated as `book:`);
    * tags whose `name` is `fediverse:creator`.

  Tags without a `content` attribute and tags that do not map to a struct
  field are ignored. `book:tag` and `book:author` collect every occurrence, in
  document order, into a list; for any other field the last occurrence wins.

  This function returns an `OpenGraph` struct. Fields with no matching tag are
  `nil`. A `MatchError` is raised if `Floki.parse_document/1` does not return
  `{:ok, document}`.
  """
  @spec parse(String.t() | charlist()) :: t()
  def parse(html) when is_binary(html) or is_list(html) do
    {:ok, document} = Floki.parse_document(html)

    # Both extraction passes look at the same nodes, so query them once.
    metatags = Floki.find(document, "meta")

    struct(__MODULE__, find_opengraph_tags(metatags) ++ find_other_tags(metatags))
  end

  # Builds the keyword list of fields coming from `property="..."` meta tags.
  defp find_opengraph_tags(metatags) do
    metatags
    |> collect_fields("property", &og_property?/1)
    # group_by keeps the values of each field in document order.
    |> Enum.group_by(fn {field, _value} -> field end, fn {_field, value} -> value end)
    |> Enum.map(fn
      {field, values} when field in @list_fields -> {field, values}
      # Scalar fields: the last tag in the document wins.
      {field, values} -> {field, List.last(values)}
    end)
  end

  # Builds the keyword list of fields coming from `name="..."` meta tags.
  defp find_other_tags(metatags) do
    collect_fields(metatags, "name", &named_tag?/1)
  end

  # Returns `{field, content}` pairs, in document order, for every meta tag
  # whose `attribute` value is accepted by `wanted?`, that carries a `content`
  # attribute, and whose normalized name is a known struct field.
  defp collect_fields(metatags, attribute, wanted?) do
    Enum.flat_map(metatags, fn metatag ->
      # Floki.attribute/2 always returns a list (empty when the attribute is
      # missing), so matching on `[value | _]` is the presence check.
      with [name | _] <- Floki.attribute(metatag, attribute),
           true <- wanted?.(name),
           [content | _] <- Floki.attribute(metatag, "content"),
           {:ok, field} <- Map.fetch(@field_lookup, normalize_name(name)) do
        [{field, content}]
      else
        _ -> []
      end
    end)
  end

  # Turns a raw tag name into the string form of a struct field: drops one
  # leading "og:" and then rewrites a leading "books:" namespace to "book:".
  defp normalize_name(name) do
    name
    |> drop_og_prefix()
    |> singularize_books()
  end

  defp drop_og_prefix("og:" <> property), do: property
  defp drop_og_prefix(property), do: property

  defp singularize_books("books:" <> rest), do: "book:" <> rest
  defp singularize_books(property), do: property

  # `property` namespaces that are considered Open Graph data.
  defp og_property?("og:" <> _property), do: true
  defp og_property?("book:" <> _property), do: true
  defp og_property?("books:" <> _property), do: true
  defp og_property?("product:" <> _property), do: true
  defp og_property?(_), do: false

  # `name` values (non Open Graph meta tags) that are extracted as well.
  defp named_tag?("fediverse:creator"), do: true
  defp named_tag?(_), do: false
end