# lib/name_case.ex

defmodule NameCase do
  @moduledoc """
  Converts personal names to their conventional capitalisation.

  The single entry point is `nc/2`. It title-cases every word of the name and
  then applies a series of correction passes: Mac/Mc prefixes, lower-case
  particles such as "van" or "de", roman numerals, Spanish conjunctions,
  post-nominal letters and the lower-case words "the", "of" and "and".

  All word-boundary regexes use the `u` modifier, so `name` is expected to be
  a valid UTF-8 string.
  """

  # Names that start with "Mac" but must NOT get an upper-case letter after the
  # prefix (plus "MacIsaac", which must). Applied after the generic Mac/Mc fix.
  @mac_prefix_name_exceptions [
    # Lithuanian
    {~r/\bMacEvicius/, "Macevicius"},
    # Portuguese
    {~r/\bMacHado/, "Machado"},
    {~r/\bMacEdo/, "Macedo"},
    {~r/\bMacHar/, "Machar"},
    {~r/\bMacHin/, "Machin"},
    {~r/\bMacHlin/, "Machlin"},
    {~r/\bMacIas/, "Macias"},
    {~r/\bMacIulis/, "Maciulis"},
    {~r/\bMacKie/, "Mackie"},
    {~r/\bMacKle/, "Mackle"},
    {~r/\bMacKlin/, "Macklin"},
    {~r/\bMacKmin/, "Mackmin"},
    {~r/\bMacQuarie/, "Macquarie"},
    {~r/\bMacOmber/, "Macomber"},
    {~r/\bMacIn/, "Macin"},
    {~r/\bMacKintosh/, "Mackintosh"},
    {~r/\bMacKen/, "Macken"},
    {~r/\bMacHen/, "Machen"},
    {~r/\bMacisaac/, "MacIsaac"},
    {~r/\bMacHiel/, "Machiel"},
    {~r/\bMacIol/, "Maciol"},
    {~r/\bMacKell/, "Mackell"},
    {~r/\bMacKlem/, "Macklem"},
    {~r/\bMacKrell/, "Mackrell"},
    {~r/\bMacLin/, "Maclin"},
    {~r/\bMacKey/, "Mackey"},
    {~r/\bMacKley/, "Mackley"},
    {~r/\bMacHell/, "Machell"},
    {~r/\bMacHon/, "Machon"}
  ]

  # Particles that are written in lower case. The `(?=\s+\w)` look-ahead keeps
  # a word capitalised when it is the last word of the name.
  @general_replacements [
    # al Arabic or forename Al.
    {~r/\bAl(?=\s+\w)/u, "al"},
    # ap Welsh.
    {~r/\bAp\b/u, "ap"},
    # bin, binti, binte Arabic.
    {~r/\b(Bin|Binti|Binte)\b/u, "bin"},
    # ben Hebrew or forename Ben.
    {~r/\bBen(?=\s+\w)/u, "ben"},
    # bat Hebrew or forename Bat.
    {~r/\bBat(?=\s+\w)/u, "bat"},
    # della and delle Italian.
    {~r/\bDell([ae])\b/u, "dell\\1"},
    # da, de, di Italian; du French; do Brasil.
    {~r/\bD([aeiou])\b/u, "d\\1"},
    # das, dos Brasileiros.
    {~r/\bD([ao]s)\b/u, "d\\1"},
    # del Italian; der/den Dutch/Flemish.
    {~r/\bDe([lrn])\b/u, "de\\1"},
    # el Greek or El Spanish.
    {~r/\bEl\b/u, "el"},
    # la French or La Spanish.
    {~r/\bLa\b/u, "la"},
    # lo Italian; le French.
    {~r/\bL([eo])\b/u, "l\\1"},
    # ten, ter Dutch/Flemish.
    {~r/\bTe([rn])\b/u, "te\\1"},
    # van German or forename Van.
    {~r/\bVan(?=\s+\w)/u, "van"},
    # von Dutch/Flemish.
    {~r/\bVon\b/u, "von"}
  ]

  @spanish_conjunctions ~w(Y E I)

  # Roman numerals built from X, L, V and I. Both groups are optional, so the
  # pattern can also match the empty string; upcasing "" is a no-op.
  @roman_regex ~r/\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b/u

  @post_nominals ~w(
    ACILEx ACSM ADC AEPC AFC AFM AICSM AKC AM ARBRIBA ARCS ARRC ARSM AUH AUS
    BA BArch BCh BChir BCL BDS BEd BEM BEng BM BS BSc BSW BVM&S BVScBVetMed
    CB CBE CEng CertHE CGC CGM CH CIE CMarEngCMarSci CMarTech CMG CMILT CML CPhT CPLCTP CPM CQSW CSciTeach CSI CTL CVO
    DBE DBEnv DC DCB DCM DCMG DConstMgt DCVO DD DEM DFC DFM DIC Dip DipHE DipLP DipSW DL DLitt DLP DPhil DProf DPT DREst DSC DSM DSO DSocSci
    ED EdD EJLog EMLog EN EngD EngTech ERD ESLog
    FADO FAWM FBDOFCOptom FCEM FCILEx FCILT FCSP. FdAFdSc FdEng FFHOM FFPM FRCAFFPMRCA FRCGP FRCOG FRCP FRCPsych FRCS FRCVS FSCR.
    GBE GC GCB GCIE GCILEx GCMG GCSI GCVO GM
    HNC HNCert HND HNDip
    ICTTech IDSM IEng IMarEng IOMCPM ISO
    J JP JrLog
    KBE KC KCB KCIE KCMG KCSI KCVO KG KP KT
    LFHOM LG LJ LLB LLD LLM Log LPE LT LVO
    MA MAcc MAnth MArch MarEngTech MB MBA MBChB MBE MBEIOM MBiochem MC MCEM MCGI MCh. MChem MChiro MClinRes MComp MCOptom MCSM MCSP MD MEarthSc MEng MEnt MEP MFHOM MFin MFPM MGeol MILT MJur MLA MLitt MM MMath MMathStat MMORSE MMus MOst MP MPAMEd MPharm MPhil MPhys MRCGP MRCOG MRCP MRCPath MRCPCHFRCPCH MRCPsych MRCS MRCVS MRes MS MSc MScChiro MSci MSCR MSM MSocSc MSP MSt MSW MSYP MVO
    NPQH
    OBE OBI OM OND
    PgC PGCAP PGCE PgCert PGCHE PgCLTHE PgD PGDE PgDip PhD PLog PLS
    QAM QC QFSM QGM QHC QHDS QHNS QHP QHS QPM QS QTSCSci
    RD RFHN RGN RHV RIAI RIAS RM RMN RN RN1RNA RN2 RN3 RN4 RN5 RN6 RN7 RN8 RN9 RNC RNLD RNMH ROH RRC RSAW RSci RSciTech RSCN RSN RVM RVN
    SCHM SCJ SCLD SEN SGM SL SPANSPMH SPCC SPCN SPDN SPHP SPLD SrLog SRN SROT
    TD
    UD
    V100 V200 V300 VC VD VetMB VN VRD
  )

  @downcase_words ~w(The Of And)

  @default_options [
    lazy: true,
    mac_prefix: true,
    spanish: true,
    roman: true,
    post_nominals: true
  ]

  @doc """
  Returns a properly namecased `name`.

  `name` must be a binary. `opts` is a keyword list that is merged over the
  defaults listed below.

  ## Options

    * `:lazy`          - whether to skip mixed case names (defaults to `true`)
    * `:mac_prefix`    - whether to fix Mac/Mc prefix names (defaults to `true`);
                         `irish: false` is accepted as an alias for disabling it
    * `:spanish`       - whether to fix Spanish names (defaults to `true`)
    * `:roman`         - whether to fix roman numeral cases, eg. XI, XII (defaults to `true`)
    * `:post_nominals` - whether to fix post-nominal letter cases, eg. OBE, PhD (defaults to `true`)

  The Spanish pass runs after the roman numeral pass and lower-cases a
  standalone `Y`, `E` or `I`, which includes the roman numeral `I`. Pass
  `spanish: false` to keep it upper case.

  ## Examples

      iex> NameCase.nc("MCDONALDS")
      "McDonalds"

      iex> NameCase.nc("LEIGH-WILLIAMS")
      "Leigh-Williams"

      iex> NameCase.nc("ST. jOhN", lazy: false)
      "St. John"

      iex> NameCase.nc("DUKE OF YORK")
      "Duke of York"

      iex> NameCase.nc("HENRY VIII", roman: false)
      "Henry Viii"

  """
  @spec nc(String.t(), keyword()) :: String.t()
  def nc(name, opts \\ []) when is_binary(name) do
    opts = Keyword.merge(@default_options, opts)

    if enabled?(opts, :lazy) and mixed_case?(name) do
      name
    else
      name
      |> capitalize()
      |> update_mac_prefix(opts)
      |> update_general_replacements()
      |> update_roman(opts)
      |> update_spanish_conjunctions(opts)
      |> update_post_nominals(opts)
      |> update_downcase_words()
    end
  end

  # Reads a boolean-ish option. The merged option list always has more than one
  # entry, so a function head such as `f(name, roman: false)` can never match
  # it; options must be looked up by key instead.
  defp enabled?(opts, key), do: Keyword.get(opts, key, true) not in [false, nil]

  # Upper-cases roman numerals such as "Viii" -> "VIII".
  defp update_roman(name, opts) do
    if enabled?(opts, :roman) do
      Regex.replace(@roman_regex, name, &String.upcase/1)
    else
      name
    end
  end

  # Fixes Mac/Mc prefixed names unless disabled via `:mac_prefix` (or `:irish`).
  defp update_mac_prefix(name, opts) do
    if enabled?(opts, :mac_prefix) and enabled?(opts, :irish) do
      name
      |> fix_mac_prefix()
      |> String.replace("Macmurdo", "MacMurdo")
    else
      name
    end
  end

  # Applies the generic prefix fix only when the name looks like a Mac/Mc name:
  # any "Mc" word, or a "Mac" word of sufficient length not ending in one of
  # the letters a, c, i, o, z, j.
  defp fix_mac_prefix(name) do
    if String.match?(name, ~r/\bMac[A-Za-z]{2,}[^aciozj]\b/) or String.match?(name, ~r/\bMc/) do
      fix_mac_prefix_exceptions(name)
    else
      name
    end
  end

  # Capitalises the letter after every Mac/Mc prefix, then undoes that for the
  # names listed in @mac_prefix_name_exceptions.
  defp fix_mac_prefix_exceptions(name) do
    prefixed =
      Regex.replace(~r/\b(Ma?c)([A-Za-z]+)/, name, fn _, mac_prefix, rest ->
        mac_prefix <> String.capitalize(rest)
      end)

    Enum.reduce(@mac_prefix_name_exceptions, prefixed, fn {pattern, replacement}, acc ->
      String.replace(acc, pattern, replacement)
    end)
  end

  # Lower-cases the standalone Spanish conjunctions "Y", "E" and "I".
  defp update_spanish_conjunctions(name, opts) do
    if enabled?(opts, :spanish) do
      Enum.reduce(@spanish_conjunctions, name, fn conjunction, acc ->
        conjunction_regex = Regex.compile!("\\b#{conjunction}\\b", "u")
        String.replace(acc, conjunction_regex, String.downcase(conjunction))
      end)
    else
      name
    end
  end

  # Lower-cases particles such as "van", "de" or "bin".
  defp update_general_replacements(name) do
    Enum.reduce(@general_replacements, name, fn {pattern, replacement}, acc ->
      String.replace(acc, pattern, replacement)
    end)
  end

  # Restores the canonical casing of post-nominal letters (e.g. "Phd" -> "PhD").
  defp update_post_nominals(name, opts) do
    if enabled?(opts, :post_nominals) do
      Enum.reduce(@post_nominals, name, fn post_nominal, acc ->
        String.replace(acc, post_nominal_regex(post_nominal), post_nominal)
      end)
    else
      name
    end
  end

  # Builds a case-insensitive whole-token matcher for one post-nominal.
  #
  # The entry is escaped so that "." (as in "MCh.") is matched literally rather
  # than as a wildcard, and look-arounds are used instead of `\b` so that an
  # entry ending in a non-word character still matches at the end of the name.
  defp post_nominal_regex(post_nominal) do
    Regex.compile!("(?<!\\w)" <> Regex.escape(post_nominal) <> "(?!\\w)", "iu")
  end

  # Lower-cases the words "The", "Of" and "And".
  defp update_downcase_words(name) do
    Enum.reduce(@downcase_words, name, fn word, acc ->
      word_regex = Regex.compile!("\\b#{word}\\b", "u")
      # Replace with the lower-cased WORD, not the lower-cased accumulator.
      String.replace(acc, word_regex, String.downcase(word))
    end)
  end

  # Title-cases every word. The `u` modifier makes `\b` and `\w` Unicode-aware,
  # so a leading non-ASCII letter is the one that gets capitalised.
  defp capitalize(name) do
    name
    |> String.downcase()
    |> String.replace(~r/\b\w/u, &String.upcase/1)
    # Lowercase 's
    |> String.replace(~r/'\w\b/u, &String.downcase/1)
  end

  # A name is "mixed case" when it does not start with a lower-case ASCII
  # letter and is neither entirely lower case nor entirely upper case.
  defp mixed_case?(name) do
    starts_lower? = String.match?(name, ~r/^[a-z]/)
    single_case? = name == String.downcase(name) or name == String.upcase(name)
    not (starts_lower? or single_case?)
  end
end