defmodule Zhuyin do
@moduledoc """
Utilities to deal with zhuyin syllables and groups thereof.

The main goal of this module is to provide functions to manipulate strings that contain zhuyin
words, which are potentially mixed with other content. These strings are represented by the
`t:zhuyin_list/0` type. Zhuyin lists can be obtained by parsing a string with the `read/2`,
`read!/2` or `sigil_z/2` functions. Afterwards, these lists can be converted into a string
representation by using `to_string/1`.

A `t:zhuyin_list/0` is a list which contains strings and zhuyin structs (`t:t/0`). These structs
are used to encode zhuyin syllables; they can be created directly through the use of the
`from_string/1` or `from_string!/1` functions. Like `t:zhuyin_list/0`, `t:t/0` structs can be
converted to strings through the use of the `to_string/1` function.

Additionally, both `t:zhuyin_list/0` and zhuyin structs (`t:t/0`) can be created from or
converted to pinyin structs (`t:Pinyin.t/0`) using `from_pinyin/1` or `to_pinyin/1`.
"""
alias Zhuyin.Parsers
# ----- #
# Types #
# ----- #
@enforce_keys [:final]
defstruct [tone: 0, initial: "", final: ""]

@typedoc """
A single zhuyin syllable.

`tone` is the tone number (`0..4`); `initial` and `final` hold the zhuyin symbols that make up
the syllable. Only `final` has to be provided when building the struct.
"""
@type t :: %__MODULE__{final: String.t(), initial: String.t(), tone: 0..4}

@typedoc """
A list in which zhuyin syllables (`t:t/0`) and plain strings may be mixed.
"""
@type zhuyin_list :: [String.t() | t()]
# Tone marks, positioned so that the index of every mark is the tone number it stands for
zhuyin_tones = ["˙", "", "ˊ", "ˇ", "ˋ"]

@doc false
def _zhuyin_tones, do: unquote(zhuyin_tones)

@doc false
def _tone_for_index(idx)

# One clause per tone number is generated at compile time; any other argument matches no clause
zhuyin_tones
|> Enum.with_index()
|> Enum.each(fn {mark, number} ->
  def _tone_for_index(unquote(number)), do: unquote(mark)
end)
# ------------------------- #
# Pinyin Mapping / Creation #
# ------------------------- #
# Zhuyin initial => pinyin initial, grouped by consonant series
@initials %{
  "ㄅ" => "b", "ㄆ" => "p", "ㄇ" => "m", "ㄈ" => "f",
  "ㄉ" => "d", "ㄊ" => "t", "ㄋ" => "n", "ㄌ" => "l",
  "ㄍ" => "g", "ㄎ" => "k", "ㄏ" => "h",
  "ㄐ" => "j", "ㄑ" => "q", "ㄒ" => "x",
  "ㄓ" => "zh", "ㄔ" => "ch", "ㄕ" => "sh", "ㄖ" => "r",
  "ㄗ" => "z", "ㄘ" => "c", "ㄙ" => "s"
}

# Pinyin initial => zhuyin initial (the table above, inverted)
@reverse_initials Enum.into(@initials, %{}, fn {zhuyin, pinyin} -> {pinyin, zhuyin} end)
# Pinyin spells a final that forms a syllable on its own differently from the same final
# following an initial, so standalone finals get their own table (zhuyin => pinyin).
@standalone_finals %{
  # ㄧ series
  "ㄧ" => "yi", "ㄧㄚ" => "ya", "ㄧㄝ" => "ye", "ㄧㄠ" => "yao", "ㄧㄡ" => "you",
  "ㄧㄢ" => "yan", "ㄧㄣ" => "yin", "ㄧㄤ" => "yang", "ㄧㄥ" => "ying",
  # ㄨ series
  "ㄨ" => "wu", "ㄨㄚ" => "wa", "ㄨㄛ" => "wo", "ㄨㄞ" => "wai", "ㄨㄟ" => "wei",
  "ㄨㄢ" => "wan", "ㄨㄣ" => "wen", "ㄨㄤ" => "wang", "ㄨㄥ" => "weng",
  # ㄩ series
  "ㄩ" => "yu", "ㄩㄝ" => "yue", "ㄩㄢ" => "yuan", "ㄩㄣ" => "yun", "ㄩㄥ" => "yong",
  # Strictly speaking these are standalone initials; treating them as standalone finals is
  # easier to deal with
  "ㄓ" => "zhi", "ㄔ" => "chi", "ㄕ" => "shi", "ㄖ" => "ri",
  "ㄗ" => "zi", "ㄘ" => "ci", "ㄙ" => "si",
  # Standalone finals whose pinyin spelling does not change when an initial precedes them
  "ㄦ" => "er", "ㄢ" => "an"
}

# Pinyin => zhuyin (the table above, inverted)
@reverse_standalone_finals Enum.into(@standalone_finals, %{}, fn {zhuyin, pinyin} ->
                             {pinyin, zhuyin}
                           end)
# Zhuyin final => pinyin final, as spelled when the final follows an initial
@finals %{
  "ㄧ" => "i",
  "ㄨ" => "u",
  "ㄩ" => "v",
  "ㄚ" => "a",
  "ㄛ" => "o",
  "ㄜ" => "e",
  "ㄝ" => "e",
  "ㄞ" => "ai",
  "ㄟ" => "ei",
  "ㄠ" => "ao",
  "ㄡ" => "ou",
  "ㄢ" => "an",
  "ㄣ" => "en",
  "ㄤ" => "ang",
  "ㄥ" => "eng",
  "ㄦ" => "er",
  "ㄧㄚ" => "ia",
  "ㄨㄚ" => "ua",
  "ㄧㄥ" => "ing",
  "ㄧㄤ" => "iang",
  "ㄧㄝ" => "ie",
  "ㄨㄛ" => "uo",
  "ㄨㄥ" => "ong",
  "ㄨㄤ" => "uang",
  "ㄧㄠ" => "iao",
  "ㄨㄞ" => "uai",
  "ㄩㄝ" => "ve",
  "ㄩㄥ" => "iong",
  "ㄧㄡ" => "iu",
  "ㄨㄟ" => "ui",
  "ㄧㄢ" => "ian",
  "ㄨㄢ" => "uan",
  "ㄩㄢ" => "van",
  "ㄧㄣ" => "in",
  "ㄨㄣ" => "un",
  "ㄩㄣ" => "vn"
}

# Pinyin final => zhuyin final. Both "ㄜ" and "ㄝ" are written "e" in the table above, so a plain
# inversion keeps whichever of the two happens to be visited last, and the iteration order of a
# map of this size is not guaranteed. A bare pinyin "e" (as in "de" or "he") is "ㄜ"; "ㄝ" is
# only reached through the compound finals "ie" and "ve", which have their own entries. The
# winner is therefore pinned explicitly.
@reverse_finals Map.put(Map.new(@finals, fn {key, val} -> {val, key} end), "e", "ㄜ")
@doc """
Create pinyin structs from a zhuyin struct or list.

A zhuyin struct is converted into a single pinyin struct. When a list is passed, every zhuyin
struct in it is converted and every string is left untouched.

A syllable without a (known) initial is spelled using the standalone form of its final (e.g.
"ㄧ" becomes "yi"). Finals that have no dedicated standalone spelling (e.g. "ㄚ", "ㄞ") keep
their regular spelling. A `KeyError` is raised when the final cannot be mapped to pinyin at all.

## Examples

    iex> Zhuyin.to_pinyin(~z/ㄋㄧˇㄏㄠˇ/)
    ~p/nǐhǎo/

    iex> Zhuyin.to_pinyin(%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3})
    %Pinyin{initial: "n", final: "i", tone: 3}

    iex> Zhuyin.to_pinyin(%Zhuyin{initial: "", final: "ㄚ", tone: 4})
    %Pinyin{initial: "", final: "a", tone: 4}

"""
@spec to_pinyin(t() | zhuyin_list()) :: Pinyin.t() | Pinyin.pinyin_list()
def to_pinyin(%Zhuyin{initial: initial, final: final, tone: tone}) do
  case Map.fetch(@initials, initial) do
    {:ok, pinyin_initial} ->
      %Pinyin{initial: pinyin_initial, final: Map.fetch!(@finals, final), tone: tone}

    :error ->
      # Only finals whose spelling changes when they stand alone (plus a few others) are listed
      # in @standalone_finals; every other final is spelled as it is after an initial, so the
      # regular table is used as a fallback instead of raising.
      pinyin_final =
        Map.get_lazy(@standalone_finals, final, fn -> Map.fetch!(@finals, final) end)

      %Pinyin{initial: "", final: pinyin_final, tone: tone}
  end
end

def to_pinyin(list) when is_list(list) do
  Enum.map(list, fn
    z = %Zhuyin{} -> to_pinyin(z)
    str when is_binary(str) -> str
  end)
end
@doc """
Create zhuyin structs from a pinyin struct or list.

A pinyin struct is converted into a single zhuyin struct. When a list is passed, every pinyin
struct in it is converted and every string is left untouched.

A pinyin syllable without a (known) initial is looked up among the standalone finals first
(e.g. "yi" becomes "ㄧ"); finals that are spelled the same with or without an initial (e.g.
"a", "ai") are then looked up in the regular table.

## Examples

    iex> Zhuyin.from_pinyin(~p/nǐhǎo/)
    [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}]

    iex> Zhuyin.from_pinyin(%Pinyin{initial: "n", final: "i", tone: 3})
    %Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}

    iex> Zhuyin.from_pinyin(%Pinyin{initial: "", final: "ai", tone: 4})
    %Zhuyin{initial: "", final: "ㄞ", tone: 4}

"""
@spec from_pinyin(Pinyin.t() | Pinyin.pinyin_list()) :: t() | zhuyin_list()
def from_pinyin(%Pinyin{initial: initial, final: final, tone: tone}) do
  case @reverse_initials[initial] do
    nil ->
      # Finals such as "a" or "ai" have no separate standalone spelling and are therefore only
      # present in the regular table; without this fallback they would end up as `final: nil`.
      zhuyin_final = @reverse_standalone_finals[final] || @reverse_finals[final]
      %__MODULE__{initial: "", final: zhuyin_final, tone: tone}

    zhuyin_initial ->
      %__MODULE__{initial: zhuyin_initial, final: @reverse_finals[final], tone: tone}
  end
end

def from_pinyin(list) when is_list(list) do
  Enum.map(list, fn
    p = %Pinyin{} -> from_pinyin(p)
    str when is_binary(str) -> str
  end)
end
@doc """
Read a string and convert it into a list of strings and zhuyin structs.

The input is a string in which zhuyin words may be mixed with normal text; the output is a list
of strings and zhuyin structs. White space and punctuation are separated from other strings.

## Parse Modes

By default, only strings which consist exclusively of zhuyin, whitespace and punctuation are
accepted. Any text that cannot be interpreted as zhuyin results in an error:

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ!")
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, "!"]}

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ, hello!")
    {:error, "hello!"}

When zhuyin mixed with regular text needs to be parsed, a `mode` can be passed to this function.
There are 3 available modes:

- `:exclusive`: The default. Every character (except white space and punctuation) is
  interpreted as zhuyin. If this is not possible, an error is returned.
- `:words`: Any word (i.e. a continuous part of the string that does not contain whitespace or
  punctuation) is either interpreted as a sequence of zhuyin syllables or as non-zhuyin text. If
  a word contains any characters that cannot be interpreted as zhuyin, the whole word is
  considered to be non-zhuyin text. This mode does not return errors.
- `:mixed`: Any word can contain a mixture of zhuyin and non-zhuyin characters. Anything that
  can be interpreted as zhuyin is interpreted as zhuyin; the other text is left unmodified.
  This is mainly useful to mix characters and zhuyin. It is recommended to use the `:words`
  mode instead of this mode when possible, as this mode will often parse regular text as zhuyin
  text. This mode does not return errors.

The following examples show the use of all three modes:

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ!", :exclusive)
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, "!"]}

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ, hello!", :exclusive)
    {:error, "hello!"}

    iex> Zhuyin.read("ㄋㄧˇ好, hello!", :exclusive)
    {:error, "ㄋㄧˇ好, hello!"}

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ!", :words)
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, "!"]}

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ, hello!", :words)
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, ", ", "hello", "!"]}

    iex> Zhuyin.read("ㄋㄧˇ好, hello!", :words)
    {:ok, ["ㄋㄧˇ好", ", ", "hello", "!"]}

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ!", :mixed)
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, "!"]}

    iex> Zhuyin.read("ㄋㄧˇㄏㄠˇ, hello!", :mixed)
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, ", ", "hello", "!"]}

    iex> Zhuyin.read("ㄋㄧˇ好, hello!", :mixed)
    {:ok, [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, "好", ", ", "hello", "!"]}

"""
@spec read(String.t(), :exclusive | :words | :mixed) ::
        {:ok, zhuyin_list()} | {:error, String.t()}
def read(string, mode \\ :exclusive)

# One clause per supported mode; any other mode matches no clause
def read(string, :exclusive), do: string |> Parsers.zhuyin_only() |> parser_result()
def read(string, :words), do: string |> Parsers.zhuyin_words() |> parser_result()
def read(string, :mixed), do: string |> Parsers.zhuyin_mix() |> parser_result()
@doc """
Identical to `read/2`, but returns the bare result and raises a `ParseError` on failure.

## Examples

    iex> Zhuyin.read!("ㄋㄧˇㄏㄠˇ!")
    [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, "!"]

    iex> Zhuyin.read!("ㄋㄧˇㄏㄠˇ, hello!")
    ** (ParseError) Error occurred when attempting to parse: "hello!"

    iex> Zhuyin.read!("ㄋㄧˇㄏㄠˇ, hello!", :words)
    [%Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}, %Zhuyin{initial: "ㄏ", final: "ㄠ", tone: 3}, ", ", "hello", "!"]

"""
@spec read!(String.t(), :exclusive | :words | :mixed) ::
        zhuyin_list() | no_return()
def read!(string, mode \\ :exclusive) when mode in [:exclusive, :words, :mixed] do
  string
  |> read(mode)
  |> case do
    {:error, unparsed} -> raise ParseError, unparsed
    {:ok, list} -> list
  end
end
# Converts the 6-element tuple handed back by a parser into `{:ok, result}` or
# `{:error, remainder}`. A success is only accepted when no unparsed input is left.
defp parser_result({:error, _, unparsed, %{}, _, _}), do: {:error, unparsed}
defp parser_result({:ok, parsed, "", %{}, _, _}), do: {:ok, parsed}

# Same as `parser_result/1`, but returns the bare result and raises `ParseError` with the
# unparsed remainder on failure.
defp parser_result!({:error, _, unparsed, %{}, _, _}), do: raise(ParseError, unparsed)
defp parser_result!({:ok, parsed, "", %{}, _, _}), do: parsed
@doc """
Create a single zhuyin struct (`t:t/0`) from a string.

Use this function to parse exactly one zhuyin syllable.

When parsing fails, `{:error, <remainder of string>}` is returned, where `<remainder of string>`
is the part of the string which made parsing fail.

## Examples

    iex> Zhuyin.from_string("ㄋㄧˇ")
    {:ok, %Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}}

    iex> Zhuyin.from_string("ㄋㄧˇㄏㄠˇ")
    {:error, "ㄏㄠˇ"}

    iex> Zhuyin.from_string("ㄋㄧˇhǎo")
    {:error, "hǎo"}

"""
@spec from_string(String.t()) :: {:ok, t()} | {:error, String.t()}
def from_string(word) do
  # Anything other than a successful single-syllable result is handed back untouched
  with {:ok, [syllable]} <- parser_result(Parsers.syllable(word)) do
    {:ok, syllable}
  end
end
@doc """
Create a single zhuyin struct (`t:t/0`) from a string.

Behaves like `from_string/1`, but returns the bare struct and raises a `ParseError` when the
string cannot be parsed.

## Examples

    iex> Zhuyin.from_string!("ㄋㄧˇ")
    %Zhuyin{initial: "ㄋ", final: "ㄧ", tone: 3}

    iex> Zhuyin.from_string!("ㄋㄧˇㄏㄠˇ")
    ** (ParseError) Error occurred when attempting to parse: "ㄏㄠˇ"

    iex> Zhuyin.from_string!("ㄋㄧˇhǎo")
    ** (ParseError) Error occurred when attempting to parse: "hǎo"

"""
@spec from_string!(String.t()) :: t() | no_return()
def from_string!(word) do
  syllables = parser_result!(Parsers.syllable(word))
  hd(syllables)
end
@doc """
Sigil to create a zhuyin list or struct.

Without modifiers, the sigil turns its input into a zhuyin list by calling `read!/2` in
`:exclusive` mode. The `w` and `m` modifiers select the `:words` or `:mixed` mode respectively.

With the `s` modifier, a single zhuyin struct is created by calling `from_string!/1`.

## Examples

    iex> ~z/ㄋㄧˇ/
    [%Zhuyin{tone: 3, initial: "ㄋ", final: "ㄧ"}]

    iex> ~z/ㄋㄧˇ hello/w
    [%Zhuyin{tone: 3, initial: "ㄋ", final: "ㄧ"}, " ", "hello"]

    iex> ~z/ㄋㄧˇ好/m
    [%Zhuyin{tone: 3, initial: "ㄋ", final: "ㄧ"}, "好"]

    iex> ~z/ㄋㄧˇ/s
    %Zhuyin{tone: 3, initial: "ㄋ", final: "ㄧ"}

"""
defmacro sigil_z({:<<>>, _meta, [word]}, [?s]) when is_binary(word) do
  word |> from_string!() |> Macro.escape()
end

defmacro sigil_z({:<<>>, _meta, [string]}, modifiers)
         when is_binary(string) and modifiers in [[], [?w], [?m]] do
  # Parsing happens while the macro is expanded; the parsed list is embedded as a literal
  string |> read!(mode_for_modifiers(modifiers)) |> Macro.escape()
end

# Maps the modifiers of the sigil to the parse mode handed to `read!/2`
defp mode_for_modifiers([]), do: :exclusive
defp mode_for_modifiers([?w]), do: :words
defp mode_for_modifiers([?m]), do: :mixed
end
# --------- #
# Protocols #
# --------- #
defimpl String.Chars, for: Zhuyin do
  # A syllable is written as its initial, its final and the mark of its tone, in that order
  def to_string(%Zhuyin{initial: initial, final: final, tone: tone}) do
    mark = Zhuyin._tone_for_index(tone)
    initial <> final <> mark
  end
end
defimpl List.Chars, for: Zhuyin do
  # The charlist form is the string form of the syllable, converted to a charlist
  def to_charlist(%Zhuyin{} = syllable) do
    syllable |> to_string() |> String.to_charlist()
  end
end
defimpl Inspect, for: Zhuyin do
  # Syllables are shown as `#Zhuyin<...>`, wrapping their string form; the options are ignored
  def inspect(%Zhuyin{} = syllable, _opts) do
    Inspect.Algebra.concat(["#Zhuyin<", to_string(syllable), ">"])
  end
end