%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright © 2021-2022 VMware, Inc. or its affiliates. All rights reserved.
%%
%% @doc Khepri path API.
%%
%% A path is the type used by Khepri to reference nodes in the tree structure.
%% A path describes how to reach a node from the root node.
%%
%% A path, or <em>native path</em>, is a list of components. Components can be
%% Erlang atoms and binaries. Example:
%%
%% ```
%% %% Native path.
%% Path = [stock, wood, <<"oak">>].
%% '''
%%
%% A path may contain conditions to tune how a node is matched or to match
%% multiple nodes at once. This is called a <em>path pattern</em>. A path
%% pattern may contain conditions in addition to regular components (Erlang
%% atoms and binaries). See {@link khepri_condition} to learn more about
%% conditions. Example:
%%
%% ```
%% %% Path pattern with a condition on `wood'.
%% PathPattern = [stock,
%% #if_all{conditions = [wood,
%% #if_node_exists{exists = true}]},
%% oak].
%% '''
%%
%% To be user-friendly, string-based and binary-based <em>Unix-like paths</em>
%% are accepted by most functions. The syntax of these <em>Unix paths</em> is
%% described in the {@link unix_path()} type documentation. Example:
%%
%% ```
%% %% Unix path, equivalent of the first native path example.
%% UnixPath = "/:stock/:wood/oak".
%% '''
-module(khepri_path).
-include_lib("stdlib/include/assert.hrl").
-include("include/khepri.hrl").
-include("src/khepri_error.hrl").
-export([compile/1,
from_string/1,
from_binary/1,
to_string/1,
to_binary/1,
sigil_p/2,
sigil_P/2,
combine_with_conditions/2,
targets_specific_node/1,
component_targets_specific_node/1,
is_valid/1,
ensure_is_valid/1,
abspath/2,
realpath/1,
pattern_includes_root_node/1]).
-ifdef(TEST).
-export([component_to_string/1]).
-endif.
-type node_id() :: atom() | binary().
%% A node name.
-type component() :: node_id() |
?KHEPRI_ROOT_NODE |
?THIS_KHEPRI_NODE |
?PARENT_KHEPRI_NODE.
%% Component name in a path to a node.
-type native_path() :: [component()].
%% Native path to a node.
%%
%% A native path is a list of atoms, binaries and special components.
%%
%% It is called <em>native</em> because it requires no further processing
%% (unlike {@link unix_path()}) and is the format used internally by the state
%% machine.
%%
%% Special components are:
%% <ol>
%% <li>`?KHEPRI_ROOT_NODE' to explicitly mark the root node. A path is absolute
%% by default. Using `?KHEPRI_ROOT_NODE' is only useful when manipulating the
%% root node itself (querying it or storing something in the root node).</li>
%% <li>`?THIS_KHEPRI_NODE' to make a relative path (the default being an
%% absolute path). This is mostly useful for {@link
%% khepri_condition:keep_while()} to make it easy to put a condition on the
%% node itself.</li>
%% <li>`?PARENT_KHEPRI_NODE' to target the parent of a node, with the same
%% benefits and use cases as `?THIS_KHEPRI_NODE'.</li>
%% </ol>
%%
%% Example:
%%
%% ```
%% %% Native path.
%% Path = [stock, wood, <<"oak">>].
%% '''
-type native_pattern() :: [pattern_component()].
%% Path pattern which may match zero, one or more nodes.
%%
%% A native pattern is a list of atoms, binaries, special components and
%% conditions.
%%
%% It is called <em>native</em> because it requires no further processing
%% (unlike {@link unix_pattern()}) and is the format used internally by the
%% state machine.
%%
%% See {@link native_path()} for a description of special components.
%%
%% Conditions are any condition defined by {@link
%% khepri_condition:condition()}.
%%
%% Example:
%%
%% ```
%% %% Path pattern with a condition on `wood'.
%% PathPattern = [stock,
%% #if_all{conditions = [wood,
%% #if_node_exists{exists = true}]},
%% oak].
%% '''
-type unix_path() :: string() | binary().
%% Unix-like path to a node.
%%
%% These <em>Unix paths</em> have the following syntax:
%%
%% <ul>
%% <li>Path components are separated by a forward slash, `/'.</li>
%% <li>Atom-based node IDs are prefixed with a `:' character: `:wood'.</li>
%% <li>Binary-based node IDs are written as-is: `oak'.</li>
%% <li>Atom and binaries can be percent-encoded.</li>
%% <li>An absolute path must start with `/', otherwise it is considered a
%% relative path</li>
%% <li>`.' and `..' represent `?THIS_KHEPRI_NODE' and `?PARENT_KHEPRI_NODE'
%% respectively</li>
%% <li>Simple glob patterns are accepted:
%% <ul>
%% <li>`abc*def' is the same as `#if_name_matches{regex = "^abc.*def$"}'</li>
%% <li>`*' is the same as `?KHEPRI_WILDCARD_STAR' or `#if_name_matches{regex =
%% any}'</li>
%% <li>`**' is the same as `?KHEPRI_WILDCARD_STAR_STAR' or
%% `if_path_matches{regex = any}'</li>
%% </ul></li>
%% </ul>
%%
%% <strong>Warning</strong>: There is no special handling of Unicode in tree
%% node names. To use Unicode, it is recommended to either use a native path or
%% a binary-based Unix-like path. If using a string-based Unix-like path, the
%% behavior is undefined and the call may crash. Matching against node names is
%% also undefined behavior and may crash, regardless of the type of path being
%% used. It will be improved in the future.
%%
%% Example:
%% ```
%% %% Unix path, equivalent of the first native path example.
%% UnixPath = "/:stock/:wood/oak".
%% '''
-type unix_pattern() :: string() | binary().
%% Unix-like path pattern to a node.
%%
%% It accepts the following special characters:
%% <ol>
%% <li>`*' anywhere in a path component behaves like a {@link
%% khepri_condition:if_name_matches()}.</li>
%% <li>`**' as a path component behaves like a {@link
%% khepri_condition:if_path_matches()}.</li>
%% </ol>
%%
%% A Unix-like path pattern can't express all the conditions of a native path
%% pattern currently.
%%
%% Otherwise it works as a {@link unix_path()} and has the same syntax and
%% limitations.
%%
%% Example:
%% ```
%% %% Unix path pattern, matching multiple types of oak.
%% UnixPathPattern = "/:stock/:wood/*oak".
%% '''
-type path() :: native_path() | unix_path().
%% Path to a node.
-type pattern() :: native_pattern() | unix_pattern().
%% Path pattern which may match zero, one or more nodes.
-type pattern_component() :: component() | khepri_condition:condition().
%% Path pattern component which may match zero, one or more nodes.
-export_type([path/0,
native_path/0,
unix_path/0,
pattern/0,
native_pattern/0,
unix_pattern/0,
component/0,
pattern_component/0,
node_id/0]).
-define(
reject_invalid_path(Path),
?khepri_misuse(invalid_path, #{path => Path})).
-define(
reject_invalid_path(Path, Component),
?khepri_misuse(invalid_path, #{path => Path,
component => Component})).
-spec compile(PathPattern) -> PathPattern when
PathPattern :: native_pattern().
%% @private
compile(PathPattern) ->
lists:map(fun khepri_condition:compile/1, PathPattern).
-spec from_string(String) -> PathPattern when
String :: pattern(),
PathPattern :: native_pattern().
%% @doc Converts a Unix-like path to a native path.
%%
%% The Unix-like string can be either an Erlang string or an Erlang binary.
%%
%% For convenience, a native path is also accepted and returned as-is.
from_string([$/, $/ | MaybeString]) ->
%% The path starts with two forward slashes. Therefore the path starts
%% with an empty binary.
from_string([$/ | MaybeString], [<<>>, ?KHEPRI_ROOT_NODE]);
from_string([$/ | MaybeString]) ->
from_string(MaybeString, [?KHEPRI_ROOT_NODE]);
from_string(MaybeString) when is_list(MaybeString) ->
from_string(MaybeString, []);
from_string(Binary) when is_binary(Binary) ->
String = erlang:binary_to_list(Binary),
from_string(String);
from_string(NotPath) ->
?reject_invalid_path(NotPath).
-spec from_binary(String) -> PathPattern when
String :: pattern(),
PathPattern :: native_pattern().
%% @doc Converts a Unix-like path to a native path.
%%
%% This is the same as calling `from_string(String)'. Therefore, it accepts
%% Erlang strings or binaries and native paths.
%%
%% @see from_string/1.
from_binary(MaybeString) ->
from_string(MaybeString).
-spec sigil_p(PathPattern, Options) -> NativePathPattern when
PathPattern :: pattern(),
Options :: [char()],
NativePathPattern :: native_pattern().
%% @doc Elixir sigil to parse Unix-like path using the `~p"/:path/:to/node"'
%% syntax.
%%
%% The lowercase `~p' sigil means that the string will go through
%% interpolation first before this function is called.
%%
%% @see sigil_P/2.
%%
%% @private
sigil_p(PathPattern, _Options) ->
from_string(PathPattern).
-spec sigil_P(PathPattern, Options) -> NativePathPattern when
PathPattern :: pattern(),
Options :: [char()],
NativePathPattern :: native_pattern().
%% @doc Elixir sigil to parse Unix-like path using the `~P"/:path/:to/node"'
%% syntax.
%%
%% The uppercase `~P' sigil means that the string will NOT go through
%% interpolation first before this function is called.
%%
%% @see sigil_p/2.
%%
%% @private
sigil_P(PathPattern, _Options) ->
from_string(PathPattern).
from_string([Component | _] = Rest, ReversedPath)
when ?IS_KHEPRI_NODE_ID(Component) orelse
?IS_KHEPRI_CONDITION(Component) ->
finalize_path(Rest, ReversedPath);
from_string([Char, Component | _] = Rest, ReversedPath)
when ?IS_SPECIAL_KHEPRI_PATH_COMPONENT(Char) andalso
(?IS_KHEPRI_NODE_ID(Component) orelse
?IS_KHEPRI_CONDITION(Component)) ->
finalize_path(Rest, ReversedPath);
from_string([?PARENT_KHEPRI_NODE, $/ | _] = Rest, ReversedPath) ->
%% If the character used to represent the parent node in a regular path
%% (`^') appears alone in a path component, it's a regular path. Other
%% special path components may appear alone in both forms though.
finalize_path(Rest, ReversedPath);
from_string([Char] = Rest, [] = ReversedPath)
when ?IS_SPECIAL_KHEPRI_PATH_COMPONENT(Char) ->
finalize_path(Rest, ReversedPath);
from_string([$/, $/ | Rest], ReversedPath) ->
%% Two consecutive forward slashes mean there is an empty binary
%% component.
ReversedPath1 = prepend_component(<<>>, ReversedPath),
from_string([$/ | Rest], ReversedPath1);
from_string([$/ | Rest], ReversedPath) ->
from_string(Rest, ReversedPath);
from_string([$: | Rest], ReversedPath) ->
parse_atom_from_string(Rest, ReversedPath);
from_string([Char | _] = Rest, ReversedPath) when is_integer(Char) ->
parse_binary_from_string(Rest, ReversedPath);
from_string([], ReversedPath) ->
finalize_path([], ReversedPath);
from_string(Rest, ReversedPath) ->
NotPath = lists:reverse(ReversedPath) ++ Rest,
?reject_invalid_path(NotPath).
parse_atom_from_string(Rest, ReversedPath) ->
parse_atom_from_string(Rest, "", ReversedPath).
parse_atom_from_string([$/ | _] = Rest, Acc, ReversedPath) ->
Component = finalize_atom_component(Acc),
ReversedPath1 = prepend_component(Component, ReversedPath),
from_string(Rest, ReversedPath1);
parse_atom_from_string([Char | Rest], Acc, ReversedPath)
when is_integer(Char) ->
Acc1 = [Char | Acc],
parse_atom_from_string(Rest, Acc1, ReversedPath);
parse_atom_from_string([] = Rest, Acc, ReversedPath) ->
Component = finalize_atom_component(Acc),
ReversedPath1 = prepend_component(Component, ReversedPath),
from_string(Rest, ReversedPath1).
finalize_atom_component(Acc) ->
Acc1 = lists:reverse(Acc),
Acc2 = percent_decode_string(Acc1),
erlang:list_to_atom(Acc2).
parse_binary_from_string(Rest, ReversedPath) ->
parse_binary_from_string(Rest, "", ReversedPath).
parse_binary_from_string([$/ | _] = Rest, Acc, ReversedPath) ->
Component = finalize_binary_componenent(Acc),
ReversedPath1 = prepend_component(Component, ReversedPath),
from_string(Rest, ReversedPath1);
parse_binary_from_string([Char | Rest], Acc, ReversedPath)
when is_integer(Char) ->
Acc1 = [Char | Acc],
parse_binary_from_string(Rest, Acc1, ReversedPath);
parse_binary_from_string([] = Rest, Acc, ReversedPath) ->
Component = finalize_binary_componenent(Acc),
ReversedPath1 = prepend_component(Component, ReversedPath),
from_string(Rest, ReversedPath1).
finalize_binary_componenent(Acc) ->
Acc1 = lists:reverse(Acc),
case Acc1 of
"." ->
?THIS_KHEPRI_NODE;
".." ->
?PARENT_KHEPRI_NODE;
"*" ->
?KHEPRI_WILDCARD_STAR;
"**" ->
?KHEPRI_WILDCARD_STAR_STAR;
_ ->
Acc2 = percent_decode_string(Acc1),
case re:run(Acc2, "\\*", [{capture, none}]) of
match ->
ReOpts = [global, {return, list}],
Regex = re:replace(Acc2, "\\*", ".*", ReOpts),
#if_name_matches{regex = "^" ++ Regex ++ "$"};
nomatch ->
erlang:list_to_binary(Acc2)
end
end.
prepend_component(Component, []) when ?IS_KHEPRI_NODE_ID(Component) ->
%% This is a relative path.
[Component, ?THIS_KHEPRI_NODE];
prepend_component(Component, ReversedPath) ->
[Component | ReversedPath].
finalize_path(Rest, []) ->
Rest;
finalize_path(Rest, ReversedPath) ->
case lists:reverse(ReversedPath) ++ Rest of
[?KHEPRI_ROOT_NODE | Path] -> Path;
Path -> Path
end.
-spec to_string(NativePath) -> UnixPath when
NativePath :: native_path(),
UnixPath :: string().
%% @doc Converts a native path to a string.
to_string([] = Path) ->
to_string(Path, "/", false);
to_string([?KHEPRI_ROOT_NODE | Path]) ->
to_string(Path, "/", false);
to_string([?THIS_KHEPRI_NODE] = Path) ->
to_string(Path, "", false);
to_string([?THIS_KHEPRI_NODE, <<>> | _] = Path) ->
%% Special case: a relative path starting with an empty binary. We need to
%% keep the leading '.' because we rely on forward slashes to "encode" the
%% empty binary. If we don't keep the '.', the path will become absolute.
to_string(Path, "", false);
to_string([?THIS_KHEPRI_NODE | Path]) ->
to_string(Path, "", false);
to_string([?PARENT_KHEPRI_NODE | _] = Path) ->
to_string(Path, "", false);
to_string(Path) ->
to_string(Path, "", true).
to_string([<<>> = Component], Result, NeedSlash) ->
Component1 = component_to_string(Component),
Result1 = append_string_component(Component1, Result, NeedSlash) ++ [$/],
Result1;
to_string([Component | Rest], Result, NeedSlash) ->
Component1 = component_to_string(Component),
Result1 = append_string_component(Component1, Result, NeedSlash),
to_string(Rest, Result1, true);
to_string([], Result, _NeedSlash) ->
Result.
append_string_component(Component, Result, true) ->
Result ++ [$/ | Component];
append_string_component(Component, Result, false) ->
Result ++ Component.
-spec to_binary(NativePath) -> UnixPath when
NativePath :: native_path(),
UnixPath :: binary().
%% @doc Converts a native path to a binary.
to_binary(Path) ->
String = to_string(Path),
erlang:list_to_binary(String).
-spec component_to_string(component()) -> string().
%% @private
component_to_string(?KHEPRI_ROOT_NODE) ->
"/";
component_to_string(?THIS_KHEPRI_NODE) ->
".";
component_to_string(?PARENT_KHEPRI_NODE) ->
"..";
component_to_string(Component) when is_atom(Component) ->
":" ++ percent_encode_string(erlang:atom_to_list(Component));
component_to_string(Component) when is_binary(Component) ->
percent_encode_string(erlang:binary_to_list(Component)).
-define(IS_HEX(Digit), (is_integer(Digit) andalso
((Digit >= $0 andalso Digit =< $9) orelse
(Digit >= $A andalso Digit =< $F) orelse
(Digit >= $a andalso Digit =< $f)))).
percent_decode_string(String) when is_list(String) ->
percent_decode_string(String, "").
percent_decode_string([$%, Digit1, Digit2 | Rest], PercentDecoded)
when ?IS_HEX(Digit1) andalso ?IS_HEX(Digit2) ->
Char = erlang:list_to_integer([Digit1, Digit2], 16),
PercentDecoded1 = PercentDecoded ++ [Char],
percent_decode_string(Rest, PercentDecoded1);
percent_decode_string([Char | Rest], PercentDecoded) ->
PercentDecoded1 = PercentDecoded ++ [Char],
percent_decode_string(Rest, PercentDecoded1);
percent_decode_string([], PercentDecoded) ->
PercentDecoded.
percent_encode_string(String) when is_list(String) ->
percent_encode_string(String, "").
percent_encode_string([Char | Rest], PercentEncoded)
when is_integer(Char) andalso
((Char >= $A andalso Char =< $Z) orelse
(Char >= $a andalso Char =< $z) orelse
(Char >= $0 andalso Char =< $9) orelse
(Char =:= $. andalso PercentEncoded =/= "") orelse
Char =:= $- orelse Char =:= $_ orelse Char =:= $~) ->
PercentEncoded1 = PercentEncoded ++ [Char],
percent_encode_string(Rest, PercentEncoded1);
percent_encode_string([Char | Rest], PercentEncoded) ->
PEChar = lists:flatten(io_lib:format("%~2.16.0B", [Char])),
PercentEncoded1 = PercentEncoded ++ PEChar,
percent_encode_string(Rest, PercentEncoded1);
percent_encode_string([], PercentEncoded) ->
PercentEncoded.
-spec combine_with_conditions(PathPattern, Conditions) -> PathPattern when
PathPattern :: native_pattern(),
Conditions :: [khepri_condition:condition()].
combine_with_conditions(Path, []) ->
Path;
combine_with_conditions(Path, Conditions) ->
[ChildName | Rest] = lists:reverse(Path),
Combined = #if_all{conditions = [ChildName | Conditions]},
lists:reverse([Combined | Rest]).
-spec targets_specific_node(PathPattern) -> Ret when
PathPattern :: native_pattern(),
Ret :: {true, Path} | false,
Path :: native_path().
targets_specific_node(PathPattern) ->
targets_specific_node(PathPattern, []).
targets_specific_node([Condition | Rest], Path) ->
case component_targets_specific_node(Condition) of
{true, Component} -> targets_specific_node(Rest, [Component | Path]);
false -> false
end;
targets_specific_node([], Path) ->
{true, lists:reverse(Path)}.
-spec component_targets_specific_node(ComponentPattern) -> Ret when
ComponentPattern :: pattern_component(),
Ret :: {true, Component} | false,
Component :: component().
%% @private
component_targets_specific_node(ChildName)
when ?IS_KHEPRI_PATH_COMPONENT(ChildName) ->
{true, ChildName};
component_targets_specific_node(#if_not{condition = Cond}) ->
component_targets_specific_node(Cond);
component_targets_specific_node(#if_all{conditions = []}) ->
false;
component_targets_specific_node(#if_all{conditions = Conds}) ->
lists:foldl(
fun
(Cond, {true, _} = True) ->
case component_targets_specific_node(Cond) of
True -> True;
{true, _} -> false;
false -> True
end;
(Cond, false) ->
case component_targets_specific_node(Cond) of
{true, _} = True -> True;
false -> false
end;
(Cond, undefined) ->
component_targets_specific_node(Cond)
end, undefined, Conds);
component_targets_specific_node(#if_any{conditions = []}) ->
false;
component_targets_specific_node(#if_any{conditions = Conds}) ->
lists:foldl(
fun
(Cond, {true, _} = True) ->
case component_targets_specific_node(Cond) of
True -> True;
{true, _} -> false;
false -> false
end;
(_, false) ->
false;
(Cond, undefined) ->
component_targets_specific_node(Cond)
end, undefined, Conds);
component_targets_specific_node(_) ->
false.
-spec is_valid(PathPattern) -> IsValid when
PathPattern :: native_pattern(),
IsValid :: true | {false, ComponentPattern},
ComponentPattern :: pattern_component().
is_valid(PathPattern) when is_list(PathPattern) ->
lists:foldl(
fun
(_, {false, _} = False) -> False;
(Component, _) -> khepri_condition:is_valid(Component)
end, true, PathPattern);
is_valid(NotPathPattern) ->
{false, NotPathPattern}.
-spec ensure_is_valid(PathPattern) -> ok | no_return() when
PathPattern :: native_pattern().
ensure_is_valid(PathPattern) ->
case is_valid(PathPattern) of
true ->
ok;
{false, Component} ->
?reject_invalid_path(PathPattern, Component)
end.
-spec abspath(Path, BasePath) -> Path when
Path :: native_pattern(),
BasePath :: native_pattern().
abspath([FirstComponent | _] = AbsolutePath, _)
when FirstComponent =/= ?THIS_KHEPRI_NODE andalso
FirstComponent =/= ?PARENT_KHEPRI_NODE ->
AbsolutePath;
abspath([_ | _] = RelativePath, BasePath) ->
realpath(BasePath ++ RelativePath, []);
abspath([] = PathToRoot, _) ->
PathToRoot.
-spec realpath(Path) -> Path when
Path :: native_pattern().
realpath(Path) ->
realpath(Path, []).
realpath([?KHEPRI_ROOT_NODE | Rest], _Result) ->
realpath(Rest, []);
realpath([?THIS_KHEPRI_NODE | Rest], Result) ->
realpath(Rest, Result);
realpath([?PARENT_KHEPRI_NODE | Rest], [_ | Result]) ->
realpath(Rest, Result);
realpath([?PARENT_KHEPRI_NODE | Rest], [] = Result) ->
realpath(Rest, Result);
realpath([Component | Rest], Result) ->
realpath(Rest, [Component | Result]);
realpath([], Result) ->
lists:reverse(Result).
pattern_includes_root_node(Path) ->
[] =:= realpath(Path).