src/cowmachine_util.erl

%% @author Justin Sheehy <justin@basho.com>
%% @author Andy Gross <andy@basho.com>
%% @copyright 2007-2008 Basho Technologies
%%
%% @doc Utilities for parsing, quoting, and negotiation.
%% @end
%%
%%    Licensed under the Apache License, Version 2.0 (the "License");
%%    you may not use this file except in compliance with the License.
%%    You may obtain a copy of the License at
%%
%%        http://www.apache.org/licenses/LICENSE-2.0
%%
%%    Unless required by applicable law or agreed to in writing, software
%%    distributed under the License is distributed on an "AS IS" BASIS,
%%    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%    See the License for the specific language governing permissions and
%%    limitations under the License.

-module(cowmachine_util).

-export([is_valid_header/1, is_valid_header_value/1, valid_location/1]).
-export([parse_qs/1]).
-export([convert_request_date/1]).
-export([choose_media_type_provided/2]).
-export([is_media_type_accepted/2]).
-export([choose_media_type_accepted/2]).
-export([choose_charset/2]).
-export([choose_encoding/2]).
-export([parse_header/1]).
-export([normalize_content_type/1]).
-export([format_content_type/1]).
-export([quoted_string/1]).
-export([split_quoted_strings/1]).


%% @doc Check header valid characters, 
%% see <a href="https://www.ietf.org/rfc/rfc822.txt">rfc822</a>

-spec is_valid_header(Header) -> Result when
	Header :: binary(),
	Result :: boolean().
is_valid_header(<<>>) -> false;
is_valid_header(H) -> is_valid_header_1(H).

-spec is_valid_header_1(Header) -> Result when
	Header :: binary(),
	Result :: boolean().
is_valid_header_1(<<>>) -> true;
is_valid_header_1(<<$:, _/binary>>) -> false;
is_valid_header_1(<<C, _/binary>>) when C >= $A, C =< $Z -> false;
is_valid_header_1(<<C, R/binary>>) when C >= 33, C =< 126 -> is_valid_header_1(R);
is_valid_header_1(_) -> false.

%% @doc Check if the given value is acceptaple for a http header value.

-spec is_valid_header_value(Header) -> Result when
	Header :: binary(),
	Result :: boolean().
is_valid_header_value(<<>>) -> true;
is_valid_header_value(<<13, _/binary>>) -> false;
is_valid_header_value(<<10, _/binary>>) -> false;
is_valid_header_value(<<C, R/binary>>) when C =< 127 -> is_valid_header_value(R);
is_valid_header_value(_) -> false.


%% @doc Check if the given location is safe to use as a location header. This is
%% uses as a defense against urls with scripts. The test is quite strict and will
%% drop values that might have been acceptable.

-spec valid_location(Location) -> Result when
	Location :: binary(),
	Result :: {true, binary()} | false.
valid_location(Location) ->
    case is_valid_header_value(Location) of
        true ->
            case z_html:noscript(Location, false) of
                <<"#script-removed">> -> false;
                <<>> -> false;
                Loc -> {true, Loc}
            end;
        false ->
            false
    end.

%% @doc Parse the HTTP date (IMF-fixdate, rfc850, asctime).

-spec convert_request_date(Date) -> Result when
	Date :: binary(),
	Result :: calendar:datetime().
convert_request_date(Date) ->
    try
        cow_date:parse_date(Date)
    catch
        error:_ -> bad_date
    end.

%% @doc Match the `Accept' request header with `content_types_provided'.<br/>
%% Return the `Content-Type' for the response.<br/>
%% If there is no acceptable/available match, return the atom `none'.<br/>
%% `AcceptHead' is the value of the request's `Accept' header.<br/>
%% `Provided' is a list of media types the controller can provide:
%% <ul>
%% 	<li>a binary e.g.  `<<"text/html">>'<br/></li>
%% 	<li>a binary and parameters e.g. — `{<<"text/html">>,[{<<"level">>,<<"1">>}]}'</li>
%% 	<li>two binaries e.g. `{<<"text">>, <<"html">>}'</li>
%% 	<li>two binaries and parameters e.g. — `{<<"text">>,<<"html">>,[{<<"level">>,<<"1">>}]}'</li>
%% </ul>
%% (the plain string case with no parameters is much more common)

-spec choose_media_type_provided(Provided, AcceptHead) -> Result when
	Provided :: MediaTypes,
	MediaTypes :: [MediaType],
	MediaType :: cow_http_hd:media_type(),
	AcceptHead :: binary(),
	Result :: cow_http_hd:media_type() | none.
choose_media_type_provided(Provided, AcceptHead) when is_list(Provided), is_binary(AcceptHead) ->
    Requested = accept_header_to_media_types(AcceptHead),
    Prov1 = normalize_provided(Provided),
    choose_media_type_provided_1(Prov1, Requested).

-spec choose_media_type_provided_1(Provided, Requested) -> Result when
	Provided :: list(), 
	Requested :: [cow_http_hd:media_type()],
	Result :: cow_http_hd:media_type() | none.
choose_media_type_provided_1(_Provided, []) ->
    none;
choose_media_type_provided_1(Provided, [H|T]) ->
    case media_match_provided(H, Provided) of
        none -> choose_media_type_provided_1(Provided, T);
        CT -> CT
    end.

% @doc Return the first matching content type or the atom `none'.

-spec media_match_provided(MediaTypes, Provided) -> Result when
	MediaTypes :: {binary(), binary()} | {binary(), binary()} | any(), 
	Provided :: list(),
	Result :: cow_http_hd:media_type() | none.
media_match_provided(_, []) ->
    none;
media_match_provided({<<"*">>, <<"*">>, []}, [H|_]) ->
    H;
media_match_provided({TypeA, TypeB, Params}, Provided) ->
    case lists:dropwhile(
                fun({PT1,PT2,PP}) ->
                    not (media_type_match(TypeA, TypeB, PT1, PT2)
                         andalso media_params_match(Params, PP))
                end,
                Provided)
    of
        [] -> none;
        [M|_] -> M
    end.

%% @doc Match the `Content-Type' request header with `content_types_accepted' against.

-spec is_media_type_accepted(ContentTypesAccepted, ContentTypeReqHeader) -> Result when
	ContentTypesAccepted :: list(), 
	ContentTypeReqHeader :: cow_http_hd:media_type(),
	Result :: boolean().
is_media_type_accepted([], _ReqHeader) ->
    true;
is_media_type_accepted(ContentTypesAccepted, ContentTypeReqHeader) when is_list(ContentTypesAccepted), is_tuple(ContentTypeReqHeader) ->
    ContentTypesAccepted1 = normalize_provided(ContentTypesAccepted),
    choose_media_type_accepted_1(ContentTypesAccepted1, ContentTypeReqHeader) =/= none.

-spec choose_media_type_accepted(ContentTypesAccepted, ReqHeader) -> Result when
	ContentTypesAccepted :: [cowmachine_req:media_type()], 
	ReqHeader :: cow_http_hd:media_type(),
	Result :: cowmachine_req:media_type() | none.
choose_media_type_accepted([], ReqHeader) ->
    ReqHeader;
choose_media_type_accepted(ContentTypesAccepted, ContentTypeReqHeader) when is_list(ContentTypesAccepted), is_tuple(ContentTypeReqHeader) ->
    ContentTypesAccepted1 = normalize_provided(ContentTypesAccepted),
    choose_media_type_accepted_1(ContentTypesAccepted1, ContentTypeReqHeader).

-spec choose_media_type_accepted_1(ContentTypesAccepted, ReqHeader) -> Result when
	ContentTypesAccepted :: [cowmachine_req:media_type()], 
	ReqHeader :: cow_http_hd:media_type(),
	Result :: cowmachine_req:media_type() | none.
choose_media_type_accepted_1([], _CTReq) ->
    none;
choose_media_type_accepted_1([{A1,A2,APs} = AT|T], {CT1,CT2,CTPs} = CTReq) ->
    case media_type_match(A1, A2, CT1, CT2) of
        true ->
            case media_params_match(CTPs, APs) of
                true ->
                    AT;
                false ->
                    choose_media_type_accepted_1(T, CTReq)
            end;
        false ->
            choose_media_type_accepted_1(T, CTReq)
    end.


-spec media_type_match(Req1, Req2, Prov1, Prov2) -> Result when
	Req1 :: binary(), 
	Req2 :: binary(), 
	Prov1 :: binary(), 
	Prov2 :: binary(),
	Result :: boolean().
media_type_match(Req1, Req2, Req1, Req2) -> true;
media_type_match(<<"*">>, <<"*">>, _Prov1, _Prov2) -> true;
media_type_match(Req1, <<"*">>, Req1, _Prov2) -> true;
media_type_match(_Req1, _Req2, _Prov1, _Prov2) -> false.

%% @doc Match the media parameters. Provided must be a subset of requested.
%%      There may not be a type in provided that is not in requested.

-spec media_params_match(ReqList, ProvList) -> Result when
	ReqList :: list(), 
	ProvList :: list(),
	Result :: boolean().
media_params_match(_ReqList, []) -> true;
media_params_match(ReqList, ReqList) -> true;
media_params_match(ReqList, ProvList) ->
    lists:all(
        fun(Prov) ->
            lists:member(Prov, ReqList)
        end,
        ProvList).
% @private
% @doc Given the value of an accept header, produce an ordered list based on the q-values.
% The first result being the highest-priority requested type.

-spec accept_header_to_media_types(HeadVal) -> Result when
	HeadVal :: binary(),
	Result :: [cow_http_hd:media_type()].
accept_header_to_media_types(HeadVal) ->
    try
        MTs = cow_http_hd:parse_accept(HeadVal),
        Sorted = lists:reverse(lists:keysort(2, MTs)),
        [ MType || {MType, _Prio, _Extra} <- Sorted ]
    catch
        _:_ -> []
    end.

-spec normalize_provided(Provided) -> Result when
	Provided :: [cowmachine_req:media_type()],
	Result :: cow_http_hd:media_type().
normalize_provided(Provided) ->
    [ normalize_content_type(X) || X <- Provided ].

-spec normalize_content_type(Type) -> Result when
	Type :: cowmachine_req:media_type(),
	Result :: cow_http_hd:media_type().
normalize_content_type(Type) when is_binary(Type) ->
    cow_http_hd:parse_content_type(Type);
normalize_content_type({Type,Params}) when is_binary(Type), is_list(Params) ->
    {T1,T2, _} = cow_http_hd:parse_content_type(Type),
    {T1, T2, Params};
normalize_content_type({Type1,Type2}) when is_binary(Type1), is_binary(Type2) ->
    {Type1, Type2, []};
normalize_content_type({Type1,Type2,Params}) when is_binary(Type1), is_binary(Type2), is_list(Params) ->
    {Type1, Type2, Params}.

-spec format_content_type(MediaType) -> Result when
	MediaType :: cow_http_hd:media_type(),
	Result :: binary().
format_content_type({T1, T2, []}) ->
    <<T1/binary, $/, T2/binary>>;
format_content_type({T1, T2, Params}) ->
    ParamsBin = [ [$;, Param, $=, Value] || {Param,Value} <- Params ],
    iolist_to_binary([T1, $/, T2, ParamsBin]).

%% @doc Select the best fitting character set or `none'

-spec choose_charset(CSets, AccCharHdr) -> Result when
	CSets :: [binary()], 
	AccCharHdr :: binary(),
	Result :: none | binary().
choose_charset(CSets, AccCharHdr) ->
    do_choose(CSets, AccCharHdr, <<"utf-8">>).

%% @doc Select the best fitting encoding or `none'

-spec choose_encoding(Encs, AccEncHdr) -> Result when
	Encs :: [binary()], 
	AccEncHdr :: binary(),
	Result :: none | binary().
choose_encoding(Encs, AccEncHdr) ->
    do_choose(Encs, AccEncHdr, <<"identity">>).

-spec do_choose(Choices, Header, Default) -> Result when
	Choices :: [binary()],
	Header :: binary(),
	Default :: binary(),
	Result :: none | binary().
do_choose(Choices, Header, Default) ->
    try
        Accepted = cow_http_hd:parse_accept_encoding(Header),
        Accepted1 = lists:reverse(lists:keysort(2, Accepted)),
        DefaultPrio = [P || {C,P} <- Accepted1, C =:= Default],
        StarPrio = [P || {C,P} <- Accepted1, C =:= <<"*">>],
        DefaultOkay = case DefaultPrio of
            [] ->
                case StarPrio of
                    [0] -> no;
                    _ -> yes
                end;
            [0] -> no;
            _ -> yes
        end,
        AnyOkay = case StarPrio of
            [] -> no;
            [0] -> no;
            _ -> yes
        end,
        do_choose(Default, DefaultOkay, AnyOkay, Choices, Accepted)
    catch
        _:_ ->
            Default
    end.

-spec do_choose(Default, DefaultOkay, AnyOkay, Choices, Accepted) -> Result when
	Default :: binary(),
	DefaultOkay :: yes | no,
	AnyOkay :: yes | no,
	Choices :: list(), 
	Accepted :: list(),
	Result :: binary() | none.
do_choose(_Default, _DefaultOkay, _AnyOkay, [], _Accepted) ->
    none;
do_choose(_Default, _DefaultOkay, yes, [Choice|_], []) ->
    Choice;
do_choose(Default, yes, no, Choices, []) ->
    case lists:member(Default, Choices) of
        true -> Default;
        _ -> none
    end;
do_choose(_Default, no, no, _Choices, []) ->
    none;
do_choose(Default, DefaultOkay, AnyOkay, Choices, [{Acc,0}|AccRest]) ->
    do_choose(Default, DefaultOkay, AnyOkay, lists:delete(Acc, Choices), AccRest);
do_choose(Default, DefaultOkay, AnyOkay, Choices, [{Acc,_Prio}|AccRest]) ->
    case lists:member(Acc, Choices) of
        true -> Acc;
        false -> do_choose(Default, DefaultOkay, AnyOkay, Choices, AccRest)
    end.


%% The percent decoding is inlined to greatly improve the performance
%% by avoiding copying binaries twice (once for extracting, once for
%% decoding) instead of just extracting the proper representation.
%%
%% Copyright (c) 2013-2015, Loïc Hoguin <essen@ninenines.eu>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%

%% @doc Parse an `application/x-www-form-urlencoded' string.<br/>
%% See also <a href="https://www.w3.org/TR/html401/interact/forms.html#didx-applicationx-www-form-urlencoded">specification</a>.

-spec parse_qs(String) -> Result when
	String :: binary(),
	Result :: list({binary(),binary()}).
parse_qs(<<>>) ->
    [];
parse_qs(Qs) ->
    parse_qs_name(Qs, [], <<>>).

%% @throws invalid_percent_encoding

-spec parse_qs_name(String, Acc, Name) -> Result when
	String :: binary(),
	Acc :: list(),
	Name :: binary(),
	Result :: list({binary(),binary()}).
parse_qs_name(<< $%, H, L, Rest/binary >>, Acc, Name) ->
    C = (unhex(H) bsl 4 bor unhex(L)),
    parse_qs_name(Rest, Acc, << Name/binary, C >>);
parse_qs_name(<< $+, Rest/binary >>, Acc, Name) ->
    parse_qs_name(Rest, Acc, << Name/binary, " " >>);
parse_qs_name(<< $=, _/binary >>, _Acc, <<>>) ->
    throw(invalid_qs_name);
parse_qs_name(<< $=, Rest/binary >>, Acc, Name) ->
    parse_qs_value(Rest, Acc, Name, <<>>);
parse_qs_name(<< $&, Rest/binary >>, Acc, Name) ->
    case Name of
        <<>> -> parse_qs_name(Rest, Acc, <<>>);
        _ -> parse_qs_name(Rest, [{Name, <<>>}|Acc], <<>>)
    end;
parse_qs_name(<< C, Rest/binary >>, Acc, Name) when C =/= $% ->
    parse_qs_name(Rest, Acc, << Name/binary, C >>);
parse_qs_name(<<>>, Acc, Name) ->
    case Name of
        <<>> -> lists:reverse(Acc);
        _ -> lists:reverse([{Name, <<>>}|Acc])
    end;
parse_qs_name(_Rest, _Acc, _Name) ->
    throw(invalid_percent_encoding).

%% @throws invalid_percent_encoding

-spec parse_qs_value(String, Acc, Name, Value) -> Result when
	String :: binary(),
	Acc :: list(),
	Name :: binary(),
	Value :: binary(),
	Result :: list({binary(),binary()}).
parse_qs_value(<< $%, H, L, Rest/binary >>, Acc, Name, Value) ->
    C = (unhex(H) bsl 4 bor unhex(L)),
    parse_qs_value(Rest, Acc, Name, << Value/binary, C >>);
parse_qs_value(<< $+, Rest/binary >>, Acc, Name, Value) ->
    parse_qs_value(Rest, Acc, Name, << Value/binary, " " >>);
parse_qs_value(<< $&, Rest/binary >>, Acc, Name, Value) ->
    parse_qs_name(Rest, [{Name, Value}|Acc], <<>>);
parse_qs_value(<< C, Rest/binary >>, Acc, Name, Value) when C =/= $% ->
    parse_qs_value(Rest, Acc, Name, << Value/binary, C >>);
parse_qs_value(<<>>, Acc, Name, Value) ->
    lists:reverse([{Name, Value}|Acc]);
parse_qs_value(_Rest, _Acc, _Name, _Value) ->
    throw(invalid_percent_encoding).

%% @throws invalid_percent_encoding

-spec unhex(Char) -> Result when
	Char :: char(),
	Result :: 1..15.
unhex($0) ->  0;
unhex($1) ->  1;
unhex($2) ->  2;
unhex($3) ->  3;
unhex($4) ->  4;
unhex($5) ->  5;
unhex($6) ->  6;
unhex($7) ->  7;
unhex($8) ->  8;
unhex($9) ->  9;
unhex($A) -> 10;
unhex($B) -> 11;
unhex($C) -> 12;
unhex($D) -> 13;
unhex($E) -> 14;
unhex($F) -> 15;
unhex($a) -> 10;
unhex($b) -> 11;
unhex($c) -> 12;
unhex($d) -> 13;
unhex($e) -> 14;
unhex($f) -> 15;
unhex(_) -> throw(invalid_percent_encoding).


%% author Bob Ippolito <bob@mochimedia.com>
%% copyright 2007 Mochi Media, Inc.
%% @doc  Parse a Content-Type like header, return the main Content-Type
%%       and a property list of options.

-spec parse_header(String) -> Result when
	String :: binary(),
	Result :: {binary(), list({binary(),binary()})}.
parse_header(String) ->
    %% TODO: This is exactly as broken as Python's cgi module.
    %%       Should parse properly like mochiweb_cookies.
    [Type | Parts] = [z_string:trim(S) || S <- binary:split(String, <<";">>, [global])],
    F = fun (S, Acc) ->
            case binary:split(S, <<"=">>) of
                [_] -> Acc;
                [<<>>, _] -> Acc;
                [_, <<>>] -> Acc;
                [Name, Value] ->
                    [{z_string:to_lower(z_string:trim(Name)), unquote_header(z_string:trim(Value))} | Acc]
            end
        end,
    {z_string:to_lower(Type), lists:foldr(F, [], Parts)}.

-spec unquote_header(Header) -> Result when
	Header :: binary(),
	Result :: binary().
unquote_header(<<$", Rest/binary>>) ->
    unquote_header(Rest, <<>>);
unquote_header(S) ->
    S.

-spec unquote_header(Header, Acc) -> Result when
	Header :: binary(),
	Acc :: binary(),
	Result :: binary().
unquote_header(<<>>, Acc) -> Acc;
unquote_header(<<$">>, Acc) -> Acc;
unquote_header(<<$\\, C, Rest/binary>>, Acc) ->
    unquote_header(Rest, <<Acc/binary, C>>);
unquote_header(<<C, Rest/binary>>, Acc) ->
    unquote_header(Rest, <<Acc/binary, C>>).


-spec quoted_string(String) -> Result when
	String :: binary(),
	Result :: binary().
quoted_string(<<$", _Rest/binary>> = Str) ->
    Str;
quoted_string(Str) ->
    escape_quotes(Str, <<$">>).                % Initialize Acc with opening quote

-spec escape_quotes(String, Acc) -> Result when
	String :: binary(), 
	Acc :: binary(),
	Result :: binary().
escape_quotes(<<>>, Acc) ->
    <<Acc/binary, $">>;                              % Append final quote
escape_quotes(<<$\\, Char, Rest/binary>>, Acc) ->
    escape_quotes(Rest, <<Acc/binary, $\\, Char>>);  % Any quoted char should be skipped
escape_quotes(<<$", Rest/binary>>, Acc) ->
    escape_quotes(Rest, <<Acc/binary, $\\, $">>);    % Unquoted quotes should be escaped
escape_quotes(<<Char, Rest/binary>>, Acc) ->
    escape_quotes(Rest, <<Acc/binary, Char>>).


-spec split_quoted_strings(String) -> Result when
	String :: binary(),
	Result :: [binary()].
split_quoted_strings(Str) ->
    split_quoted_strings(Str, []).

-spec split_quoted_strings(String, Acc) -> Result when
	String :: binary(),
	Acc :: [binary()],
	Result :: [binary()].
split_quoted_strings(<<>>, Acc) ->
    lists:reverse(Acc);
split_quoted_strings(<<$", Rest/binary>>, Acc) ->
    {Str, Cont} = unescape_quoted_string(Rest, <<>>),
    split_quoted_strings(Cont, [Str | Acc]);
split_quoted_strings(<<_Skip, Rest/binary>>, Acc) ->
    split_quoted_strings(Rest, Acc).

-spec unescape_quoted_string(String, Acc) -> Result when
	String :: binary(),
	Acc :: bitstring(),
	Result :: {bitstring(),binary()}.
unescape_quoted_string(<<>>, Acc) ->
    {Acc, <<>>};
unescape_quoted_string(<<$\\, Char, Rest/binary>>, Acc) -> % Any quoted char should be unquoted
    unescape_quoted_string(Rest, <<Acc/binary, Char>>);
unescape_quoted_string(<<$", Rest/binary>>, Acc) ->        % Quote indicates end of this string
    {Acc, Rest};
unescape_quoted_string(<<Char, Rest/binary>>, Acc) ->
    unescape_quoted_string(Rest, <<Acc/binary, Char>>).