-module(mesv@parse).
-compile([no_auto_import, nowarn_unused_vars, nowarn_unused_function, nowarn_nomatch, inline]).
-define(FILEPATH, "src\\mesv\\parse.gleam").
-export([build/1, column/2, expect_headers/2, set_row_sep/2, set_escaper/2, set_trim_whitespace/3, set_col_sep/2, set_strict_columns/1, partition_on_unescaped_/2, parse/2]).
-export_type([parsing_error/0, parser/1]).
-if(?OTP_RELEASE >= 27).
-define(MODULEDOC(Str), -moduledoc(Str)).
-define(DOC(Str), -doc(Str)).
-else.
-define(MODULEDOC(Str), -compile([])).
-define(DOC(Str), -compile([])).
-endif.
?MODULEDOC(
" Module containing the functions for creating a `Parser`, and using the `Parser`\n"
" to parse an input CSV String into a `List` of some data types.\n"
" \n"
" ### Important!\n"
" At this stage, everything is still in flux, and breaking changes can occur\n"
" on minor version updates. Be careful and check for possible issues before updating!\n"
" \n"
" ## Examples\n"
" A full example of parsing an example CSV String.\n"
" ```gleam\n"
" import gleam/int\n"
" import mesv\n"
" import mesv/parse\n"
" \n"
" const expected_data: List(#(String, Int, Bool)) = [\n"
" #(\"Andrew\", 20, True),\n"
" #(\"Blake\", 25, True),\n"
" #(\"Cassandra\", 2, False),\n"
" ]\n"
" \n"
" pub fn main() -> Nil {\n"
" let parsed_data =\n"
" parse.build({\n"
" // Create a parsing function using `mesv.parsed`\n"
" // to construct a curried parsing function\n"
" use name <- mesv.parsed\n"
" use age <- mesv.parsed\n"
" use adult <- mesv.parsed\n"
" \n"
" // If any value fails (ie, returns Error(Nil)),\n"
" // the parsing of a row will stop.\n"
" // However, if it reaches here,\n"
" // it returns the following data type\n"
" #(name, age, adult)\n"
" })\n"
" |> parse.column(Ok)\n"
" |> parse.column(int.parse)\n"
" |> parse.column(fn(val: String) -> Result(Bool, Nil) {\n"
" case val {\n"
" \"true\" | \"True\" -> Ok(True)\n"
" \"false\" | \"False\" -> Ok(False)\n"
" _ -> Error(Nil)\n"
" }\n"
" })\n"
" // Specify that the first row is the headers,\n"
" // and if they don't match what is specified, \n"
" // the parsing will fail\n"
" |> parse.expect_headers([\"Name\", \"Age\", \"Is an adult\"])\n"
" // Pass in the CSV String to parse\n"
" |> parse.parse(\n"
" \"Name,Age,Is an adult\\n\"\n"
" <> \"Andrew,20,true\\n\"\n"
" <> \"Blake,25,True\\n\"\n"
" <> \"Cassandra,2,False\",\n"
" )\n"
" \n"
" assert parsed_data == Ok(#(expected_data, []))\n"
" }\n"
" ```\n"
" \n"
" Parsing a CSV and performing some operations on the data immediately after parsing\n"
" ```gleam\n"
" // [...]\n"
" const expected_data: List(#(String, Int, Bool)) = [\n"
" #(\"Anna\", 20, True),\n"
" #(\"Bob\", 25, True),\n"
" #(\"Cleopatra\", 2095, False),\n"
" // She's dead, she can't be an adult.\n"
" // But alas, our parser is too simple to understand\n"
" // this fact, so it will throw an error.\n"
" ]\n"
" \n"
" pub fn main() -> Nil {\n"
" let parsed_data =\n"
" parse.build({\n"
" use name <- mesv.parsed\n"
" use age <- mesv.parsed\n"
" // As long as the operation is guaranteed to result\n"
" // in the data type specified in the Parser,\n"
" // you can do anything in here!\n"
" #(name, age, age >= 18)\n"
" })\n"
" |> parse.column(Ok)\n"
" |> parse.column(int.parse)\n"
" // Pass in the CSV String to parse\n"
" |> parse.parse(\n"
" \"Anna,20\\n\"\n"
" <> \"Bob,25\\n\"\n"
" <> \"Cleopatra,2095\",\n"
" )\n"
" \n"
" assert parsed_data == Ok(#(expected_data, []))\n"
" }\n"
" ```\n"
" \n"
).
-type parsing_error() :: {cant_parse_row, integer(), binary(), binary()} |
{expected_headers_mismatch, list(binary()), list(binary())} |
ran_out_of_values |
{strict_parsed_with_leftovers, list(binary())} |
{encountered_malformed_element, binary(), binary()}.
-opaque parser(DPL) :: {parser,
binary(),
binary(),
binary(),
gleam@option:option(list(binary())),
fun((list(binary())) -> {ok, {DPL, list(binary())}} |
{error, parsing_error()}),
boolean(),
{boolean(), boolean()}}.
-file("src\\mesv\\parse.gleam", 175).
?DOC(
" Function for directly building a `Parser` that uses the subsequent elements in order.\n"
" \n"
" The function passed in should be a curried one - ie, a function that returns a\n"
" function, and so on, with every subsequent function taking in some type of argument.\n"
" \n"
" To build the parser, transform it using the `parse.column` function to specify\n"
" how to parse each subsequent value in a row.\n"
" \n"
" ## Examples\n"
" The simplest parser is one element:\n"
" ```gleam\n"
" parse.build(fn(str) { str })\n"
" |> parse.column(Ok)\n"
" ```\n"
" When used, it will create a `List(String)` containing the first cell of each\n"
" row of the input CSV String.\n"
" \n"
" Infallible transformation of the data can be done both inside of the initial\n"
" function that is passed to `parse.build` and in `parse.column`, but fallible\n"
" transformations (those that output a `Result` or `Option` when the argument\n"
" requires what's inside the `Option`) must reside in the `parse.column` call.\n"
" \n"
" A more complex `Parser` would be something like this:\n"
" ```gleam\n"
" parse.build({\n"
" use name: String <- mesv.parsed\n"
" use age: Int <- mesv.parsed\n"
" use adult: Bool <- mesv.parsed\n"
"\n"
" #(name, age, adult)\n"
" })\n"
" ```\n"
" and to parse the arguments to construct the result, again, use the\n"
" `parse.column` function.\n"
).
%% Wraps the curried constructor `F` in a parser with the default settings:
%% `,` as column separator, `\n` as row separator, `"` as escaper, no expected
%% headers, non-strict columns and whitespace trimming enabled on both sides.
-spec build(fun((DPM) -> DPN)) -> parser(fun((DPM) -> DPN)).
build(F) ->
    Column_separator = <<","/utf8>>,
    Row_separator = <<"\n"/utf8>>,
    Escaper = <<"\""/utf8>>,
    %% The initial row step consumes no cell: it hands back the untouched
    %% constructor together with every token of the row.
    Initial_step = fun(Tokens) -> {ok, {F, Tokens}} end,
    {parser,
        Column_separator,
        Row_separator,
        Escaper,
        none,
        Initial_step,
        false,
        {true, true}}.
-file("src\\mesv\\parse.gleam", 208).
?DOC(
" Transform a `Parser`, by passing in a parsing function for a specified column.\n"
" \n"
" This function will be called for every row, and the output of this function,\n"
" if it's `Ok(a)`, will be passed to the `Parser`'s internal function,\n"
" and the parsing of the row continued;\n"
" \n"
" If it's `Error(Nil)`, the parsing of the row will fail.\n"
" \n"
" ## Examples\n"
" ```gleam\n"
" // Parser(fn(String) -> a)\n"
" parser\n"
" |> parse.column(Ok)\n"
" // Parser(a)\n"
" ```\n"
).
%% Appends one column step to the parser's row function (6th tuple element).
%%
%% The new row function first runs the previous one, then takes the next
%% unconsumed token, feeds it to `Parse` and applies the partially built
%% constructor to the parsed value. All other parser settings are kept.
-spec column(
    parser(fun((DPP) -> DPQ)),
    fun((binary()) -> {ok, DPP} | {error, nil})
) -> parser(DPQ).
column(
    {parser, Column_separator, Row_separator, Escaper, Headers, Previous, Strict_columns, Trim},
    Parse
) ->
    Step = fun(Tokens) ->
        case Previous(Tokens) of
            %% An earlier column already failed: pass the error through.
            {error, Reason} ->
                {error, Reason};
            %% The row has fewer cells than the parser has columns.
            {ok, {_Constructor, []}} ->
                {error, ran_out_of_values};
            {ok, {Constructor, [Token | Rest]}} ->
                case Parse(Token) of
                    {ok, Value} ->
                        {ok, {Constructor(Value), Rest}};
                    {error, _} ->
                        {error,
                            {cant_parse_row,
                                -1,
                                Token,
                                <<"idk, think of a better error system."/utf8>>}}
                end
        end
    end,
    {parser, Column_separator, Row_separator, Escaper, Headers, Step, Strict_columns, Trim}.
-file("src\\mesv\\parse.gleam", 256).
?DOC(
" Configure the parser to treat the first parsed row as the headers,\n"
" and specify that we expect the CSV headers to equal these headers.\n"
" \n"
" If the first row is not **strictly identical** to the contents of\n"
" the arguments to this function, the parser will return an `Error`.\n"
" \n"
" ## Examples\n"
" ```gleam\n"
" parser\n"
" |> parse.expect_headers([\"Name\", \"Age\"])\n"
" |> parse.parse(\"Name,Age\\nAnna,20\")\n"
" // -> the first row matches the expected headers, so it is not parsed\n"
" //    as data; only the row \"Anna,20\" is parsed\n"
" \n"
" parser\n"
" |> parse.expect_headers([\"Name\", \"Age\"])\n"
" |> parse.parse(\"name,age\\nAnna,20\")\n"
" // -> parse returns Error(ExpectedHeadersMismatch([\"Name\", \"Age\"], [\"name\", \"age\"]))\n"
" ```\n"
).
%% Returns a copy of the parser whose expected headers (5th tuple element) are
%% `{some, Headers}`; every other setting is carried over unchanged.
-spec expect_headers(parser(DPV), list(binary())) -> parser(DPV).
expect_headers(
    {parser, Column_separator, Row_separator, Escaper, _Old_headers, Parse, Strict_columns, Trim},
    Headers
) ->
    {parser, Column_separator, Row_separator, Escaper, {some, Headers}, Parse, Strict_columns, Trim}.
-file("src\\mesv\\parse.gleam", 279).
?DOC(
" Function to set a specific row separator, instead of the default newline (`\\n`)\n"
" \n"
" ## Examples\n"
" ```gleam\n"
" parser\n"
" |> parse.parse(\"a,1,c\\nd,4,a\")\n"
" // -> parse returns [#(\"a\", 1, \"c\"), #(\"d\", 4, \"a\")]\n"
" \n"
" parser\n"
" |> set_row_sep(\"|\")\n"
" |> parse.parse(\"a,1,c\\nd,4,a\")\n"
" // -> parse returns [#(\"a\", 1, \"c\\nd\")]\n"
" // the two cells \"4\" and \"a\" are treated as leftovers\n"
" parser\n"
" |> set_row_sep(\"|\")\n"
" |> parse.parse(\"a,1,c|d,4,a\")\n"
" // -> parse returns [#(\"a\", 1, \"c\"), #(\"d\", 4, \"a\")]\n"
" ```\n"
).
%% Returns a copy of the parser whose row separator (3rd tuple element) is
%% `New_row_separator`; every other setting is carried over unchanged.
-spec set_row_sep(parser(DPZ), binary()) -> parser(DPZ).
set_row_sep(
    {parser, Column_separator, _Old_row_separator, Escaper, Headers, Parse, Strict_columns, Trim},
    New_row_separator
) ->
    {parser, Column_separator, New_row_separator, Escaper, Headers, Parse, Strict_columns, Trim}.
-file("src\\mesv\\parse.gleam", 315).
?DOC(
" Function to set a specific value escaper, instead of the default doublequotes (`\"`)\n"
" \n"
" Escapers are wrapped around a cell if that cell contains any one or more of:\n"
" - column separator (by default `,`)\n"
" - row separator (by default `\\n`)\n"
" - escaper itself\n"
" \n"
" In the event that a cell contains an escaper, the escaper is first replaced\n"
" with two escapers.\n"
" \n"
" So `here's \" ` would first become `here's \"\" `, then be wrapped and become\n"
" `\"here's \"\" \"`.\n"
" \n"
" ## Examples\n"
" ```gleam\n"
" parser\n"
" |> parse.parse(\"a,'b','c'''\")\n"
" // -> row returns Ok(#(\"a\", \"'b'\", \"'c'''\"))\n"
" parser\n"
" |> parse.parse(\"a,\\\"b\\\",\\\"c\\\"\\\"\\\"\")\n"
" // -> row returns Ok(#(\"a\", \"b\", \"c\\\"\"))\n"
" \n"
" parser\n"
" |> set_escaper(\"'\")\n"
" |> parse.parse(\"a,'b','c'''\")\n"
" // -> row returns Ok(#(\"a\", \"b\", \"c'\"))\n"
" parser\n"
" |> set_escaper(\"'\")\n"
" |> parse.parse(\"a,\\\"b\\\",\\\"c\\\"\\\"\\\"\")\n"
" // -> row returns Ok(#(\"a\", \"\\\"b\\\"\", \"\\\"c\\\"\\\"\\\"\"))\n"
" ```\n"
).
%% Returns a copy of the parser whose escaper (4th tuple element) is
%% `New_escaper`; every other setting is carried over unchanged.
-spec set_escaper(parser(DQC), binary()) -> parser(DQC).
set_escaper(
    {parser, Column_separator, Row_separator, _Old_escaper, Headers, Parse, Strict_columns, Trim},
    New_escaper
) ->
    {parser, Column_separator, Row_separator, New_escaper, Headers, Parse, Strict_columns, Trim}.
-file("src\\mesv\\parse.gleam", 329).
?DOC(
" Function to set whether the parser should trim the whitespace on both ends of each value.\n"
" \n"
" This operation is performed before the cell is unwrapped (escapers removed), so if the CSV\n"
" file was modified somehow\n"
" (for example, using VSCode plugin [Rainbow CSV](https://marketplace.visualstudio.com/items?itemName=mechatroner.rainbow-csv) to align the columns),\n"
" the cell can be correctly unescaped and parsed.\n"
" \n"
" I think the behaviour of this function and internal order of operations will change\n"
" in the future, so no examples yet.\n"
).
%% Returns a copy of the parser whose trim flags (8th tuple element) are
%% `{Trim_start, Trim_end}`; every other setting is carried over unchanged.
-spec set_trim_whitespace(parser(DQF), boolean(), boolean()) -> parser(DQF).
set_trim_whitespace(
    {parser, Column_separator, Row_separator, Escaper, Headers, Parse, Strict_columns, _Old_trim},
    Trim_start,
    Trim_end
) ->
    New_trim = {Trim_start, Trim_end},
    {parser, Column_separator, Row_separator, Escaper, Headers, Parse, Strict_columns, New_trim}.
-file("src\\mesv\\parse.gleam", 355).
?DOC(
" Function to set a specific column separator, instead of the default comma (`,`)\n"
" \n"
" ## Examples\n"
" ```gleam\n"
" parser\n"
" |> parse.parse(\"a,1,c\")\n"
" // -> row returns Ok(#(\"a\", 1, \"c\"))\n"
" \n"
" parser\n"
" |> set_col_sep(\"|\")\n"
" |> parse.parse(\"a,1,c\")\n"
" // -> row returns Error(RanOutOfValues)\n"
" parser\n"
" |> set_col_sep(\"|\")\n"
" |> parse.parse(\"a|1|c\")\n"
" // -> row returns Ok(#(\"a\", 1, \"c\"))\n"
" ```\n"
).
%% Returns a copy of the parser whose column separator (2nd tuple element) is
%% `New_column_separator`; every other setting is carried over unchanged.
-spec set_col_sep(parser(DQI), binary()) -> parser(DQI).
set_col_sep(
    {parser, _Old_column_separator, Row_separator, Escaper, Headers, Parse, Strict_columns, Trim},
    New_column_separator
) ->
    {parser, New_column_separator, Row_separator, Escaper, Headers, Parse, Strict_columns, Trim}.
-file("src\\mesv\\parse.gleam", 381).
?DOC(
" Function to make the parser strict in terms of columns.\n"
" \n"
" This means that when parsing a row, there must be exactly as many cells as there were\n"
" arguments for the internal `Parser` function. Once this function has been called, any\n"
" values left over after a row has been parsed make that row return an `Error`,\n"
" even if the parsing itself produced a value.\n"
" \n"
" ## Examples\n"
" ```gleam\n"
" parser\n"
" |> parse.parse(\"a,1,c\")\n"
" // -> row returns Ok(#(\"a\", 1))\n"
" \n"
" parser\n"
" |> set_strict_columns()\n"
" |> parse.parse(\"a,1,c\")\n"
" // -> row returns Error(StrictParsedWithLeftovers([\"c\"]))\n"
" ```\n"
).
%% Returns a copy of the parser with the strict-columns flag (7th tuple
%% element) switched on; every other setting is carried over unchanged.
-spec set_strict_columns(parser(DQL)) -> parser(DQL).
set_strict_columns(
    {parser, Column_separator, Row_separator, Escaper, Headers, Parse, _Old_strict_columns, Trim}
) ->
    {parser, Column_separator, Row_separator, Escaper, Headers, Parse, true, Trim}.
-file("src\\mesv\\parse.gleam", 554).
?DOC(
" Internal helper function to check whether the CSV headers that were found match\n"
" the expected pattern that was specified in the Parser building process.\n"
).
%% Checks the header row that was found against the expected one.
%%
%% With no expectation (`none`) the found headers are always accepted. With
%% `{some, Pattern}` they are accepted only when exactly equal (`=:=`) to
%% `Pattern`; otherwise an `expected_headers_mismatch` error carrying the
%% expected and the found headers is returned.
-spec process_headers(gleam@option:option(list(binary())), list(binary())) -> {ok,
        list(binary())} |
    {error, parsing_error()}.
process_headers(none, Found) ->
    {ok, Found};
process_headers({some, Pattern}, Found) when Found =:= Pattern ->
    {ok, Found};
process_headers({some, Pattern}, Found) ->
    {error, {expected_headers_mismatch, Pattern, Found}}.
-file("src\\mesv\\parse.gleam", 539).
?DOC(
" Internal helper function for creating a function for 'unescaping' an element\n"
" (for each `rule`, replacing the second element in the tuple with the first).\n"
" \n"
" This function takes in a String that is guaranteed to be a value - that is,\n"
" it's either unescaped, or it starts with an escaper and ends with an escaper.\n"
" \n"
" It's a curried function because I like functional programming, and because it *should*\n"
" give some performance improvements if I create such a function before any looping\n"
" instead of constructing one for each iteration.\n"
).
%% Builds a function that applies every rule, in list order, to a cell.
%%
%% Each rule is a `{Replacement, Pattern}` pair: occurrences of the second
%% element are replaced with the first element via `gleam@string:replace/3`.
%% The output of one rule is the input of the next.
-spec unescape(list({binary(), binary()})) -> fun((binary()) -> binary()).
unescape(Rules) ->
    Apply_rule = fun({Replacement, Pattern}, Text) ->
        gleam@string:replace(Text, Pattern, Replacement)
    end,
    fun(El) -> lists:foldl(Apply_rule, El, Rules) end.
-file("src\\mesv\\parse.gleam", 576).
?DOC(
" Internal helper function for constructing a function that splits a `String`\n"
" on `separator`, as long as the `separator` is not between two `not_in`.\n"
" \n"
" It is public because I created unit tests for it.\n"
" \n"
" Feel free to use it, but it is not part of the API, so a breaking change\n"
" can occur in every version change, without prior notice.\n"
).
%% Builds a splitter: the returned function splits its input on `El` and then
%% lets `mesv@util:list_merge_map/2` re-join pieces that were cut apart inside
%% an escaped section.
%%
%% A piece is re-joined with the following one (with `El` put back between
%% them) when it contains an odd number of `Escaper` occurrences, as counted
%% by `mesv@util:count_occurences/2`.
-spec partition_on_unescaped_(binary(), binary()) -> fun((binary()) -> list(binary())).
partition_on_unescaped_(El, Escaper) ->
    Merge = fun(First, Second) ->
        case mesv@util:count_occurences(Escaper, First) rem 2 of
            1 ->
                {some, <<First/binary, El/binary, Second/binary>>};
            _ ->
                none
        end
    end,
    fun(To_split) ->
        Pieces = gleam@string:split(To_split, El),
        mesv@util:list_merge_map(Pieces, Merge)
    end.
-file("src\\mesv\\parse.gleam", 399).
?DOC(
" Function to use the specified `Parser(a)` to transform the source into a `List(a)`\n"
" \n"
" If the headers specified in the `expect_headers` function did not match the specified pattern,\n"
" a `ParsingError` will be returned, of the type `ExpectedHeadersMismatch`, containing both\n"
" the expected headers, and what was found.\n"
" \n"
" If the headers weren't specified, or were specified and match the expected pattern, the\n"
" function will return `Ok(#(List(parsed_type), List(ParsingError)))`;\n"
" The first is the list of all rows that were successfully parsed, while the second is a list\n"
" of `ParsingError`s that were thrown due to a row failing to parse.\n"
" \n"
" What to do with both of these Lists is up to the user, whether to ignore all errors or abort\n"
" if any errors occur.\n"
).
%% Runs `Parser` over `Source` and collects the outcome of every row.
%%
%% Steps:
%%   1. `Source` is split into rows on row separators that are not inside an
%%      escaped section.
%%   2. The first row, split into columns, is checked by `process_headers/2`.
%%      A mismatch aborts the whole parse with
%%      `{error, {expected_headers_mismatch, Expected, Found}}`.
%%   3. Every data row is split into cells; each cell is trimmed, unwrapped
%%      and unescaped, then the cells are handed to the parser's row function
%%      and the strict-columns rule is applied to the leftovers.
%%
%% When headers are expected the first row is skipped; otherwise it is parsed
%% as data. On success the result is `{ok, {Values, Errors}}`: the `ok` and
%% `error` row results as separated by `gleam@result:partition/1`, each list
%% reversed afterwards.
-spec parse(parser(DQO), binary()) -> {ok, {list(DQO), list(parsing_error())}} |
    {error, parsing_error()}.
parse(Parser, Source) ->
    {parser,
        Column_separator,
        Row_separator,
        Escaper,
        Headers,
        Parse,
        Strict_columns,
        {Trim_start, Trim_end}} = Parser,
    Split_rows = partition_on_unescaped_(Row_separator, Escaper),
    case Split_rows(Source) of
        [] ->
            {ok, {[], []}};
        [First_row | Later_rows] ->
            Split_columns = partition_on_unescaped_(Column_separator, Escaper),
            %% Built once here, outside the per-row work: a doubled escaper
            %% inside a cell stands for a single literal escaper.
            Unescape = unescape([{Escaper, <<Escaper/binary, Escaper/binary>>}]),
            %% Trimming happens before unwrapping, so padding around a
            %% wrapped cell does not hide its escapers.
            Prepare_cell = fun(Cell) ->
                unwrap_cell(trim_cell(Cell, Trim_start, Trim_end), Escaper)
            end,
            Process_row = fun(Row_string) ->
                Prepared = gleam@list:map(Split_columns(Row_string), Prepare_cell),
                gleam@result:'try'(
                    gleam@result:all(Prepared),
                    fun(Cells) ->
                        gleam@result:'try'(
                            Parse(gleam@list:map(Cells, Unescape)),
                            fun({Value, Leftovers}) ->
                                check_leftovers(Strict_columns, Value, Leftovers)
                            end
                        )
                    end
                )
            end,
            gleam@result:'try'(
                process_headers(Headers, Split_columns(First_row)),
                fun(_) ->
                    Data_rows = case Headers of
                        {some, _} ->
                            Later_rows;
                        none ->
                            [First_row | Later_rows]
                    end,
                    {Values, Errors} = gleam@result:partition(
                        gleam@list:map(Data_rows, Process_row)
                    ),
                    {ok, {lists:reverse(Values), lists:reverse(Errors)}}
                end
            )
    end.

%% Trims whitespace from the start and/or the end of `Cell`, as requested by
%% the two flags; with both flags `false` the cell is returned unchanged.
-spec trim_cell(binary(), boolean(), boolean()) -> binary().
trim_cell(Cell, Trim_start, Trim_end) ->
    Start_trimmed = case Trim_start of
        true -> gleam@string:trim_start(Cell);
        false -> Cell
    end,
    case Trim_end of
        true -> gleam@string:trim_end(Start_trimmed);
        false -> Start_trimmed
    end.

%% Removes the surrounding escapers from a wrapped cell.
%%
%% A cell without an escaper at either end is returned as is. A cell with an
%% escaper at only one end is malformed. A cell that starts and ends with the
%% escaper must also be long enough to hold two separate escapers: otherwise
%% (for example a lone `"`) the same bytes would be counted as both the
%% opening and the closing escaper, so such a cell is reported as malformed
%% too.
-spec unwrap_cell(binary(), binary()) -> {ok, binary()} | {error, parsing_error()}.
unwrap_cell(Cell, Escaper) ->
    Starts_wrapped = gleam_stdlib:string_starts_with(Cell, Escaper),
    Ends_wrapped = gleam_stdlib:string_ends_with(Cell, Escaper),
    Room_for_both = erlang:byte_size(Cell) >= 2 * erlang:byte_size(Escaper),
    case {Starts_wrapped, Ends_wrapped} of
        {true, true} when Room_for_both ->
            Without_prefix = gleam_stdlib:string_remove_prefix(Cell, Escaper),
            {ok, gleam_stdlib:string_remove_suffix(Without_prefix, Escaper)};
        {false, false} ->
            {ok, Cell};
        {_, _} ->
            {error,
                {encountered_malformed_element,
                    Cell,
                    <<"Mismatched escapers"/utf8>>}}
    end.

%% Applies the strict-columns rule to a successfully parsed row: leftovers are
%% only an error when strict mode is on and at least one cell was left over.
-spec check_leftovers(boolean(), Value, list(binary())) -> {ok, Value} |
    {error, parsing_error()}.
check_leftovers(false, Value, _Leftovers) ->
    {ok, Value};
check_leftovers(true, Value, []) ->
    {ok, Value};
check_leftovers(true, _Value, Leftovers) ->
    {error, {strict_parsed_with_leftovers, Leftovers}}.