//// Encodes quoted strings according to RFC 2045.
////
//// See the following link for reference:
//// - <https://tools.ietf.org/html/rfc2045#section-3.3>
import gleam/bool
import gleam/list
import gleam/order
import gleam/pair
import gleam/result
import gleam/string
import internal/encoder/encoding
/// Estimates the size in bytes of the quoted-string encoding of a string,
/// taking the encoding mode and the maximum size of a line into account.
///
/// The string is split into whitespace-delimited chunks and the validated
/// sizes of all chunks are summed. Returns the first error reported while
/// validating a chunk.
pub fn estimate_encoded_size(
  of string: String,
  enforce field_type: encoding.FieldType,
  with mode: encoding.EncodingMode,
  maximum_size maximum_size: Int,
) -> Result(Int, encoding.EncoderError) {
  // Each chunk is validated against a limit that is two bytes below the
  // maximum line size (presumably leaving room for the surrounding quotes).
  let chunk_limit = maximum_size - 2
  use total_size, chunk <- list.try_fold(chunk_on_whitespace(string), 0)
  use chunk_size <- result.map(validate_chunk(
    chunk,
    field_type,
    mode,
    chunk_limit,
  ))
  total_size + chunk_size
}
/// Encodes a string as a quoted string, using the specified encoding mode
/// and taking the preferred and maximum line sizes into account. The start
/// position is forwarded as the number of bytes treated as already used on
/// the first line.
///
/// Returns the encoded lines in order, or the first error encountered while
/// encoding.
pub fn encode_string(
  encode string: String,
  enforce field_type: encoding.FieldType,
  with mode: encoding.EncodingMode,
  start position: Int,
  preferred_size preferred_size: Int,
  maximum_size maximum_size: Int,
) -> Result(List(String), encoding.EncoderError) {
  let chunks = chunk_on_whitespace(string)
  // `join_chunks` yields the lines last-to-first, so flip them back.
  use reversed_lines <- result.map(join_chunks(
    chunks,
    field_type,
    mode,
    position,
    preferred_size,
    maximum_size,
  ))
  list.reverse(reversed_lines)
}
/// Encodes the chunks and joins them into lines.
///
/// Every chunk is passed through `encode_chunk`. When at least one chunk
/// reports that quoting is needed, a double quote is prepended to the first
/// chunk and appended to the last chunk. The chunks are then appended to the
/// current line one by one; a new line is started whenever adding a chunk
/// would push the current line past `preferred_size`.
///
/// `count` is the number of bytes treated as already used on the first line,
/// so it is included when deciding where the first line break goes.
///
/// The lines are returned in reverse order (last line first). Returns the
/// error produced by `encode_chunk`, or `MaximumSizeExceeded` when a single
/// encoded chunk is larger than `maximum_size`.
fn join_chunks(
  chunks: List(String),
  field_type: encoding.FieldType,
  mode: encoding.EncodingMode,
  count: Int,
  preferred_size: Int,
  maximum_size: Int,
) -> Result(List(String), encoding.EncoderError) {
  chunks
  |> list.try_fold(#(False, []), fn(accumulator, chunk) {
    let #(needs_quoting, encoded_chunks) = accumulator
    case encode_chunk(chunk, field_type, mode) {
      Ok(#(chunk_needs_quoting, encoded_chunk)) ->
        Ok(
          #(needs_quoting || chunk_needs_quoting, [
            encoded_chunk,
            ..encoded_chunks
          ]),
        )
      Error(error) -> Error(error)
    }
  })
  |> result.try(fn(encoded) {
    let #(should_quote, reversed_chunks) = encoded
    // The chunks are still in reversed order, so the head of the list is the
    // last chunk of the result: add the closing quote there if quoting is
    // needed, then reverse the chunks back into their correct order.
    let chunks = case should_quote, reversed_chunks {
      True, [last_chunk, ..earlier_chunks] ->
        list.reverse([last_chunk <> "\"", ..earlier_chunks])
      True, [] | False, _ -> list.reverse(reversed_chunks)
    }
    // Add the opening quote to the start of the first chunk if needed.
    let chunks = case should_quote, chunks {
      True, [first_chunk, ..later_chunks] -> [
        "\"" <> first_chunk,
        ..later_chunks
      ]
      True, [] | False, _ -> chunks
    }
    chunks
    |> list.try_fold(#(count, []), fn(accumulator, chunk) {
      let #(used_size, lines) = accumulator
      let chunk_size = string.byte_size(chunk)
      use <- bool.guard(
        when: chunk_size > maximum_size,
        return: Error(encoding.MaximumSizeExceeded(maximum_size)),
      )
      case lines {
        // The first chunk always opens the first line. That line already has
        // `used_size` (the start position) bytes in use, so keep counting
        // from there instead of discarding the start position.
        [] -> Ok(#(used_size + chunk_size, [chunk]))
        // Adding the chunk would exceed the preferred size: start a new line.
        lines if used_size + chunk_size > preferred_size ->
          Ok(#(chunk_size, [chunk, ..lines]))
        // Otherwise append the chunk to the line currently being built.
        [current_line, ..finished_lines] ->
          Ok(
            #(used_size + chunk_size, [
              current_line <> chunk,
              ..finished_lines
            ]),
          )
      }
    })
    |> result.map(pair.second)
  })
}
/// Splits a string into chunks, starting a new chunk at every whitespace
/// character (space or tab).
///
/// The whitespace character stays at the start of the chunk it opens, so
/// concatenating the chunks gives back the original string. An empty string
/// produces no chunks.
fn chunk_on_whitespace(value: String) -> List(String) {
  use <- bool.guard(when: value == "", return: [])
  // Collect the grapheme index of every whitespace character, newest first,
  // seeded with 0 as the start of the first chunk.
  let boundaries =
    list.index_fold(string.split(value, ""), [0], fn(found, grapheme, position) {
      case is_whitespace(grapheme) {
        True -> [position, ..found]
        False -> found
      }
    })
  // Close the final chunk at the end of the string, then walk the boundaries
  // pairwise in ascending order to cut out each chunk.
  [string.length(value), ..boundaries]
  |> list.reverse()
  |> list.window_by_2()
  |> list.map(fn(bounds) {
    let #(from, until) = bounds
    string.slice(value, from, until - from)
  })
}
/// Validates a chunk and computes its encoded size in bytes.
///
/// The size is the byte size of the chunk plus one extra byte for every
/// character that has to be escaped. Returns `InvalidCharacter` for the
/// first character that is not valid in the given mode, or
/// `MaximumSizeExceeded` when the computed size is larger than
/// `maximum_size`.
fn validate_chunk(
  chunk: String,
  field_type: encoding.FieldType,
  mode: encoding.EncodingMode,
  maximum_size: Int,
) -> Result(Int, encoding.EncoderError) {
  let raw_size = string.byte_size(chunk)
  let sized =
    list.try_fold(string.split(chunk, ""), raw_size, fn(size, character) {
      case is_valid_character(character, mode) {
        False -> Error(encoding.InvalidCharacter(character))
        True ->
          case should_escape_character(character, field_type) {
            // An escaped character is preceded by a one-byte backslash.
            True -> Ok(size + 1)
            False -> Ok(size)
          }
      }
    })
  use encoded_size <- result.try(sized)
  use <- bool.guard(
    when: encoded_size > maximum_size,
    return: Error(encoding.MaximumSizeExceeded(maximum_size)),
  )
  Ok(encoded_size)
}
/// Encodes a single chunk character by character.
///
/// Returns a tuple of a flag telling whether any character in the chunk
/// requires the value to be quoted, and the chunk with a backslash inserted
/// before every character that has to be escaped. Returns `InvalidCharacter`
/// for the first character that is not valid in the given mode.
fn encode_chunk(
  chunk: String,
  field_type: encoding.FieldType,
  mode: encoding.EncodingMode,
) -> Result(#(Bool, String), encoding.EncoderError) {
  use state, character <- list.try_fold(string.split(chunk, ""), #(False, ""))
  case is_valid_character(character, mode) {
    False -> Error(encoding.InvalidCharacter(character))
    True -> {
      let #(quote_so_far, encoded) = state
      let needs_quote =
        quote_so_far || should_encode_character(character, field_type)
      let addition = case should_escape_character(character, field_type) {
        True -> "\\" <> character
        False -> character
      }
      Ok(#(needs_quote, encoded <> addition))
    }
  }
}
/// Tells whether a character forces the value to be quoted: either it has to
/// be escaped, or the field is structured and the character is one of
/// `()<>@,;:.[]`.
fn should_encode_character(
  character: String,
  field_type: encoding.FieldType,
) -> Bool {
  case should_escape_character(character, field_type) {
    True -> True
    False ->
      case field_type {
        encoding.Structured -> string.contains("()<>@,;:.[]", character)
        _ -> False
      }
  }
}
/// Tells whether a character has to be escaped with a backslash.
///
/// Only structured fields escape anything. There, a tab is never escaped,
/// while a backslash, a double quote and every character that sorts before
/// the space character are.
fn should_escape_character(
  character: String,
  field_type: encoding.FieldType,
) -> Bool {
  use <- bool.guard(when: field_type != encoding.Structured, return: False)
  case character {
    "\t" -> False
    "\\" | "\"" -> True
    _ -> string.compare(character, "\u{20}") == order.Lt
  }
}
/// Tells whether a character may appear in the encoded output for the given
/// mode.
///
/// In both modes the character must sort after U+001F; in ASCII mode it must
/// additionally sort before U+0080.
///
/// NOTE(review): a tab (U+0009) is rejected by this check, although
/// `is_whitespace` and `should_escape_character` treat tabs specially.
/// Confirm whether tabs are meant to be invalid.
fn is_valid_character(character: String, mode: encoding.EncodingMode) -> Bool {
  let above_controls = string.compare(character, "\u{1f}") == order.Gt
  case mode {
    encoding.Utf8 -> above_controls
    encoding.Ascii ->
      above_controls && string.compare(character, "\u{80}") == order.Lt
  }
}
/// Tells whether a character is a tab or a space.
fn is_whitespace(character: String) -> Bool {
  case character {
    "\t" | " " -> True
    _ -> False
  }
}