Skip to main content

src/internal/encoder/text.gleam

/// Encodes quoted strings according to RFC 2045.
///
/// See the following link for reference:
/// - <https://tools.ietf.org/html/rfc2045#section-3.3>
import gleam/bool
import gleam/list
import gleam/order
import gleam/pair
import gleam/result
import gleam/string

import internal/encoder/encoding

/// Estimates the size in bytes of `string` once it has been encoded as a
/// quoted string.
///
/// The string is split into whitespace-delimited chunks and the validated
/// size of every chunk is summed. Each chunk is checked against
/// `maximum_size - 2` (the two bytes are presumably reserved for the
/// surrounding quotes). The first chunk that fails validation aborts the
/// estimate and its error is returned unchanged.
pub fn estimate_encoded_size(
  of string: String,
  enforce field_type: encoding.FieldType,
  with mode: encoding.EncodingMode,
  maximum_size maximum_size: Int,
) -> Result(Int, encoding.EncoderError) {
  let chunk_limit = maximum_size - 2

  use total, chunk <- list.try_fold(chunk_on_whitespace(string), 0)
  use chunk_size <- result.map(validate_chunk(
    chunk,
    field_type,
    mode,
    chunk_limit,
  ))
  total + chunk_size
}

/// Encodes `string` as a quoted string and returns the resulting lines in
/// reading order.
///
/// The string is split into whitespace-delimited chunks which are then
/// encoded and packed into lines by `join_chunks`, using the given field
/// type, encoding mode, start position, preferred line size and maximum
/// line size. Any error reported while joining the chunks is returned
/// unchanged.
pub fn encode_string(
  encode string: String,
  enforce field_type: encoding.FieldType,
  with mode: encoding.EncodingMode,
  start position: Int,
  preferred_size preferred_size: Int,
  maximum_size maximum_size: Int,
) -> Result(List(String), encoding.EncoderError) {
  let chunks = chunk_on_whitespace(string)

  // `join_chunks` hands the lines back last-line-first, so flip them.
  use reversed_lines <- result.map(join_chunks(
    chunks,
    field_type,
    mode,
    position,
    preferred_size,
    maximum_size,
  ))
  list.reverse(reversed_lines)
}

/// Encodes every chunk, wraps the complete value in double quotes when at
/// least one chunk requires quoting, and packs the encoded chunks into lines.
///
/// - `count` is the number of bytes considered already used on the first
///   line (the start position).
/// - A chunk is appended to the current line unless doing so would push the
///   line past `preferred_size`, in which case it opens a new line. The very
///   first chunk always opens the first line.
/// - A single encoded chunk larger than `maximum_size` yields
///   `encoding.MaximumSizeExceeded(maximum_size)`.
/// - Errors from `encode_chunk` are returned unchanged.
///
/// The lines are returned in reverse order (last line first); the caller is
/// expected to reverse them.
fn join_chunks(
  chunks: List(String),
  field_type: encoding.FieldType,
  mode: encoding.EncodingMode,
  count: Int,
  preferred_size: Int,
  maximum_size: Int,
) -> Result(List(String), encoding.EncoderError) {
  chunks
  |> list.try_fold(#(False, []), fn(accumulator, chunk) {
    let #(needs_quoting, chunks) = accumulator
    case encode_chunk(chunk, field_type, mode) {
      Ok(#(chunk_needs_quoting, encoded_chunk)) ->
        Ok(#(needs_quoting || chunk_needs_quoting, [encoded_chunk, ..chunks]))
      Error(error) -> Error(error)
    }
  })
  |> result.try(fn(result) {
    let #(should_quote, chunks) = result

    // The chunks are still in reversed order, take advantage of this by adding a
    // quote to the first chunk (which is actually the last in the result) if quoting
    // is needed. Immediately reverse back the chunks, so they're in the correct order.
    let chunks = case should_quote, chunks {
      True, [chunk, ..other_chunks] ->
        list.reverse([chunk <> "\"", ..other_chunks])
      True, [] | False, _ -> list.reverse(chunks)
    }

    // Add a quote to the start of the first chunk if needed.
    let chunks = case should_quote, chunks {
      True, [chunk, ..other_chunks] -> ["\"" <> chunk, ..other_chunks]
      True, [] | False, _ -> chunks
    }

    chunks
    |> list.try_fold(#(count, []), fn(accumulator, chunk) {
      let #(used_size, lines) = accumulator
      let chunk_size = string.byte_size(chunk)

      use <- bool.guard(
        when: chunk_size > maximum_size,
        return: Error(encoding.MaximumSizeExceeded(maximum_size)),
      )

      // Size of the current line if this chunk were appended to it.
      let line_size = used_size + chunk_size
      case lines {
        // The first chunk always opens the first line. Keep the start
        // position in the running size so that the following chunks are
        // wrapped relative to it instead of relative to column zero.
        [] -> Ok(#(line_size, [chunk]))
        // The chunk does not fit on the current line: open a new one.
        _ if line_size > preferred_size -> Ok(#(chunk_size, [chunk, ..lines]))
        // The chunk fits: glue it onto the current (most recent) line.
        [current_line, ..earlier_lines] ->
          Ok(#(line_size, [current_line <> chunk, ..earlier_lines]))
      }
    })
    |> result.map(pair.second)
  })
}

/// Splits `value` into consecutive chunks, starting a new chunk at every
/// space or tab. The whitespace character is kept as the first character of
/// the chunk it starts, so concatenating the chunks gives back `value`.
///
/// An empty string produces no chunks. A value that begins with whitespace
/// produces an empty first chunk.
fn chunk_on_whitespace(value: String) -> List(String) {
  use <- bool.guard(when: value == "", return: [])

  // Collect the grapheme offsets at which a chunk starts, newest first.
  // Offset 0 always starts the first chunk.
  let boundaries =
    value
    |> string.split("")
    |> list.index_fold([0], fn(offsets, grapheme, offset) {
      case is_whitespace(grapheme) {
        True -> [offset, ..offsets]
        False -> offsets
      }
    })

  // Close the last chunk at the end of the value, then turn every pair of
  // neighbouring offsets into the slice between them.
  [string.length(value), ..boundaries]
  |> list.reverse
  |> list.window_by_2
  |> list.map(fn(bounds) {
    string.slice(from: value, at_index: bounds.0, length: bounds.1 - bounds.0)
  })
}

/// Validates a single chunk and returns its encoded size in bytes.
///
/// The encoded size is the byte size of the chunk plus one extra byte for
/// every character that has to be escaped. Returns
/// `encoding.InvalidCharacter` for the first character that is not valid in
/// `mode`, and `encoding.MaximumSizeExceeded` when the encoded size is larger
/// than `maximum_size`.
fn validate_chunk(
  chunk: String,
  field_type: encoding.FieldType,
  mode: encoding.EncodingMode,
  maximum_size: Int,
) -> Result(Int, encoding.EncoderError) {
  let raw_size = string.byte_size(chunk)

  let encoded_size =
    list.try_fold(string.split(chunk, ""), raw_size, fn(size, grapheme) {
      case is_valid_character(grapheme, mode) {
        False -> Error(encoding.InvalidCharacter(grapheme))
        True ->
          case should_escape_character(grapheme, field_type) {
            // An escaped character costs one additional backslash.
            True -> Ok(size + 1)
            False -> Ok(size)
          }
      }
    })

  case encoded_size {
    Ok(size) if size > maximum_size ->
      Error(encoding.MaximumSizeExceeded(maximum_size))
    within_limit_or_error -> within_limit_or_error
  }
}

/// Encodes a single chunk character by character.
///
/// Returns a pair of a flag telling whether any character of the chunk
/// requires the value to be quoted, and the chunk with a backslash inserted
/// before every character that has to be escaped. Returns
/// `encoding.InvalidCharacter` for the first character that is not valid in
/// `mode`.
fn encode_chunk(
  chunk: String,
  field_type: encoding.FieldType,
  mode: encoding.EncodingMode,
) -> Result(#(Bool, String), encoding.EncoderError) {
  use state, grapheme <- list.try_fold(string.split(chunk, ""), #(False, ""))

  case is_valid_character(grapheme, mode) {
    False -> Error(encoding.InvalidCharacter(grapheme))
    True -> {
      let #(quoting_so_far, encoded) = state
      let needs_quoting =
        quoting_so_far || should_encode_character(grapheme, field_type)
      let addition = case should_escape_character(grapheme, field_type) {
        True -> "\\" <> grapheme
        False -> grapheme
      }
      Ok(#(needs_quoting, encoded <> addition))
    }
  }
}

/// Tells whether `character` forces the value to be quoted: either because
/// it has to be escaped, or because the field is structured and the
/// character is one of `()<>@,;:.[]`.
fn should_encode_character(
  character: String,
  field_type: encoding.FieldType,
) -> Bool {
  use <- bool.guard(
    when: should_escape_character(character, field_type),
    return: True,
  )

  case field_type {
    encoding.Structured -> string.contains("()<>@,;:.[]", character)
    _ -> False
  }
}

/// Tells whether `character` must be preceded by a backslash.
///
/// Only characters in structured fields are escaped. Within those, a
/// character is escaped when it sorts below the space character (tab
/// excepted), or when it is a backslash or a double quote.
fn should_escape_character(
  character: String,
  field_type: encoding.FieldType,
) -> Bool {
  let is_structured = field_type == encoding.Structured
  let is_tab = character == "\t"
  let is_special = case string.compare(character, "\u{20}") {
    order.Lt -> True
    _ -> character == "\\" || character == "\""
  }

  is_structured && !is_tab && is_special
}

/// Tells whether `character` (a single grapheme) may appear in a value
/// encoded with `mode`.
///
/// - `encoding.Ascii`: every code point must lie in U+0020..U+007F.
/// - `encoding.Utf8`: every code point must lie above U+001F.
///
/// Every code point of the grapheme is inspected, not just the first one.
/// Comparing the grapheme as a whole string would let a grapheme made of an
/// ASCII base followed by a non-ASCII combining mark pass as ASCII. An empty
/// string is never valid.
fn is_valid_character(character: String, mode: encoding.EncodingMode) -> Bool {
  case string.to_utf_codepoints(character) {
    [] -> False
    codepoints ->
      list.all(codepoints, fn(codepoint) {
        let value = string.utf_codepoint_to_int(codepoint)
        case mode {
          encoding.Ascii -> value > 0x1F && value < 0x80
          encoding.Utf8 -> value > 0x1F
        }
      })
  }
}

/// Tells whether `character` is exactly a horizontal tab or a space.
fn is_whitespace(character: String) -> Bool {
  case character {
    "\t" | " " -> True
    _ -> False
  }
}