Skip to main content

src/internal/encoder/bit.gleam

//// Encode bit strings according to RFC 2045.
////
//// See the following link for reference:
//// - <https://tools.ietf.org/html/rfc2045#section-2.7>
//// - <https://tools.ietf.org/html/rfc2045#section-2.8>

import gleam/bool
import gleam/float
import gleam/int
import gleam/list
import gleam/option.{type Option, None, Some}
import gleam/order
import gleam/result
import gleam/string

import internal/encoder/encoding.{Ascii}

/// Estimates the size in bytes of a string once it is encoded with `bits`
/// bits, taking the maximum size of a line into account.
///
/// The string is split on CRLF (`"\r\n"`) and the byte sizes of the resulting
/// lines are added together; the CRLF separators themselves are not counted.
///
/// Returns the first `encoding.EncoderError` reported while validating a
/// line, i.e. when a line is longer than `maximum_size` bytes or contains a
/// character that is not allowed for the given `bits` and `mode`.
pub fn estimate_encoded_size(
  of string: String,
  in bits: Int,
  with mode: encoding.EncodingMode,
  maximum_size maximum_size: Int,
) -> Result(Int, encoding.EncoderError) {
  let upper_bound = max_ascii_from_bits(bits, mode)
  let lines = string.split(string, "\r\n")

  // Accumulate the validated line sizes, stopping at the first error.
  use total, line <- list.try_fold(lines, 0)
  validate_line(line, upper_bound, maximum_size)
  |> result.map(fn(line_size) { total + line_size })
}

/// Bit encode a string with the required number of bits, using the
/// specified encoding mode and taking into account the maximum allowed
/// line size.
///
/// The string is split on CRLF (`"\r\n"`) and every resulting line is
/// validated; on success the lines are returned unchanged, in order. An empty
/// string yields an empty list.
///
/// Returns the first `encoding.EncoderError` reported while validating a
/// line, i.e. when a line is longer than `maximum_size` bytes or contains a
/// character that is not allowed for the given `bits` and `mode`.
pub fn encode_string(
  encode string: String,
  in bits: Int,
  with mode: encoding.EncodingMode,
  maximum_size maximum_size: Int,
) -> Result(List(String), encoding.EncoderError) {
  // Nothing to encode: return no lines rather than a single empty one.
  use <- bool.guard(when: string == "", return: Ok([]))

  let upper_bound = max_ascii_from_bits(bits, mode)

  string.split(string, "\r\n")
  |> list.try_map(fn(line) {
    // The computed size is irrelevant here; only the validation outcome is.
    validate_line(line, upper_bound, maximum_size)
    |> result.map(fn(_) { line })
  })
}

/// Computes the upper-bound character used to validate the characters of a
/// line, as a one-code-point string.
///
/// - 8 bits in UTF-8 mode has no bound, so `None` is returned.
/// - Otherwise the bound is the code point `2^bits - 1`, additionally capped
///   at 127 in ASCII mode.
///
/// When the computed value is not a valid code point the bound falls back to
/// the empty string.
fn max_ascii_from_bits(
  bits: Int,
  mode: encoding.EncodingMode,
) -> Option(String) {
  case bits, mode {
    8, encoding.Utf8 -> None
    _, _ -> {
      let highest_code =
        int.power(2, int.to_float(bits))
        |> result.map(fn(power) { float.truncate(power) - 1 })

      // Only the ASCII mode clamps the bound to the 7-bit range.
      let capped_code = case mode {
        Ascii -> result.map(highest_code, int.min(_, 127))
        encoding.Utf8 -> highest_code
      }

      let boundary = case result.try(capped_code, string.utf_codepoint) {
        Ok(codepoint) -> string.from_utf_codepoints([codepoint])
        Error(_) -> ""
      }

      Some(boundary)
    }
  }
}

/// Validates a single line (without its CRLF terminator) and returns its
/// size in bytes.
///
/// Returns `encoding.MaximumSizeExceeded(maximum_size)` when the line is
/// longer than `maximum_size` bytes, and `encoding.InvalidCharacter` carrying
/// the first offending grapheme when the line contains a character rejected
/// by `is_valid_character`.
fn validate_line(
  line: String,
  max_ascii: Option(String),
  maximum_size: Int,
) -> Result(Int, encoding.EncoderError) {
  let line_size = string.byte_size(line)

  use <- bool.guard(
    when: line_size > maximum_size,
    return: Error(encoding.MaximumSizeExceeded(maximum_size)),
  )

  // A grapheme may be made of several code points (e.g. a letter followed by
  // a combining mark). Comparing the grapheme as a whole string only looks at
  // its first code point, so every code point is checked individually. The
  // whole grapheme is still what gets reported in the error.
  let validation =
    line
    |> string.to_graphemes
    |> list.find(fn(grapheme) {
      grapheme
      |> string.to_utf_codepoints
      |> list.any(fn(codepoint) {
        !is_valid_character(string.from_utf_codepoints([codepoint]), max_ascii)
      })
    })

  case validation {
    Ok(character) -> Error(encoding.InvalidCharacter(character))
    Error(Nil) -> Ok(line_size)
  }
}

/// Checks whether a single character may appear inside an encoded line.
///
/// A character is rejected when it is a carriage return or a line feed, when
/// it does not compare strictly greater than NUL (`"\u{0}"`), or when
/// `max_ascii` is `Some` and the character compares greater than that bound.
/// `None` means there is no upper bound.
fn is_valid_character(character: String, max_ascii: Option(String)) -> Bool {
  let within_upper_bound = case max_ascii {
    // The bound is inclusive: RFC 2045 section 2.7 only forbids octets with
    // a value greater than 127, so the maximum value itself is allowed.
    Some(ascii) -> string.compare(character, ascii) != order.Gt
    None -> True
  }

  character != "\r"
  && character != "\n"
  && string.compare(character, "\u{0}") == order.Gt
  && within_upper_bound
}