defmodule Bumblebee.Vision.BlipFeaturizer do
alias Bumblebee.Shared
options = [
resize: [
default: true,
doc: "whether to resize the input to the given `:size`"
],
size: [
default: %{height: 384, width: 384},
doc: """
the size to resize the input to, given as `%{height: ..., width: ...}`. Only has
an effect if `:resize` is `true`
"""
],
resize_method: [
default: :bicubic,
doc:
"the resizing method, either of `:nearest`, `:bilinear`, `:bicubic`, `:lanczos3`, `:lanczos5`"
],
normalize: [
default: true,
doc: "whether or not to normalize the input with mean and standard deviation"
],
image_mean: [
default: [0.48145466, 0.4578275, 0.40821073],
doc: "the sequence of mean values for each channel, to be used when normalizing images"
],
image_std: [
default: [0.26862954, 0.26130258, 0.27577711],
doc:
"the sequence of standard deviations for each channel, to be used when normalizing images"
]
]
@moduledoc """
BLIP featurizer for image data.
## Configuration
#{Shared.options_doc(options)}
"""
defstruct Shared.option_defaults(options)
@behaviour Bumblebee.Featurizer
@behaviour Bumblebee.Configurable
alias Bumblebee.Utils.Image
@impl true
def config(featurizer, opts) do
Shared.put_config_attrs(featurizer, opts)
end
@impl true
def process_input(featurizer, images) do
images = List.wrap(images)
for image <- images do
images =
image
|> Image.to_batched_tensor()
|> Nx.as_type(:f32)
|> Image.normalize_channels(length(featurizer.image_mean))
if featurizer.resize do
%{height: height, width: width} = featurizer.size
NxImage.resize(images, {height, width}, method: featurizer.resize_method)
else
images
end
end
|> Nx.concatenate()
end
@impl true
def batch_template(featurizer, batch_size) do
%{height: height, width: width} = featurizer.size
num_channels = length(featurizer.image_mean)
Nx.template({batch_size, height, width, num_channels}, :f32)
end
@impl true
def process_batch(featurizer, images) do
images = NxImage.to_continuous(images, 0, 1)
images =
if featurizer.normalize do
NxImage.normalize(
images,
Nx.tensor(featurizer.image_mean),
Nx.tensor(featurizer.image_std)
)
else
images
end
%{"pixel_values" => images}
end
defimpl Bumblebee.HuggingFace.Transformers.Config do
def load(featurizer, data) do
import Shared.Converters
opts =
convert!(data,
resize: {"do_resize", boolean()},
size: {"size", image_size()},
resize_method: {"resample", resize_method()},
normalize: {"do_normalize", boolean()},
image_mean: {"image_mean", list(number())},
image_std: {"image_std", list(number())}
)
@for.config(featurizer, opts)
end
end
end