src/support/z_dropbox.erl

%% @author Marc Worrell <marc@worrell.nl>
%% @copyright 2009-2026 Marc Worrell
%% @doc Simple drop folder handler, monitors a directory and signals new files.
%%
%% Flow:
%% 1. a user uploads/moves a file to the drop folder directory
%% 2. Drop folder handler sees the file, moves it to a safe place, and notifies the file handler of its existence.
%% @end

%% Copyright 2009-2026 Marc Worrell
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.

-module(z_dropbox).
-author("Marc Worrell <marc@worrell.nl>").
-behaviour(gen_server).

%% gen_server exports
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
-export([start_link/1]).

%% interface functions
-export([
    dropbox_dir/1,
    dropbox_processing_dir/1,
    dropbox_unhandled_dir/1,
    dropbox_handled_dir/1,
    scan/1
]).

-include_lib("kernel/include/file.hrl").
-include_lib("zotonic.hrl").

% Server state, built once in init/1 from the site configuration.
-record(state, {
    % Directory that is scanned for newly dropped files.
    dropbox_dir :: binary(),
    % Directory holding files that are offered to the file handlers.
    processing_dir :: binary(),
    % Directory for files that were not handled or stayed too long in processing.
    unhandled_dir :: binary(),
    % Directory for files that were handled; old files are deleted from here.
    handled_dir :: binary(),
    % Age in seconds a dropped file must exceed before it is picked up.
    min_age :: integer(),
    % Age in seconds after which a file in processing is moved to unhandled.
    max_age :: integer(),
    % The site this server belongs to.
    site :: atom(),
    % Context created for the site in init/1, used for notifications.
    context :: z:context()
}).

% Before a file is processed it must be more than this many seconds old.
% This ensures that files that are still being written are not partially moved
% to the processing directory.
% Default value, the site config key dropbox_min_age overrides it (see init/1).
-define(FILE_MIN_AGE, 2).

% After 10 hours (in seconds), all files in the processing dir are moved to unhandled.
% Default value, the site config key dropbox_max_age overrides it (see init/1).
-define(FILE_MAX_AGE, 36000).

% After 14 days (in seconds), all files in the handled dir are deleted.
-define(HANDLED_MAX_AGE, 14*24*3600).

% Period in msec between scans of the dropbox dir.
% Default value, the site config key dropbox_interval overrides it (see init/1).
-define(SCAN_INTERVAL, 10000).


%%====================================================================
%% API
%%====================================================================
%% @doc Start the drop folder server for a site, linked to the calling process.
%% The server is registered locally under the name that z_utils:name_for_site/2
%% returns for this module and the site.
-spec start_link(term()) -> {ok, pid()} | ignore | {error, term()}.
start_link(Site) ->
    ServerName = {local, z_utils:name_for_site(?MODULE, Site)},
    gen_server:start_link(ServerName, ?MODULE, Site, []).


%% @doc Request a scan of the drop folder. This is an asynchronous cast to the
%% dropbox server referenced by the context, so it returns immediately.
%% Called periodically by the interval timer started in init/1, and by mod_admin
%% after a file has been uploaded to the dropbox.
-spec scan(Context) -> ok when
    Context :: z:context().
scan(Context) ->
    DropboxServer = Context#context.dropbox_server,
    gen_server:cast(DropboxServer, scan).


%% @doc Directory that the dropbox scans for new files. Drop a file here to let
%% the dropbox pick it up. The site config key dropbox_dir is used when set,
%% otherwise the "dropbox" files subdirectory. Trailing slashes are trimmed.
dropbox_dir(Context) ->
    Fallback = z_path:files_subdir_ensure(<<"dropbox">>, Context),
    Configured = config(dropbox_dir, Context, Fallback),
    z_string:trim_right(Configured, $/).

%% @doc Directory that holds the files that are being processed. The site config
%% key dropbox_processing_dir is used when set, otherwise the "processing" files
%% subdirectory. Trailing slashes are trimmed.
dropbox_processing_dir(Context) ->
    Fallback = z_path:files_subdir_ensure(<<"processing">>, Context),
    Configured = config(dropbox_processing_dir, Context, Fallback),
    z_string:trim_right(Configured, $/).

%% @doc Directory for files that have been processed; a module can move a file
%% here after handling it. The site config key dropbox_handled_dir is used when
%% set, otherwise the "handled" files subdirectory. Trailing slashes are trimmed.
dropbox_handled_dir(Context) ->
    Fallback = z_path:files_subdir_ensure(<<"handled">>, Context),
    Configured = config(dropbox_handled_dir, Context, Fallback),
    z_string:trim_right(Configured, $/).

%% @doc Directory for files that could not be processed. The site config key
%% dropbox_unhandled_dir is used when set, otherwise the "unhandled" files
%% subdirectory. Trailing slashes are trimmed.
dropbox_unhandled_dir(Context) ->
    Fallback = z_path:files_subdir_ensure(<<"unhandled">>, Context),
    Configured = config(dropbox_unhandled_dir, Context, Fallback),
    z_string:trim_right(Configured, $/).

%%====================================================================
%% gen_server callbacks
%%====================================================================

%% @doc Initiates the server for a site. The directories and timing settings are
%% read from the site configuration: dropbox_dir, dropbox_processing_dir,
%% dropbox_unhandled_dir, dropbox_handled_dir, dropbox_min_age, dropbox_max_age
%% and dropbox_interval (msec). An interval timer is started that calls scan/1,
%% and a cleanup cast is queued so that files left in the processing directory
%% are moved to the unhandled directory.
-spec init(term()) -> {ok, term()} | {ok, term(), timeout() | hibernate} | ignore | {stop, term()}.
init(Site) ->
    logger:set_process_metadata(#{
        module => ?MODULE,
        site => Site
    }),
    Context = z_context:new(Site),
    DropDir = dropbox_dir(Context),
    ProcDir = dropbox_processing_dir(Context),
    UnhandledDir = dropbox_unhandled_dir(Context),
    HandledDir = dropbox_handled_dir(Context),
    MinAge = z_convert:to_integer(config(dropbox_min_age, Context, ?FILE_MIN_AGE)),
    MaxAge = z_convert:to_integer(config(dropbox_max_age, Context, ?FILE_MAX_AGE)),
    ScanInterval = z_convert:to_integer(config(dropbox_interval, Context, ?SCAN_INTERVAL)),
    timer:apply_interval(ScanInterval, ?MODULE, scan, [Context]),
    % Handled after init returns, see handle_cast(cleanup, ...).
    gen_server:cast(self(), cleanup),
    {ok, #state{
        dropbox_dir = DropDir,
        processing_dir = ProcDir,
        unhandled_dir = UnhandledDir,
        handled_dir = HandledDir,
        min_age = MinAge,
        max_age = MaxAge,
        site = Site,
        context = Context
    }}.

%% @doc Trap unknown calls. This server has no synchronous API, so any call
%% stops the server with the reason {unknown_call, Message}. No reply is sent
%% to the caller.
-spec handle_call(term(), {pid(), term()}, term()) -> {stop, term(), term()}.
handle_call(Message, _From, State) ->
    {stop, {unknown_call, Message}, State}.


%% @doc Handle the casts of the drop folder server: 'scan' scans the drop folder
%% and broadcasts the found files, 'cleanup' moves all files in the processing
%% directory to the unhandled directory. Any other cast stops the server.
-spec handle_cast(term(), term()) -> {noreply, term()} | {noreply, term(), timeout() | hibernate} | {stop, term(), term()}.
handle_cast(scan, State) ->
    do_scan(State),
    % Flush the scan casts that are waiting in the message queue after this scan.
    z_utils:flush_message({'$gen_cast', scan}),
    {noreply, State};
handle_cast(cleanup, #state{ processing_dir = ProcDir, unhandled_dir = UnhandledDir } = State) ->
    % Every file found in the processing directory goes to unhandled,
    % replacing a file of the same name that is already there.
    MoveToUnhandled = fun(File) ->
        move_file(ProcDir, File, true, UnhandledDir)
    end,
    lists:foreach(MoveToUnhandled, scan_directory(ProcDir)),
    {noreply, State};
handle_cast(Message, State) ->
    % Trap unknown casts.
    {stop, {unknown_cast, Message}, State}.


%% @doc Handle all messages that are not calls or casts. They are ignored and
%% the state is left unchanged.
-spec handle_info(term(), term()) -> {noreply, term()} | {noreply, term(), timeout() | hibernate} | {stop, term(), term()}.
handle_info(_Message, Unchanged) ->
    {noreply, Unchanged}.


%% @doc Called by the gen_server when it is about to terminate. There is nothing
%% to clean up here, so this only returns ok. The return value is ignored by
%% the gen_server.
-spec terminate(term(), term()) -> ok.
terminate(_Why, _LastState) ->
    ok.

%% @doc Convert the process state when the code is changed. The state needs no
%% conversion, it is returned as is.
-spec code_change(term(), term(), term()) -> {ok, term()}.
code_change(_FromVsn, Current, _Extra) ->
    {ok, Current}.


%%====================================================================
%% support functions
%%====================================================================

%% @doc Fetch a value from the site configuration using m_site:get/2. Default is
%% returned when the site configuration gives 'undefined' for the key.
config(Key, Context, Default) ->
    with_default(m_site:get(Key, Context), Default).

%% @doc Replace 'undefined' with the default, keep any other value.
with_default(undefined, Default) -> Default;
with_default(Value, _Default) -> Value.

%% @doc Perform a scan of the drop folder and broadcast all files to be processed.
%% The steps, in this order:
%% 1. Delete the files in the handled directory that are older than HANDLED_MAX_AGE.
%% 2. Move the files in the processing directory that are older than max_age to
%%    the unhandled directory.
%% 3. Touch the files in the drop folder that are older than min_age and move
%%    them to the processing directory. An existing file in the processing
%%    directory is not overwritten; a failed move is logged.
%% 4. Send a #dropbox_file{} notification for every file that was moved in step 3
%%    and for every file that remained in the processing directory after step 2.
%%    Depending on the result the file is moved to handled, moved to unhandled,
%%    or left in the processing directory.
-spec do_scan( #state{} ) -> ok.
do_scan(State) ->
    #state{
        dropbox_dir = DropDir,
        processing_dir = ProcDir,
        handled_dir = HandledDir,
        unhandled_dir = UnhandledDir,
        min_age = MinAge,
        max_age = MaxAge,
        context = Context
    } = State,

    % Splits a list of files into {NotOlderThanLimit, OlderThanLimit}.
    SplitOnAge = fun(Files, AgeLimit) ->
        lists:foldl(
            fun(File, Acc) -> max_age_split(File, AgeLimit, Acc) end,
            {[], []},
            Files)
    end,

    % Step 1: cleanup of the handled directory.
    {_RecentHandled, ExpiredHandled} = SplitOnAge(scan_directory(HandledDir), ?HANDLED_MAX_AGE),
    lists:foreach(fun file:delete/1, ExpiredHandled),

    % Step 2: files that stayed too long in processing are given up on.
    {Pending, ExpiredProcessing} = SplitOnAge(scan_directory(ProcDir), MaxAge),
    lists:foreach(
        fun(File) -> move_file(ProcDir, File, true, UnhandledDir) end,
        ExpiredProcessing),

    % Step 3: pick up the drop folder files that are old enough. The files are
    % touched before the move, so that the age checks of later scans count from
    % the moment the file was picked up.
    SettledDropFiles = lists:foldl(
        fun(File, Acc) -> min_age_check(File, MinAge, Acc) end,
        [],
        scan_directory(DropDir)),
    MoveResults = [
        begin
            set_modified(File),
            {File, move_file(DropDir, File, false, ProcDir)}
        end
        || File <- SettledDropFiles
    ],
    CollectMoved = fun
        ({_Source, {ok, Target}}, Acc) ->
            [ Target | Acc ];
        ({Source, {error, MoveError}}, Acc) ->
            ?LOG_WARNING(#{
                text => <<"z_dropbox: Failed to move file">>,
                result => error,
                reason => MoveError,
                filename => Source,
                to_dir => ProcDir
            }),
            Acc
    end,
    ToNotify = lists:foldl(CollectMoved, Pending, MoveResults),

    % Step 4: offer every file to the modules, the first answer decides what
    % happens to the file.
    Notify = fun(Path) ->
        Filename = unicode:characters_to_binary(Path),
        Basename = unicode:characters_to_binary(filename:basename(Path)),
        Notification = #dropbox_file{
            filename = Filename,
            basename = Basename
        },
        case z_notifier:first(Notification, Context) of
            ok ->
                % Processing is done, move the file to the handled directory.
                ?LOG_INFO(#{
                    in => zotonic_core,
                    text => <<"Drop folder file has been processed, moved to handled">>,
                    result => ok,
                    processing_status => ok,
                    file => Filename,
                    basename => Basename
                }),
                move_file(ProcDir, Filename, true, HandledDir);
            {ok, ProcessingStatus} ->
                % Leave the file, assume the module is still processing it.
                ?LOG_INFO(#{
                    in => zotonic_core,
                    text => <<"Drop folder file is being processed, leaving in processing">>,
                    result => ok,
                    processing_status => ProcessingStatus,
                    file => Filename,
                    basename => Basename
                }),
                ok;
            {error, Reason} ->
                % A module reported an error, move the file to the unhandled directory.
                ?LOG_ERROR(#{
                    in => zotonic_core,
                    text => <<"Drop folder file was not handled by modules, moved to unhandled">>,
                    result => error,
                    reason => Reason,
                    file => Filename,
                    basename => Basename
                }),
                move_file(ProcDir, Filename, true, UnhandledDir);
            undefined ->
                % No module answered, move the file to the unhandled directory.
                ?LOG_WARNING(#{
                    in => zotonic_core,
                    text => <<"Drop folder file was not handled by modules, moved to unhandled">>,
                    result => error,
                    reason => no_handler,
                    file => Filename,
                    basename => Basename
                }),
                move_file(ProcDir, Filename, true, UnhandledDir)
        end
    end,
    lists:foreach(Notify, ToNotify).

%% @doc Recursively scan a directory and return the found files as binaries.
%% Files with a path component starting with a '.' are left out, see
%% append_file/2. The complete path of the file is checked, which includes the
%% components of Dir itself. A '..' component is allowed, as that might be part
%% of the dropbox directory configuration.
scan_directory(Dir) ->
    Root = unicode:characters_to_list(Dir),
    Found = filelib:fold_files(Root, "", true, fun append_file/2, []),
    lists:map(fun unicode:characters_to_binary/1, Found).

%% @doc Fold function for scan_directory/1. Prepends the file to the accumulator
%% unless one of its path components is a dotfile, as is the case for files in
%% a .git or other '.' directory. The file type is not checked here.
-spec append_file( file:filename_all(), list( file:filename_all() ) ) -> list( file:filename_all() ).
append_file(Filename, Acc) ->
    Hidden = lists:any(fun is_dotfile/1, filename:split(Filename)),
    if
        Hidden -> Acc;
        true -> [ Filename | Acc ]
    end.

%% @doc Check if a single path component (string or binary) names a hidden file
%% or directory, that is a name starting with a '.'.
%% The special components '.' (current directory) and '..' (parent directory)
%% are not hidden files: both can be part of a relative dropbox directory
%% configuration, like "./files/dropbox".
is_dotfile(<<".">>) -> false;
is_dotfile(<<"..">>) -> false;
is_dotfile(<<".", _/binary>>) -> true;
is_dotfile(".") -> false;
is_dotfile("..") -> false;
is_dotfile("." ++ _) -> true;
is_dotfile(_) -> false.


%% @doc Fold function: prepend File to Acc if the file was last modified more
%% than MinAge seconds ago.
%% The age is calculated from POSIX timestamps, so it is not affected by the
%% local timezone or by daylight saving time changes.
%% A file whose info cannot be read (for example because it was removed after
%% the directory was scanned) is skipped.
min_age_check(File, MinAge, Acc) ->
    case file:read_file_info(File, [ {time, posix} ]) of
        {ok, #file_info{ mtime = MTime }} ->
            case os:system_time(second) - MTime > MinAge of
                true -> [File|Acc];
                false -> Acc
            end;
        {error, _} ->
            Acc
    end.

%% @doc Fold function: sort File into the accumulator {New, Old}. The file is
%% prepended to Old if it was last modified more than MaxAge seconds ago, and
%% to New otherwise.
%% The age is calculated from POSIX timestamps, so it is not affected by the
%% local timezone or by daylight saving time changes.
%% A file whose info cannot be read (for example because it was removed after
%% the directory was scanned) is added to neither list.
max_age_split(File, MaxAge, {AccNew, AccOld} = Acc) ->
    case file:read_file_info(File, [ {time, posix} ]) of
        {ok, #file_info{ mtime = MTime }} ->
            case os:system_time(second) - MTime > MaxAge of
                true ->  {AccNew,        [File|AccOld]};
                false -> {[File|AccNew], AccOld}
            end;
        {error, _} ->
            Acc
    end.


%% @doc Move a file to another directory, keeping its path relative to BaseDir.
%% Returns {ok, Target} with the new location of the file, {error, enoent} if
%% File is not a regular file, {error, eexist} if a regular file is in the way
%% at the target location, or the error of creating the target directory or of
%% renaming the file.
%% With DeleteTarget set to true a file at the target location is deleted before
%% the move. A directory at the target location is removed (this only succeeds
%% for an empty directory).
%% The moved file is not touched here; do_scan/1 touches the drop folder files
%% before moving them, so that their age can be checked for later cleanup.
move_file(BaseDir, File, DeleteTarget, ToDir) ->
    move_regular_file(filelib:is_regular(File), BaseDir, File, DeleteTarget, ToDir).

%% @doc Second step of move_file/4, the first argument tells if the source is a regular file.
move_regular_file(false, _BaseDir, _File, _DeleteTarget, _ToDir) ->
    {error, enoent};
move_regular_file(true, BaseDir, File, DeleteTarget, ToDir) ->
    Target = filename:join(ToDir, rel_file(BaseDir, File)),
    % Make room at the target location, the results are ignored as the
    % check for a regular file below decides if the move can be done.
    filelib:is_dir(Target) andalso file:del_dir(Target),
    DeleteTarget andalso file:delete(Target),
    rename_unless_exists(filelib:is_regular(Target), File, Target).

%% @doc Last step of move_file/4, the first argument tells if a regular file is in the way.
rename_unless_exists(true, _File, _Target) ->
    {error, eexist};
rename_unless_exists(false, File, Target) ->
    case z_filelib:ensure_dir(Target) of
        ok ->
            case z_filelib:rename(File, Target) of
                ok -> {ok, Target};
                RenameError -> RenameError
            end;
        EnsureError ->
            EnsureError
    end.

%% @doc Touch a file: set its modification time to the current universal time.
%% The file info is read and written back with only the mtime changed.
%% Returns ok, or the error of reading or writing the file info.
set_modified(Filename) ->
    TimeOpts = [ {time, universal} ],
    case file:read_file_info(Filename, TimeOpts) of
        {error, _} = Error ->
            Error;
        {ok, Info} ->
            Touched = Info#file_info{ mtime = calendar:universal_time() },
            file:write_file_info(Filename, Touched, TimeOpts)
    end.

%% @doc Return the path of File relative to BaseDir. Both must be binaries and
%% BaseDir must not end with a '/'.
%% If File is not located below BaseDir then the basename of File is returned.
%% A file is only below BaseDir if its path is BaseDir followed by a '/', so a
%% sibling directory sharing the prefix (like "/a/dropbox2" for the base
%% directory "/a/drop") is not seen as a subdirectory.
rel_file(BaseDir, File) ->
    Size = byte_size(BaseDir),
    case File of
        <<BaseDir:Size/binary, $/, Rest/binary>> ->
            Rest;
        _ ->
            filename:basename(File)
    end.