%% Licensed under the Apache License, Version 2.0 (the "License"); you may
%% not use this file except in compliance with the License. You may obtain
%% a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% Alternatively, you may use this file under the terms of the GNU Lesser
%% General Public License (the "LGPL") as published by the Free Software
%% Foundation; either version 2.1, or (at your option) any later version.
%% If you wish to allow use of your version of this file only under the
%% terms of the LGPL, you should delete the provisions above and replace
%% them with the notice and other provisions required by the LGPL; see
%% <http://www.gnu.org/licenses/>. If you do not delete the provisions
%% above, a recipient may use your version of this file under the terms of
%% either the Apache License or the LGPL.
%%
%% @author Richard Carlsson <carlsson.richard@gmail.com>
%% @copyright 2006-2009 Richard Carlsson
%% @doc Erlang file monitoring service
%%
%% The behaviour of this service is inspired by the open source FAM
%% daemon ([http://oss.sgi.com/projects/fam/]). It allows file system
%% paths to be monitored, so that a message will be sent to the client
%% process whenever a status change is detected. Currently, the only
%% supported method of detection is by regular polling by the server.
%% While it is not optimal, polling has less overhead than might be
%% expected, and is portable across platforms. The polling interval can
%% be adjusted; by default the server polls all monitored paths every 5
%% seconds. Recursive (automatic) monitoring is supported. The server
%% keeps track of its client processes, and removes all their monitors
%% if they should die.
%%
%% == Event messages ==
%%
%% When a new monitor is set up, or a change is detected, an event
%% message is sent to the client. These have the following general form:
%% <pre>{@type @{file_monitor, Ref::monitor(), Event@}}</pre>
%% where `Ref' is the monitor reference returned when the monitor was
%% set up, and `Event' is one of the following:
%% <ul>
%% <li>{@type @{found, Path::binary(), Type, Info::#file_info@{@},
%% Entries::[{added|deleted, Name::binary()@}]@}}</li>
%% <li>{@type @{changed, Path::binary(), Type, Info::#file_info@{@},
%% Entries::[{added|deleted, Name::binary()@}]@}}</li>
%% <li>{@type @{error, Path::binary(), Type, PosixError::atom()@}}</li>
%% </ul>
%% where `Path' is the watched path (as a binary), `Type' is the type of
%% monitoring being performed (either `file' or `directory'), `Info' is
%% a `file_info' record as defined in `kernel/include/file.hrl', and
%% `Entries' is a list of tuples `{added, binary()}' and `{deleted,
%% binary()}' describing changes to the directory entries if `Type' is
%% `directory', otherwise this is always the empty list. For a `found'
%% event, all entries are `{added, Name}'.
%%
%% A `found' event is sent when a monitor is initially set up, if the
%% path can be read. After that, whenever a change in status is
%% detected, a `changed' event is sent. If the file does not exist or
%% could for some other reason not be accessed, an `error' event is sent
%% (both initially and for subsequent changes). In other words, the
%% first event for a path is always either `found' or `error', and later
%% events are either `changed' or `error'.
%%
%% === Detection of file type changes ===
%%
%% If the object found at a path changes type in the interval between
%% two polls, for example if a directory is replaced by a file with the
%% same name, or vice versa, the file monitor server will detect this
%% and dispatch an `enoent' error event before the new status event. A
%% client can thus rely on always seeing the old file disappear before
%% any change that reports a different file type.
%%
%% == Monitoring types ==
%%
%% There are two ways in which a path can be monitored: as a `file',
%% meaning that we are interested only in the object found at that path,
%% or as a `directory', meaning that we expect the path to point to a
%% directory, and we are also interested in the list of entries of that
%% directory.
%%
%% If a path is monitored as a directory, and the object at the path
%% exists but is not a directory, an `enotdir' error event will be
%% generated. An existing directory can however both be monitored as a
%% directory and as a file - the difference is that in the latter case,
%% the reported list of entries will always be empty.
%%
%% == Automatic (recursive) monitoring ==
%%
%% Automatic monitoring (automonitoring for short) can be used to watch
%% a single file of any type, or a whole directory tree. The monitoring
%% type (`file' or `directory') used for any path is based on the actual
%% type of object found at the path (`directory' if the object is a
%% readable directory, and `file' otherwise). If the object is replaced
%% by another of different type, the monitoring type will change
%% automatically.
%%
%% When a directory becomes automonitored, all of its entries will also
%% be automatically monitored, recursively. As entries are created or
%% deleted in an automonitored directory, they will be dynamically added
%% or removed, respectively, from being monitored. The root path used to
%% create the automonitor will however always remain monitored (even if
%% the object temporarily or permanently disappears) until the server is
%% told to delete the monitor.
%%
%% The event messages sent to the client are the same as if manual
%% monitoring was done. A newly discovered path will be reported by a
%% `found' (or possibly, by an `error' event), and subsequent changes on
%% that path are reported by `changed' and `error' events. If the
%% monitoring type is changed, a new `found' event is sent, and so on.
-module(zotonic_filewatcher_monitor).
-behaviour(gen_server).
-export([monitor_file/1, monitor_file/2, monitor_file/3, monitor_dir/1,
monitor_dir/2, monitor_dir/3, automonitor/1, automonitor/2,
automonitor/3, demonitor/1, demonitor/2, demonitor_file/2,
demonitor_file/3, demonitor_dir/2, demonitor_dir/3,
get_interval/0, get_interval/1, set_interval/1, set_interval/2,
normalize_path/1]).
-export([start/0, start/1, start/2, start_link/0, start_link/1,
start_link/2, stop/0, stop/1]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
code_change/3, terminate/2]).
-compile({no_auto_import,[monitor/2]}).
-compile({no_auto_import,[demonitor/2]}).
-include_lib("kernel/include/file.hrl").
%% NOTE: Monitored paths should be absolute, but this is not checked.
%%
%% We never rewrite the paths, e.g. from relative to absolute, but we
%% convert every path into a binary internally, for the sake of
%% comparisons, and return it to the caller for reference.
%%
%% @type filename() = binary() | atom() | [char() | filename()]. This is
%% an "extended IO-list", that allows atoms as well as binaries to occur
%% either on their own or embedded in a list or deep list. The intent of
%% this is to accept any file name that can be used by the standard
%% library module `file', as well as any normal IO-list, and any list
%% that is formed by combining such fragments.
%%
%% @type options() = [term()]. A list of options.
%%
%% @type server_ref() = pid() | atom() | {Node::atom(), atom()} |
%% {global, atom()}. A reference to a running server. See {@link
%% //stdlib/gen_server:call/3} for more information.
-define(DEFAULT_INTERVAL, 5000). % change with option interval
-define(MIN_INTERVAL, 100).
-define(TIME_TO_STABLE, 1100). % (timestamps have second resolution)
-define(SERVER, ?MODULE).
-define(MSGTAG, ?SERVER).
%% % @type object() = {file|directory, filename()}
%% @type monitor() = reference(). A monitor reference.
-record(state, {poll=true, % boolean(), false if polling is disabled
interval, % polling interval (milliseconds)
files, % map: file path -> #entry{}
dirs, % map: directory path -> #entry{}
autodirs, % map: directory path -> monitor() -> entries
refs, % map: monitor() -> #monitor_info{}
clients % map: client Pid -> #client_info{}
}).
-record(entry, {info = undefined, % #file_info{} or posix atom
dir = [], % directory entries (if any)
stable = 0, % integer(), millis until dir stable
monitors = sets:new() % set(monitor())
}).
-record(client_info, {monitor, % erlang:monitor/2 reference
refs % set(monitor()); monitors owned by client
}).
-record(monitor_info, {pid, % client Pid
auto, % boolean(), true for an automonitor
objects % set(object())
}).
%%
%% User interface
%%
%% @spec (filename()) ->
%% {ok, monitor(), binary()} | {error, not_owner | automonitor}
%% @equiv monitor_file(Path, [])
monitor_file(Path) ->
monitor_file(Path, []).
%% @spec (filename(), options()) ->
%% {ok, monitor(), binary()} | {error, not_owner | automonitor}
%% @equiv monitor_file(file_monitor, Path, Opts)
monitor_file(Path, Opts) ->
monitor_file(?SERVER, Path, Opts).
%% @spec (server_ref(), filename(), options()) ->
%% {ok, monitor(), binary()} | {error, not_owner | automonitor}
%% @doc Monitors the specified file path. Returns the monitor reference
%% as well as the monitored path as a binary.
%%
%% Options:
%% <ul>
%% <li>{@type @{monitor, monitor()@}}: specifies a reference for
%% identifying the monitor to which the path should be added. The
%% monitor need not already exist, but if it does, only the same
%% process is allowed to add paths to it, and paths may not be added
%% manually to an automonitor.</li>
%% </ul>
monitor_file(Server, Path, Opts) ->
monitor(Server, Path, Opts, file).
%% @spec (filename()) ->
%% {ok, monitor(), binary()} | {error, not_owner | automonitor}
%% @equiv monitor_dir(Path, [])
monitor_dir(Path) ->
monitor_dir(Path, []).
%% @spec (filename(), options()) ->
%% {ok, monitor(), binary()} | {error, not_owner | automonitor}
%% @equiv monitor_dir(file_monitor, Path, Opts)
monitor_dir(Path, Opts) ->
monitor_dir(?SERVER, Path, Opts).
%% @spec (server_ref(), filename(), options()) ->
%% {ok, monitor(), binary()} | {error, not_owner | automonitor}
%% @doc Monitors the specified directory path. Returns the monitor
%% reference as well as the monitored path as a binary.
%%
%% Options: see {@link monitor_file/3}.
monitor_dir(Server, Path, Opts) ->
monitor(Server, Path, Opts, directory).
%% not exported
monitor(Server, Path, Opts, Type) ->
FlatPath = normalize_path(Path),
Ref = case proplists:get_value(monitor, Opts) of
R when is_reference(R) ; R =:= undefined -> R;
_ -> erlang:error(badarg)
end,
Cmd = {monitor, self(), {Type, FlatPath}, Ref},
case gen_server:call(Server, Cmd) of
{ok, Ref1} -> {ok, Ref1, FlatPath};
{error, Reason} -> {error, Reason}
end.
%% @spec (filename()) -> {ok, monitor(), binary()}
%% @equiv automonitor(Path, [])
automonitor(Path) ->
automonitor(Path, []).
%% @spec (filename(), options()) -> {ok, monitor(), binary()}
%% @equiv automonitor(file_monitor, Path, Opts)
automonitor(Path, Opts) ->
automonitor(?SERVER, Path, Opts).
%% @spec (server_ref(), filename(), options()) -> {ok, monitor(), binary()}
%% @doc Automonitors the specified path. Returns the monitor reference as
%% well as the monitored path as a binary.
%%
%% Options: none at present.
automonitor(Server, Path, _Opts) ->
FlatPath = normalize_path(Path),
{ok, Ref} = gen_server:call(Server, {automonitor, self(), FlatPath}),
{ok, Ref, FlatPath}.
%% @spec (monitor()) -> ok | {error, not_owner}
%% @equiv demonitor(file_monitor, Ref)
demonitor(Ref) ->
demonitor(?SERVER, Ref).
%% @spec (server_ref(), monitor()) -> ok | {error, not_owner}
%% @doc Deletes the specified monitor. This can only be done by the
%% process that created the monitor.
demonitor(Server, Ref) when is_reference(Ref) ->
ok = gen_server:call(Server, {demonitor, self(), Ref}).
%% @spec (filename(), monitor()) -> ok | {error, not_owner}
%% @equiv demonitor_file(file_monitor, Path, Ref)
demonitor_file(Path, Ref) ->
demonitor_file(?SERVER, Path, Ref).
%% @spec (server_ref(), filename(), monitor()) -> ok | {error, not_owner}
%% @doc Removes the file path from the specified monitor. This can only
%% be done by the process that created the monitor.
demonitor_file(Server, Path, Ref) ->
demonitor(Server, Path, Ref, file).
%% @spec (filename(), monitor()) -> ok | {error, not_owner}
%% @equiv demonitor_dir(file_monitor, Path, Ref)
demonitor_dir(Path, Ref) ->
demonitor_dir(?SERVER, Path, Ref).
%% @spec (server_ref(), filename(), monitor()) -> ok | {error, not_owner}
%% @doc Removes the directory path from the specified monitor. This can
%% only be done by the process that created the monitor.
demonitor_dir(Server, Path, Ref) ->
demonitor(Server, Path, Ref, directory).
%% not exported
demonitor(Server, Path, Ref, Type) when is_reference(Ref) ->
FlatPath = normalize_path(Path),
ok = gen_server:call(Server, {demonitor, self(), {Type, FlatPath}, Ref}).
%% @spec () -> integer()
%% @equiv get_interval(file_monitor)
get_interval() ->
get_interval(?SERVER).
%% @spec (server_ref()) -> integer()
%% @doc Returns the current polling interval.
get_interval(Server) ->
gen_server:call(Server, get_interval).
%% @spec (integer()) -> ok
%% @equiv set_interval(file_monitor, Time)
set_interval(Time) ->
set_interval(?SERVER, Time).
%% @spec (server_ref(), integer()) -> ok
%% @doc Sets the polling interval. Units are in milliseconds.
set_interval(Server, Time) when is_integer(Time) ->
gen_server:call(Server, {set_interval, Time}).
%% @spec () -> {ok, ServerPid::pid()} | ignore | {error, any()}
%% @equiv start([])
start() ->
start([]).
%% @spec (options()) -> {ok, ServerPid::pid()} | ignore | {error, any()}
%% @equiv start({local, file_monitor}, Options)
start(Options) ->
start({local, ?SERVER}, Options).
%% @spec ({local, atom()} | {global, atom()} | undefined, options()) ->
%% {ok, ServerPid::pid()} | ignore | {error, any()}
%% @doc Starts the server and registers it using the specified name.
%% If the name is `undefined', the server will not be registered. See
%% {@link //stdlib/gen_server:start_link/4} for details about the return
%% value.
%%
%% Options:
%% <ul>
%% <li>{@type {interval, Milliseconds::integer()@}}</li>
%% </ul>
start(undefined, Options) ->
gen_server:start(?MODULE, Options, []);
start(Name, Options) ->
gen_server:start(Name, ?MODULE, Options, []).
%% @spec () -> {ok, ServerPid::pid()} | ignore | {error, any()}
%% @equiv start_link([])
start_link() ->
start_link([]).
%% @spec (options()) -> {ok, ServerPid::pid()} | ignore | {error, any()}
%% @equiv start_link({local, file_monitor}, Options)
start_link(Options) ->
start_link({local, ?SERVER}, Options).
%% @spec ({local, atom()} | {global, atom()} | undefined, options()) ->
%% {ok, ServerPid::pid()} | ignore | {error, any()}
%% @doc Starts the server, links it to the current process, and
%% registers it using the specified name. If the name is `undefined',
%% the server will not be registered. See {@link
%% //stdlib/gen_server:start_link/4} for details about the return value.
%%
%% Options: see {@link start/2}.
start_link(undefined, Options) ->
gen_server:start_link(?MODULE, Options, []);
start_link(Name, Options) ->
gen_server:start_link(Name, ?MODULE, Options, []).
%% @spec () -> ok
%% @equiv stop(file_monitor)
stop() ->
stop(?SERVER).
%% @spec (server_ref()) -> ok
%% @doc Stops the specified server.
stop(Server) ->
gen_server:call(Server, stop),
ok.
%%
%% gen_server callbacks
%%
%% @private
init(Options) ->
Time = safe_interval(proplists:get_value(interval, Options)),
St = #state{interval = Time,
files = dict:new(),
dirs = dict:new(),
autodirs = dict:new(),
clients = dict:new(),
refs = dict:new()},
set_timer(St),
{ok, St}.
%% Note that we create all references on the server side, to be
%% consistent (and in case it will matter, to ensure that the server
%% mostly uses references local to its own node).
%% @private
handle_call({monitor, Pid, Object, undefined}, From, St) ->
handle_call({monitor, Pid, Object, make_ref()}, From, St);
handle_call({monitor, Pid, Object, Ref}, _From, St)
when is_pid(Pid), is_reference(Ref) ->
try add_monitor(Object, Pid, Ref, St) of
St1 -> {reply, {ok, Ref}, register_client(Pid, Ref, St1)}
catch
not_owner ->
{reply, {error, not_owner}, St};
automonitor ->
{reply, {error, automonitor}, St}
end;
handle_call({demonitor, Pid, Ref}, _From, St) when is_reference(Ref) ->
try delete_monitor(Pid, Ref, St) of
St1 -> {reply, ok, St1}
catch
not_owner ->
{reply, {error, not_owner}, St}
end;
handle_call({demonitor, Pid, Object, Ref}, _From, St) when is_reference(Ref) ->
try demonitor_path(Pid, Ref, Object, St) of
St1 -> {reply, ok, St1}
catch
not_owner ->
{reply, {error, not_owner}, St}
end;
handle_call({automonitor, Pid, Path}, _From, St) when is_pid(Pid) ->
%% it shouldn't be possible to get exceptions due to wrong owner or
%% non-automonitor type here, since we always create a new reference
Ref = make_ref(),
St1 = unsafe_automonitor_path(Path, Pid, Ref, St),
{reply, {ok, Ref}, register_client(Pid, Ref, St1)};
handle_call(get_interval, _From, St) ->
{reply, St#state.interval, St};
handle_call({set_interval, Time}, _From, St) ->
{reply, ok, St#state{interval=safe_interval(Time)}};
handle_call(stop, _From, St) ->
{stop, normal, ok, St}.
%% @private
handle_cast(_, St) ->
{noreply, St}.
%% @private
handle_info({?MSGTAG, Ref, Event}, St) ->
%% auto-monitoring event to self
case dict:find(Ref, St#state.refs) of
{ok, #monitor_info{pid=Pid}} ->
{noreply, autoevent(Event, Pid, Ref, St)};
error ->
%% could happen if this event was already in the queue when
%% we processed the deletion of the same reference, so just
%% ignore the message
{noreply, St}
end;
handle_info(poll, St) ->
{noreply, set_timer(poll(St))};
handle_info(enable_poll, St) ->
{noreply, St#state{poll=true}};
handle_info({'DOWN', _Ref, process, Pid, _Info}, St) ->
{noreply, remove_client(Pid, St)};
handle_info(_, St) ->
{noreply, St}.
%% @private
code_change(_OldVsn, St, _Extra) ->
{ok, St}.
%% @private
terminate(_Reason, _St) ->
ok.
%%
%% Internal functions
%%
%% We allow paths as binaries, atoms, or "extended io-lists" that may
%% contain atoms as well as binaries. This is flattened into a single
%% binary (currently assuming that the input uses an 8-bit encoding).
%% A single character is not a valid path; it must be within a list.
%% @spec (filename()) -> binary()
%% @doc Flattens the given path to a single binary.
normalize_path(Path) when is_binary(Path) -> Path;
normalize_path(Path) ->
list_to_binary(flatten_onto(Path, [])).
flatten_onto([X | Xs], As) when is_integer(X), X >= 0, X =< 255 ->
[X | flatten_onto(Xs, As)];
flatten_onto([X | Xs], As) ->
flatten_onto(X, flatten_onto(Xs, As));
flatten_onto([], As) ->
As;
flatten_onto(X, As) when is_atom(X) ->
atom_to_list(X) ++ As;
flatten_onto(X, As) when is_binary(X) ->
binary_to_list(X) ++ As;
flatten_onto(_, _) ->
erlang:error(badarg).
join_to_path(Path, File) when is_binary(Path), is_binary(File) ->
normalize_path(filename:join(binary_to_list(Path),
binary_to_list(File))).
safe_interval(N) when is_integer(N) ->
min(16#FFFFffff, max(N, ?MIN_INTERVAL));
safe_interval(_) -> ?DEFAULT_INTERVAL.
set_timer(St) ->
erlang:send_after(St#state.interval, self(), poll),
St.
%% Handling of auto-monitoring events
%%
%% - If a new entry of an automonitored directory is discovered, make it
%% too automonitored (this does recursive monitoring by definition).
%%
%% - If an entry is deleted from an automonitored directory, remove any
%% automonitor from it and its subdirectories recursively. Note that
%% by definition, this will never auto-remove the top automonitored
%% directory.
%%
%% - Because of the special status of the top directory (the path given
%% to automonitor), we do not allow the user to add/remove paths to an
%% existing automonitor or pass a user-specified reference.
%%
%% - Note that to be able to demonitor directories recursively, we must
%% track the automonitored directory entries for each directory and
%% monitor pair (since the directory itself can no longer be read,
%% there is no other way we can know which subentries to demonitor).
%%
%% - An automonitored non-directory that changes type to directory, or
%% vice versa, should should cause recreation of the monitor to match
%% the new type.
%%
%% - Errors on automonitored files are assumed to be intermittent, i.e.,
%% not even enoent should in itself cause demonitoring - that is done
%% only if the containing directory reports that the file is removed.
%%
%% - Errors on automonitored directories cause immediate demonitoring of
%% the entries of the directory, but not the directory itself.
autoevent({_Tag, Path, Type, #file_info{}=Info, _Files}, Pid, Ref, St)
when (((Type =:= file) and (Info#file_info.type =:= directory)) orelse
((Type =:= directory) and (Info#file_info.type =/= directory))) ->
%% monitor type mismatch detected
autoremonitor_path(Path, Pid, Ref, St);
autoevent({_Tag, Path, directory, #file_info{}=Info, Files}, Pid, Ref, St0)
when Info#file_info.type =:= directory ->
%% add/remove automonitoring to/from all added/deleted entries
lists:foldl(fun ({added, File}, St) ->
St1 = add_autodir_entry(Path, File, Ref, St),
automonitor_path(join_to_path(Path, File),
Pid, Ref, St1);
({deleted, File}, St) ->
St1 = remove_autodir_entry(Path, File, Ref, St),
autodemonitor_path(join_to_path(Path, File),
Pid, Ref, St1)
end,
St0, Files);
autoevent({error, Path, directory, enotdir}, Pid, Ref, St) ->
%% monitor type mismatch detected
autoremonitor_path(Path, Pid, Ref, St);
autoevent({error, Path, directory, _}, Pid, Ref, St) ->
%% only demonitor subdirectories/files
autodemonitor_dir_entries(Path, Pid, Ref, St);
autoevent(_Event, _Pid, _Ref, St) ->
St.
%% monitor type mismatch detected - recreate it to get correct type
autoremonitor_path(Path, Pid, Ref, St) ->
automonitor_path(Path, Pid, Ref, autodemonitor_path(Path, Pid, Ref, St)).
automonitor_path(Path, Pid, Ref, St) ->
%% Pid should be a known client, otherwise do nothing
case dict:is_key(Pid, St#state.clients) of
true ->
try unsafe_automonitor_path(Path, Pid, Ref, St)
catch
throw:_ -> St
end;
false ->
St
end.
%% see add_monitor for possible thrown exceptions
unsafe_automonitor_path(Path, Pid, Ref, St) ->
Object = case file:read_file_info(binary_to_list(Path)) of
{ok, #file_info{type=directory}} ->
{directory, Path};
_ ->
{file, Path} % also for errors
end,
add_automonitor(Object, Pid, Ref, St).
autodemonitor_path(Path, Pid, Ref, St0) ->
St1 = try demonitor_path(Pid, Ref, {file, Path}, St0)
catch
not_owner -> St0
end,
St2 = try demonitor_path(Pid, Ref, {directory, Path}, St1)
catch
not_owner -> St1
end,
autodemonitor_dir_entries(Path, Pid, Ref, St2).
autodemonitor_dir_entries(Path, Pid, Ref, St0) ->
Dirs0 = St0#state.autodirs,
case dict:find(Path, Dirs0) of
{ok, Map0} ->
case dict:find(Ref, Map0) of
{ok, Set} ->
Map = dict:erase(Ref, Map0),
%% purge empty entries to save space
Dirs = case dict:size(Map) > 0 of
true -> dict:store(Path, Map, Dirs0);
false -> dict:erase(Path, Dirs0)
end,
St1 = St0#state{autodirs = Dirs},
sets:fold(fun (File, St) ->
P = join_to_path(Path, File),
autodemonitor_path(P, Pid, Ref, St)
end,
St1, Set);
error ->
St0
end;
error ->
St0
end.
%% tracking subentries of automonitored directories, in order to enable
%% efficient recursive demonitoring
add_autodir_entry(Path, File, Ref, St) ->
Map = case dict:find(Path, St#state.autodirs) of
{ok, Map0} ->
Set = case dict:find(Ref, Map0) of
{ok, Entries} -> Entries;
error -> sets:new()
end,
dict:store(Ref, sets:add_element(File, Set), Map0);
error ->
dict:store(Ref, sets:add_element(File, sets:new()),
dict:new())
end,
St#state{autodirs = dict:store(Path, Map, St#state.autodirs)}.
remove_autodir_entry(Path, File, Ref, St) ->
Dirs0 = St#state.autodirs,
case dict:find(Path, Dirs0) of
{ok, Map0} ->
case dict:find(Ref, Map0) of
{ok, Set0} ->
%% purge empty entries to save space
Set = sets:del_element(File, Set0),
Map = case sets:size(Set) > 0 of
true -> dict:store(Ref, Set, Map0);
false -> dict:erase(Ref, Map0)
end,
Dirs = case dict:size(Map) > 0 of
true -> dict:store(Path, Map, Dirs0);
false -> dict:erase(Path, Dirs0)
end,
St#state{autodirs = Dirs};
error -> St
end;
error ->
St
end.
%% client monitoring (once a client, always a client - until death)
register_client(Pid, Ref, St) ->
Info = case dict:find(Pid, St#state.clients) of
{ok, OldInfo} -> OldInfo;
error ->
Monitor = erlang:monitor(process, Pid),
#client_info{monitor = Monitor, refs = sets:new()}
end,
Refs = sets:add_element(Ref, Info#client_info.refs),
St#state{clients = dict:store(Pid, Info#client_info{refs = Refs},
St#state.clients)}.
remove_client(Pid, St) ->
case dict:find(Pid, St#state.clients) of
{ok, #client_info{monitor = Monitor, refs = Refs}} ->
erlang:demonitor(Monitor, [flush]),
purge_client(Pid, Refs, St);
error ->
St
end.
purge_client(Pid, Refs, St0) ->
sets:fold(fun (Ref, St) ->
%% the Pid *should* be the owner here, so
%% a not_owner exception should not happen
delete_monitor(Pid, Ref, St)
end,
St0#state{clients = dict:erase(Pid, St0#state.clients)},
Refs).
%% Adding a new monitor; throws 'not_owner' if the monitor reference is
%% already registered for another Pid; throws 'automonitor' if the
%% reference is already registered with a different type.
add_monitor(Object, Pid, Ref, St) ->
add_monitor(Object, Pid, Ref, St, false).
add_automonitor(Object, Pid, Ref, St) ->
add_monitor(Object, Pid, Ref, St, true).
add_monitor(Object, Pid, Ref, St, Auto) ->
Info = case dict:find(Ref, St#state.refs) of
{ok, #monitor_info{pid = Pid, auto = Auto}=OldInfo} ->
OldInfo;
{ok, #monitor_info{pid = Pid}} ->
throw(automonitor);
{ok, #monitor_info{}} ->
throw(not_owner);
error ->
#monitor_info{pid = Pid, auto = Auto,
objects = sets:new()}
end,
NewObjects = sets:add_element(Object, Info#monitor_info.objects),
Refs = dict:store(Ref, Info#monitor_info{objects = NewObjects},
St#state.refs),
monitor_path(Object, Ref, St#state{refs = Refs}).
%% We must separate the namespaces for files and dirs; there may be
%% simultaneous file and directory monitors for the same path, and a
%% file may be deleted and replaced by a directory of the same name, or
%% vice versa. The client should know (more or less) if a path is
%% expected to refer to a file or a directory.
monitor_path({file, Path}, Ref, St) ->
St#state{files = monitor_path(Path, Ref, file, St#state.files, St)};
monitor_path({directory, Path}, Ref, St) ->
St#state{dirs = monitor_path(Path, Ref, directory, St#state.dirs, St)}.
%% Adding a new monitor forces an immediate poll of the path, such that
%% previous monitors only see any real change, while the new monitor
%% either gets {found, ...} or {error, ...}.
monitor_path(Path, Ref, Type, Dict, St) ->
Entry = case dict:find(Path, Dict) of
{ok, OldEntry} -> poll_file(Path, OldEntry, Type, St);
error -> new_entry(Path, Type, St)
end,
event(#entry{}, dummy_entry(Entry, Ref), Type, Path, St),
NewEntry = Entry#entry{monitors =
sets:add_element(Ref, Entry#entry.monitors)},
dict:store(Path, NewEntry, Dict).
dummy_entry(Entry, Ref) ->
Entry#entry{monitors = sets:add_element(Ref, sets:new())}.
new_entry(Path, Type, St) ->
refresh_entry(Path, #entry{monitors = sets:new()}, Type,
St#state.interval).
%% Deleting a monitor by reference; throws not_owner if the monitor
%% reference is owned by another Pid. The client_info entry may already
%% have been deleted if we come from purge_client().
delete_monitor(Pid, Ref, St0) ->
St1 = case dict:find(Pid, St0#state.clients) of
{ok, #client_info{refs = Refs}=I} ->
NewRefs = sets:del_element(Ref, Refs),
St0#state{clients =
dict:store(Pid, I#client_info{refs = NewRefs},
St0#state.clients)};
error ->
St0
end,
case dict:find(Ref, St1#state.refs) of
{ok, #monitor_info{pid = Pid, objects = Objects}} ->
sets:fold(fun (Object, St) ->
purge_monitor_path(Ref, Object, St)
end,
St1#state{refs = dict:erase(Ref, St1#state.refs)},
Objects);
{ok, #monitor_info{}} -> throw(not_owner);
error ->
St1
end.
%% Deleting a particular path from a monitor. Throws not_owner if the
%% monitor reference is owned by another Pid.
demonitor_path(Pid, Ref, Object, St) ->
case dict:find(Ref, St#state.refs) of
{ok, #monitor_info{pid = Pid, objects = Objects}=I} ->
St1 = purge_monitor_path(Ref, Object, St),
I1 = I#monitor_info{objects=sets:del_element(Object, Objects)},
St1#state{refs = dict:store(Ref, I1, St1#state.refs)};
{ok, #monitor_info{}} -> throw(not_owner);
error ->
St
end.
%% Deleting a particular monitor from a path.
purge_monitor_path(Ref, {file, Path}, St) ->
St#state{files = purge_monitor_path_1(Path, Ref, St#state.files)};
purge_monitor_path(Ref, {directory, Path}, St) ->
St#state{dirs = purge_monitor_path_1(Path, Ref, St#state.dirs)}.
purge_monitor_path_1(Path, Ref, Dict) ->
case dict:find(Path, Dict) of
{ok, Entry} ->
Monitors = sets:del_element(Ref, Entry#entry.monitors),
case sets:size(Monitors) > 0 of
true ->
dict:store(Path, Entry#entry{monitors = Monitors}, Dict);
false ->
dict:erase(Path, Dict)
end;
error ->
Dict
end.
%% Generating events upon state changes by comparing old and new states
%%
%% Event formats:
%% {found, Path, Type, #file_info{}, Files}
%% {changed, Path, Type, #file_info{}, Files}
%% {error, Path, Type, PosixAtom}
%%
%% Type is file or directory, as specified by the monitor type, not by
%% the actual type on disk. If Type is file, Files is always []. If Type
%% is directory, Files is a list of {added, FileName} and {deleted,
%% FileName}, where FileName is on basename form, i.e., without any
%% directory component.
%%
%% When a new monitor is installed for a path, an initial {found,...}
%% or {error,...} event will be sent to the monitor owner.
%%
%% Subsequent events will be either {changed,...} or {error,...}.
%%
%% The monitor reference is not included in the event descriptor itself,
%% but is part of the main message format; see cast/2.
%%
%% Note that we never compare directory entry lists here; if there might
%% have been changes, the timestamps should also be different - see
%% refresh_entry() below for more details.
event(#entry{info = Info, stable = Stable},
#entry{info = Info, stable = Stable}, _Type, _Path, _St) ->
ok; % no change in state
event(#entry{info = undefined}, #entry{info = NewInfo}=Entry,
Type, Path, St)
when not is_atom(NewInfo) ->
%% file or directory exists, for a fresh monitor
Diff = diff_lists(Entry#entry.dir, []),
cast({found, Path, Type, NewInfo, Diff}, Entry#entry.monitors, St);
event(OldEntry, #entry{info = NewInfo}=Entry, Type, Path, St)
when is_atom(NewInfo) ->
%% file or directory is not available
type_change_event(OldEntry#entry.info, NewInfo, Type, Path, Entry, St),
cast({error, Path, Type, NewInfo}, Entry#entry.monitors, St);
event(OldEntry, #entry{info = NewInfo}=Entry, Type, Path, St) ->
%% a file or directory has changed or become readable again after an error
type_change_event(OldEntry#entry.info, NewInfo, Type, Path, Entry, St),
Diff = diff_lists(Entry#entry.dir, OldEntry#entry.dir),
if Diff =:= [],
Entry#entry.stable =/= OldEntry#entry.stable,
NewInfo =:= OldEntry#entry.info ->
%% if only the time-to-stable has changed, we must not
%% broadcast an event unless there really was a change in
%% the directory list
ok;
true ->
cast({changed, Path, Type, NewInfo, Diff},
Entry#entry.monitors, St)
end.
%% sudden changes in file type cause an 'enoent' error report before the
%% new status, so that clients do not need to detect this themselves
type_change_event(#file_info{type = T}, #file_info{type = T}, _, _, _, _) ->
ok;
type_change_event(#file_info{}, #file_info{}, Type, Path, Entry, St) ->
cast_enoent(Type, Path, Entry, St);
type_change_event(#file_info{type = directory}, enotdir, Type, Path, Entry,
St) ->
cast_enoent(Type, Path, Entry, St);
type_change_event(enotdir, #file_info{type = directory}, Type, Path, Entry,
St) ->
cast_enoent(Type, Path, Entry, St);
type_change_event(_, _, _, _, _, _) ->
ok.
cast_enoent(Type, Path, Entry, St) ->
cast({error, Path, Type, enoent}, Entry#entry.monitors, St).
poll(#state{poll=false}=St) ->
St;
poll(#state{poll=true}=St) ->
Files = dict:map(fun (Path, Entry) ->
poll_file(Path, Entry, file, St)
end,
St#state.files),
Dirs = dict:map(fun (Path, Entry) ->
poll_file(Path, Entry, directory, St)
end,
St#state.dirs),
%% polling will now be disabled until all automonitoring events have
%% been processed:
self() ! enable_poll,
St#state{poll=false, files = Files, dirs = Dirs}.
poll_file(Path, Entry, Type, St) ->
NewEntry = refresh_entry(Path, Entry, Type, St#state.interval),
event(Entry, NewEntry, Type, Path, St),
NewEntry.
%% We want to minimize the polling cost, so we only list directories
%% when they have new timestamps, or are still considered "unstable".
%% This requires some explanation: Recall that timestamps have
%% whole-second resolution. If we listed a directory during the same
%% second that it was (or is still being) modified, it is possible that
%% we did not see all the additions/deletions that were made that second
%% - and in that case, we might not detect those changes until the
%% timestamp changes again (maybe never). (This really happens - in
%% particular in test suites...) But we cannot use the system clock for
%% comparisons, because the file system might be on another clock (or
%% another time zone setting), and read-access timestamps are also not
%% reliable (not consistently updated on all platforms and not at all on
%% some file systems). Therefore we do our own internal approximation of
%% passed time (#state.time), based on the refresh cycle.
%% Furthermore, we should make sure that the timestamps we use are
%% always from after the directory was last listed, so that we never
%% report an added entry that was actually created later than the
%% directory timestamp.
refresh_entry(Path, Entry, Type, Delta) when is_binary(Path) ->
refresh_entry_0(binary_to_list(Path), Entry, Type, Delta,
Entry#entry.info).
refresh_entry_0(Path, Entry, Type, Delta, OldInfo) ->
refresh_entry_1(Path, Entry, Type, Delta, OldInfo, false).
refresh_entry_1(Path, Entry, Type, Delta, OldInfo, Break) ->
NewInfo = get_file_info(Path),
case Type of
directory when not is_atom(NewInfo) ->
case NewInfo#file_info.type of
directory when is_atom(OldInfo)
; NewInfo#file_info.mtime =/= OldInfo#file_info.mtime ->
%% The info has changed, so we are forced to refresh
%% the directory listing. We set the time-to-stable,
%% and loop to ensure that the info is always later
%% than the listing. See below for more details.
refresh_entry_2(Path, Entry, Type, Delta, NewInfo,
?TIME_TO_STABLE);
directory when Entry#entry.stable > 0, not Break ->
%% If both the old and new timestamps exist and the
%% modification time has not changed, but the entry
%% is not yet stable (and we didn't loop just now),
%% we need to refresh the directory list again and
%% decrement the time-to-stable. Note that we test
%% *before* we decrement, which ensures that this
%% happens at least once after the initial listing
%% regardless of the value of Delta. (See the
%% discussion about timestamp resolution above.)
refresh_entry_2(Path, Entry, Type, Delta, NewInfo,
max(0, Entry#entry.stable - Delta));
directory ->
Entry#entry{info = NewInfo};
_ ->
%% attempting to monitor a non-directory as a
%% directory is reported as an 'enotdir' error
Entry#entry{info = enotdir, dir = [], stable = 0}
end;
_ ->
%% If we're not monitoring this path as a directory, or we
%% got an error, we don't care what kind of object it is,
%% but just track its status. To handle the case of an error
%% on a directory type monitor, we make sure to reset the
%% list of directory entries.
Entry#entry{info = NewInfo, dir = [], stable = 0}
end.
refresh_entry_2(Path, Entry, Type, Delta, Info, Stable) ->
%% refresh directory list, update time-to-stable, and loop to
%% re-read the info, but also ensure that we do not trigger the
%% stability rule again until the next poll
refresh_entry_1(Path,
Entry#entry{info = Info,
dir = list_dir(Path),
stable = Stable},
Type, Delta, Info, true).
%% We clear some fields of the file_info so that we only trigger on real
%% changes; see the //kernel/file.erl manual and file.hrl for details.
get_file_info(Path) when is_list(Path) ->
case file:read_file_info(Path) of
{ok, Info} ->
Info#file_info{access = undefined,
atime = undefined};
{error, Error} ->
Error % posix error code as atom
end.
%% Listing the members of a directory; note that it yields the empty
%% list if it fails - this is not the place for error detection.
%% mw 20151023: patch to ignore Zotonic "files" directories containing all uploaded
%% and preview files. This speeds up the scanning considerably.
list_dir("." ++ _) ->
[];
list_dir(Path) when is_list(Path) ->
Dirname = filename:dirname(Path),
Files = case file:list_dir(Path) of
{ok, Fs} ->
Fs1 = lists:filter(
fun(F) ->
not zotonic_filewatcher_handler:is_file_blocked(Dirname, F)
end,
Fs),
lists:map( fun normalize_path/1, Fs1 );
{error, _} -> []
end,
lists:sort(Files).
%% both lists must be sorted for this diff to work
diff_lists([F1 | Fs1], [F2 | _]=Fs2) when F1 < F2 ->
[{added, F1} | diff_lists(Fs1, Fs2)];
diff_lists([F1 | _]=Fs1, [F2 | Fs2]) when F1 > F2 ->
[{deleted, F2} | diff_lists(Fs1, Fs2)];
diff_lists([_ | Fs1], [_ | Fs2]) ->
diff_lists(Fs1, Fs2);
diff_lists([F | Fs1], Fs2) ->
[{added, F} | diff_lists(Fs1, Fs2)];
diff_lists(Fs1, [F | Fs2]) ->
[{deleted, F} | diff_lists(Fs1, Fs2)];
diff_lists([], []) ->
[].
%% Multicasting events to clients. The message has the form
%% {file_monitor, MonitorReference, Event}, where Event is described in
%% more detail above, and 'file_monitor' is the name of this module.
cast(Message, Monitors, St) ->
sets:fold(fun (Ref, Msg) ->
case dict:find(Ref, St#state.refs) of
{ok, #monitor_info{pid = Pid, auto = Auto}} ->
Pid ! {?MSGTAG, Ref, Msg},
case Auto of
true -> self() ! {?MSGTAG, Ref, Msg};
false -> ok
end
end,
Msg % note that this is a fold, not a map
end,
Message, Monitors).