# Any implementations that deal with filesystem access should go in this file.
defmodule LangChain.Retriever.FileSystemProvider do
  @moduledoc """
  A filesystem implementation of the LangChain.Retriever protocol.

  Given a path, it returns the contents of the file at that path, or of the
  files found in the directory at that path, as a list of strings.
  """
  defstruct []

  defimpl LangChain.Retriever do
    @doc """
    Retrieves documents from the file system based on the provided query.

    The query should be a map with the following keys:

      * `:path` (required) - The path to the file or directory to start the search from.
      * `:recursive` (optional, default: `true`) - If true, files in subdirectories are
        read as well. If false, subdirectories are skipped.
      * `:file_extensions` (optional, default: `[]`) - A list of file extensions to include
        in the results. If empty, all file extensions are included.
      * `:ignore_extensions` (optional, default: `[]`) - A list of file extensions to
        exclude from the results.

    Extensions are compared against `Path.extname/1`, so they must include the
    leading dot (for example `".ex"`, not `"ex"`).

    ## Examples

        # Read the contents of a specific file
        Retriever.get_relevant_documents(provider, %{path: "path/to/file.ex"})

        # Read the contents of all files in a directory and its subdirectories
        Retriever.get_relevant_documents(provider, %{path: "path/to/directory"})

        # Read only the files directly inside a directory
        Retriever.get_relevant_documents(provider, %{path: "path/to/directory", recursive: false})

        # Read the contents of all .ex files in a directory, including subdirectories
        Retriever.get_relevant_documents(provider, %{path: "path/to/directory", recursive: true, file_extensions: [".ex"]})

        # Read the contents of all files in a directory, excluding .js files
        Retriever.get_relevant_documents(provider, %{path: "path/to/directory", ignore_extensions: [".js"]})

    ## Return value

    Returns a list of file contents (one binary per file read), or
    `{:error, :invalid_path}` if the path is neither a regular file nor a
    directory. A single-file path that is rejected by the extension filters
    yields `[]`.

    Raises `MatchError` if a file that passed the filters cannot be read or a
    directory cannot be listed.
    """
    def get_relevant_documents(_provider, %{path: path} = query) do
      # NOTE(review): the runtime default has always been `true`, although older
      # documentation claimed `false`. The default is kept as-is so existing
      # callers are unaffected; confirm which one was intended.
      recursive = Map.get(query, :recursive, true)
      file_extensions = Map.get(query, :file_extensions, [])
      ignore_extensions = Map.get(query, :ignore_extensions, [])

      cond do
        File.regular?(path) ->
          # A single file: its contents are returned as a one-element list.
          process_file(path, file_extensions, ignore_extensions)

        File.dir?(path) ->
          # A directory: collect the contents of the files inside it.
          process_directory(path, recursive, file_extensions, ignore_extensions)

        true ->
          # Neither a regular file nor a directory.
          {:error, :invalid_path}
      end
    end

    # Returns `[contents]` when the file passes the extension filters, `[]` otherwise.
    defp process_file(path, file_extensions, ignore_extensions) do
      if valid_file?(path, file_extensions, ignore_extensions) do
        # Assertive match: an unreadable file is treated as an unexpected failure.
        {:ok, file_contents} = File.read(path)
        [file_contents]
      else
        []
      end
    end

    # Collects the contents of the files under `path`, in `File.ls/1` order,
    # descending into subdirectories only when `recursive` is true.
    defp process_directory(path, recursive, file_extensions, ignore_extensions) do
      {:ok, entries} = File.ls(path)

      Enum.flat_map(entries, fn entry ->
        entry_path = Path.join(path, entry)

        cond do
          File.dir?(entry_path) and recursive ->
            process_directory(entry_path, recursive, file_extensions, ignore_extensions)

          File.regular?(entry_path) ->
            process_file(entry_path, file_extensions, ignore_extensions)

          true ->
            # A subdirectory in non-recursive mode, or something that is not a
            # regular file (e.g. a broken symlink). Calling File.read/1 on these
            # would return an error tuple and crash the assertive match in
            # process_file/3, so they are skipped.
            []
        end
      end)
    end

    # True when the file's extension is allowed by `file_extensions` (an empty
    # list allows everything) and is not listed in `ignore_extensions`.
    defp valid_file?(path, file_extensions, ignore_extensions) do
      extension = Path.extname(path)

      (Enum.empty?(file_extensions) or Enum.member?(file_extensions, extension)) and
        not Enum.member?(ignore_extensions, extension)
    end
  end
end
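# NOTE(review): the commented-out block below appears to be an earlier version of
# FileSystemProvider (plain `path` argument, no recursion or extension filtering).
# It is inactive and kept for reference only.
# # any implementations that deal with filesystem access should go in this file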
# defmodule LangChain.Retriever.FileSystemProvider do
# @moduledoc """
# A filesystem implementation of the LangChain.Retriever protocol.
# Use this to read in files and folders from your local filesystem as strings
# so DocumentLoader can make machine-friendly vector embeddings out of them.
# """
# defstruct []
# defimpl LangChain.Retriever do
# def get_relevant_documents(_provider, path) do
# cond do
# File.regular?(path) ->
# # If the path is a file, read its contents and return it as a string inside a list
# {:ok, file_contents} = File.read(path)
# [file_contents]
# File.dir?(path) ->
# # If the path is a directory, list its contents
# {:ok, files} = File.ls(path)
# Enum.reduce(files, [], fn file, acc ->
# # For each file in the directory, read its contents and add it to the accumulator list
# file_path = Path.join(path, file)
# {:ok, file_contents} = File.read(file_path)
# acc ++ [file_contents]
# end)
# true ->
# # If the path is neither a file nor a directory, return an error tuple
# {:error, :invalid_path}
# end
# end
# end
# end