diff --git a/application/Dockerfile b/application/Dockerfile
index d076bc415..acbd8cb73 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -85,4 +85,4 @@ EXPOSE 7091
 USER appuser
 
 # Start Gunicorn
-CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
\ No newline at end of file
+CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "localhost:7091", "application.wsgi:app"]
\ No newline at end of file
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index c409e69ab..33e572367 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -420,7 +420,10 @@ def get(self):
             task_meta = task.info
         except Exception as err:
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
-        
+
+        if isinstance(task_meta, Exception):
+            task_meta = str(task_meta)
+
         return make_response(jsonify({"status": task.status, "result": task_meta}), 200)
 
 
diff --git a/application/parser/remote/dropbox_loader.py b/application/parser/remote/dropbox_loader.py
new file mode 100644
index 000000000..ef503d6d1
--- /dev/null
+++ b/application/parser/remote/dropbox_loader.py
@@ -0,0 +1,55 @@
+import ast
+import json
+
+from application.parser.remote.base import BaseRemote
+from langchain_community.document_loaders import DropboxLoader
+
+
+class DropboxLoaderRemote(BaseRemote):
+    """Remote loader that fetches documents from a Dropbox folder."""
+
+    def load_data(self, inputs):
+        """Load the documents stored under a Dropbox folder.
+
+        Args:
+            inputs: A JSON object string (or an already-parsed dict) with
+                ``access_token`` and an optional ``folder_path`` (default
+                ``""``).
+
+        Returns:
+            The documents returned by ``DropboxLoader.load()``.
+
+        Raises:
+            Exception: Whatever the Dropbox loader raises; it is logged
+                and re-raised instead of silently returning ``None``.
+        """
+        # SECURITY: never eval() this payload -- it is parsed strictly as
+        # data. NOTE(review): presumably it originates from the client's
+        # upload form; confirm against the caller.
+        if isinstance(inputs, dict):
+            data = inputs
+        else:
+            try:
+                data = json.loads(inputs)
+            except ValueError:
+                # Fall back to a Python-literal dict string, which the
+                # previous eval() accepted, without executing code.
+                data = ast.literal_eval(inputs)
+
+        access_token = data.get("access_token")
+        folder_path = data.get("folder_path", "")
+
+        self.loader = DropboxLoader(
+            dropbox_access_token=access_token,
+            dropbox_folder_path=folder_path,
+            recursive=True,
+        )
+
+        try:
+            documents = self.loader.load()
+        except Exception as e:
+            print(f"Error loading documents from Dropbox: {e}")
+            raise
+
+        print(f"Loaded {len(documents)} documents from Dropbox")
+        return documents
diff --git a/application/parser/remote/remote_creator.py b/application/parser/remote/remote_creator.py
index 026abd76c..f87b5a9b6 100644
--- 
a/application/parser/remote/remote_creator.py +++ b/application/parser/remote/remote_creator.py @@ -2,6 +2,7 @@ from application.parser.remote.crawler_loader import CrawlerLoader from application.parser.remote.web_loader import WebLoader from application.parser.remote.reddit_loader import RedditPostsLoaderRemote +from application.parser.remote.dropbox_loader import DropboxLoaderRemote from application.parser.remote.github_loader import GitHubLoader @@ -11,6 +12,7 @@ class RemoteCreator: "sitemap": SitemapLoader, "crawler": CrawlerLoader, "reddit": RedditPostsLoaderRemote, + "dropbox" : DropboxLoaderRemote, "github": GitHubLoader, } diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index c09bab533..c0c358eb4 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -24,6 +24,10 @@ function Upload({ const [docName, setDocName] = useState(''); const [urlName, setUrlName] = useState(''); const [url, setUrl] = useState(''); + const [dropboxData, setDropboxData] = useState({ + access_token: '', + folder_path: '', + }); const [repoUrl, setRepoUrl] = useState(''); // P3f93 const [redditData, setRedditData] = useState({ client_id: '', @@ -49,6 +53,7 @@ function Upload({ // { label: 'Sitemap', value: 'sitemap' }, { label: 'Link', value: 'url' }, { label: 'Reddit', value: 'reddit' }, + { label: 'Dropbox', value: 'dropbox' }, { label: 'GitHub', value: 'github' }, // P3f93 ]; @@ -240,6 +245,14 @@ function Upload({ formData.set('name', 'other'); formData.set('data', JSON.stringify(redditData)); } + if ( + urlType.value === 'dropbox' && + dropboxData.access_token.length > 0 && + dropboxData.folder_path.length > 0 + ) { + formData.set('name', 'other'); + formData.set('data', JSON.stringify(dropboxData)); + } if (urlType.value === 'github') { formData.append('repo_url', repoUrl); // Pdeac } @@ -255,6 +268,7 @@ function Upload({ setProgress({ type: 'TRAINING', percentage: 0, taskId: task_id }); }, 3000); }; + xhr.open('POST', 
`${apiHost + '/api/remote'}`); xhr.send(formData); }; @@ -275,7 +289,9 @@ function Upload({ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], 'text/csv': ['.csv'], - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': [ + '.xlsx', + ], }, }); @@ -288,6 +304,11 @@ function Upload({ ...redditData, [name]: value.split(',').map((item) => item.trim()), }); + } else if (name in dropboxData) { + setDropboxData({ + ...dropboxData, + [name]: value, + }); } else setRedditData({ ...redditData, @@ -382,7 +403,42 @@ function Upload({ size="w-full" rounded="3xl" /> - {urlType.label !== 'Reddit' && urlType.label !== 'GitHub' ? ( + {urlType.label === 'Dropbox' ? ( +