|
from pathlib import Path
from typing import List, Optional

import llm
from llm import Toolbox
from readonly_fs_tools import (
    FileWindow,
    Globber,
    Grepper,
    OutputBudget,
    Sandbox,
    Viewer,
)
| 14 | + |
| 15 | + |
class ReadonlyFsTools(Toolbox):
    """Tools for reading and searching files in a sandboxed filesystem.

    These tools provide the ability to:
    - List files and directories using advanced glob patterns (`glob`)
    - Search for text or code with regular expressions across file sets (`grep`)
    - View and read file contents in controlled, windowed slices (`view`)

    Best practices:
    - Use `glob` to discover files before searching or viewing to understand broad context.
    - Use `grep` to narrow down specific content or patterns of interest.
    - Use `view` to read additional context in manageable chunks.
    """

    def __init__(
        self,
        *,
        sandbox_dir: str = str(Path.cwd()),
        blocked_files: Optional[List[str]] = None,
        allow_hidden: bool = False,
        output_limit: int = 10000,
    ) -> None:
        """Build the sandbox and the glob/grep/view helpers that share it.

        Args:
            sandbox_dir: Directory the tools are confined to. The default
                expression is evaluated once, when the class is defined, so
                it is the working directory at import time.
            blocked_files: Paths handed to the sandbox as blocked files.
                ``None`` (the default) means no files are blocked.
            allow_hidden: Forwarded to the sandbox; controls whether hidden
                files are allowed.
            output_limit: Limit used for the ``OutputBudget`` created on
                every tool call.
        """
        super().__init__()
        # `None` replaces the former `[]` default so that no list object is
        # shared between calls; both spellings mean "nothing blocked".
        sandbox = Sandbox(
            sandbox_dir=Path(sandbox_dir),
            blocked_files=[Path(fname) for fname in blocked_files or []],
            allow_hidden=allow_hidden,
        )
        self._output_limit = output_limit
        self._globber = Globber.from_sandbox(sandbox)
        self._grepper = Grepper.from_sandbox(sandbox)
        self._viewer = Viewer.from_sandbox(sandbox)

    def glob(self, glob_patterns: List[str]) -> str:
        """Find files matching glob patterns within the sandbox directory.

        Use this tool to discover files in a codebase or directory structure. Supports
        standard glob patterns including wildcards and recursive search.

        Args:
            glob_patterns: List of glob patterns to match files against. Common patterns:
                - "*.py" - All Python files in current directory
                - "**/*.py" - All Python files recursively in all subdirectories
                - "src/**/*.js" - All JavaScript files in src/ directory tree
                - "test_*.py" - Python files starting with "test_"
                - "**/README.md" - All README.md files in any subdirectory

        Returns:
            String representation of `GlobOutput` containing:
            - `paths`: List of matched file paths relative to the sandbox directory.
            - `truncated`: Boolean indicating if the output was truncated due to size limits.

        Examples:
            Find all Python files in the sandbox root:
            ```
            toolbox.glob(["*.py"])
            # Returns:
            # GlobOutput(paths=[Path('main.py'), Path('utils.py'), Path('config.py')], truncated=False)
            ```

            Find configuration files recursively:
            ```
            toolbox.glob(["**/*.json", "**/*.yaml", "**/*.toml"])
            # Returns:
            # GlobOutput(paths=[Path('config/settings.json'), Path('docker-compose.yaml'), Path('pyproject.toml')], truncated=False)
            ```

            Find test files:
            ```
            toolbox.glob(["test_*.py", "**/test_*.py", "**/tests/*.py"])
            ```

        Note:
            - Patterns are relative to the sandbox directory
            - Hidden files (starting with '.') are excluded unless allow_hidden=True
            - Blocked files are automatically filtered out
            - Results are deduplicated if multiple patterns match the same file
        """
        # A fresh budget per call: each invocation gets the full output limit.
        return self._globber.glob(glob_patterns, OutputBudget(limit=self._output_limit))

    def grep(self, search_regex: str, glob_patterns: List[str]) -> str:
        r"""Search for regex patterns within files matching glob patterns.

        Use this tool to find specific content, code patterns, or text within files.
        Combines file discovery (via glob patterns) with content searching (via regex).

        Args:
            search_regex: Regular expression pattern to search for. Examples:
                - "def \w+" - Function definitions (Python)
                - "class \w+" - Class definitions
                - "import \w+" - Import statements
                - "TODO|FIXME" - Code comments with tasks
                - "@\w+\(" - Decorators (Python)
                - "function \w+\(" - JavaScript functions
                - "\b\w+Error\b" - Error class names
                - "https?://[^\s]+" - URLs
                - "\d{4}-\d{2}-\d{2}" - Dates in YYYY-MM-DD format

            glob_patterns: List of glob patterns to limit search scope. Same format
                as glob() method patterns. Examples:
                - ["*.py"] - Search only Python files in the sandbox root
                - ["**/*.js", "**/*.ts"] - Search JavaScript and TypeScript files
                - ["src/**/*"] - Search all files in src directory
                - ["app/main.py", "app/utils.py"] - Search specific files

        Returns:
            String representation of `GrepOutput` containing:
            - `matches`: List of matching lines as FileContent objects.
            - `truncated`: Boolean indicating if the output was truncated due to size limits.

        Examples:
            Find function definitions in Python files:
            ```
            toolbox.grep("def \w+", ["**/*.py"])
            # Returns:
            # GrepOutput(
            #     matches=[
            #         FileContent(
            #             path=Path('src/main.py'),
            #             contents='def process_data():\n',
            #             FileWindow(line_offset=23, line_count=1),
            #         ),
            #         FileContent(
            #             path=Path('src/main.py'),
            #             contents='def validate_input():\n',
            #             FileWindow(line_offset=45, line_count=1),
            #         ),
            #         FileContent(
            #             path=Path('utils/helpers.py'),
            #             contents='def format_output():\n',
            #             FileWindow(line_offset=12, line_count=1),
            #         ),
            #     ],
            #     truncated=False,
            # )
            ```

            Find TODO comments across codebase:
            ```
            toolbox.grep("TODO|FIXME|XXX", ["**/*.py", "**/*.js", "**/*.md"])
            # Returns:
            # GrepOutput(
            #     matches=[
            #         FileContent(
            #             path=Path('src/parser.py'),
            #             contents='# TODO: Implement error handling\n',
            #             FileWindow(line_offset=10, line_count=1),
            #         ),
            #         FileContent(
            #             path=Path('docs/README.md'),
            #             contents='<!-- FIXME: Update installation instructions -->\n',
            #             FileWindow(line_offset=5, line_count=1),
            #         ),
            #     ],
            #     truncated=False,
            # )
            ```

            Find import statements:
            ```
            toolbox.grep("^from .* import|^import .*", ["**/*.py"])
            ```

            Find configuration values:
            ```
            toolbox.grep('"[A-Z_]+"\s*:', ["**/*.json"])
            ```

        Note:
            - Regex patterns use line-by-line matching (not multiline)
            - Case-sensitive by default (use (?i) prefix for case-insensitive)
            - Each matching line is returned as a separate result
            - Content is searched in UTF-8 encoding with error tolerance
            - Binary files are handled gracefully (may produce garbled text)
        """
        # The docstring above is a raw string so that regex backslashes
        # (\w, \s, \d, \b) and the literal "\n" in the sample output appear
        # verbatim instead of being treated as string escapes.
        return self._grepper.grep(
            search_regex, glob_patterns, OutputBudget(limit=self._output_limit)
        )

    def view(
        self,
        file_path: str,
        line_offset: int = 0,
        line_count: int = 100,
    ) -> str:
        """Read and view contents of a specific file within a defined window.

        Use this tool to examine file contents, understand code structure, or read
        documentation. Supports windowed reading to focus on specific sections.

        Args:
            file_path: Path to the file to read, relative to sandbox directory.
                Examples: "main.py", "src/utils.py", "docs/README.md"

            line_offset: Starting line number (0-based). Use to skip to specific
                sections of large files. Defaults to 0 (start of file).

            line_count: Number of lines to read from the starting offset.
                Defaults to 100 lines. Adjust based on file size and needs.

        Returns:
            String representation of `ViewOutput` containing:
            - `view`: FileContent object containing the path, contents, and line window read.
            - `truncated`: Boolean indicating if the output was truncated due to size limits.

        Examples:
            Read beginning of a file:
            ```
            toolbox.view("main.py")
            # Returns:
            # ViewOutput(
            #     view=FileContent(
            #         path=Path('main.py'),
            #         contents='#!/usr/bin/env python3\\n\"\"\"Main application entry point.\"\"\"\\n\\nimport sys\\nfrom pathlib import Path\\n...',
            #         FileWindow(line_offset=0, line_count=50),
            #     ),
            #     truncated=False,
            # )
            ```

            Read specific section of a large file:
            ```
            toolbox.view("src/parser.py", line_offset=100, line_count=20)
            # Returns:
            # ViewOutput(
            #     view=FileContent(
            #         path=Path('src/parser.py'),
            #         contents='def parse_expression(self, tokens):\\n    \"\"\"Parse mathematical expression.\"\"\"\\n...',
            #         FileWindow(line_offset=100, line_count=20),
            #     ),
            #     truncated=False,
            # )
            ```

            Read configuration file:
            ```
            toolbox.view("pyproject.toml", line_count=30)
            ```

            Read end of a log file (if you know approximate size):
            ```
            toolbox.view("logs/app.log", line_offset=1000, line_count=50)
            ```

            Read just a few lines to check file format:
            ```
            toolbox.view("data.csv", line_count=5)
            # Returns:
            # ViewOutput(
            #     view=FileContent(
            #         path=Path('data.csv'),
            #         contents='name,age,city\\nJohn,25,NYC\\nJane,30,LA\\nBob,35,Chicago\\nAlice,28,Boston\\n',
            #         FileWindow(line_offset=0, line_count=5),
            #     ),
            #     truncated=False,
            # )
            ```

        Note:
            - File paths are relative to the sandbox directory
            - Line numbers in output are 0-indexed (i.e., number of lines offset from top)
            - Files are read with UTF-8 encoding and error tolerance
            - Binary files may produce garbled output but won't crash
            - Empty files return empty content (not an error)
            - If line_offset is beyond file end, returns empty content
            - Large files are handled efficiently (streaming, not full load)

        Best Practices:
            - Use smaller line_count for initial exploration
            - Use line_offset to jump to known sections (after using grep)
            - Check file size first with glob if dealing with very large files
            - For code review, start with line_count=20-50 to get context
        """
        # Backslashes in the docstring are doubled so the sample `contents`
        # values show a literal "\n" rather than embedding real line breaks.
        return self._viewer.view(
            Path(file_path),
            FileWindow(line_offset=line_offset, line_count=line_count),
            OutputBudget(limit=self._output_limit),
        )
6 | 305 |
|
7 | 306 |
|
@llm.hookimpl
def register_tools(register) -> None:
    """Plugin hook: hand the `ReadonlyFsTools` toolbox class to `register`."""
    register(ReadonlyFsTools)
0 commit comments