Skip to content

Commit eded75d

Browse files
Add DetectSecrets validator (#397)
* Add DetectSecrets validator * Fix linting issues * Add class docstring, some fixes and check for multi-line string * Add dev requirement for setup.py * Add unit tests * Add integration tests * Add walkthrough notebook * Add strong type str to value * Raise warning instead of error and add '\n' for non-multi-line strings, add try-except blocks while creating and deleting file, and while detecting secrets, add caveats in documentation * Update integration tests * Update warning
1 parent d43a88d commit eded75d

File tree

8 files changed

+626
-10
lines changed

8 files changed

+626
-10
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Check whether an LLM-generated code response contains secrets\n",
8+
"\n",
9+
"### Using the `DetectSecrets` validator\n",
10+
"\n",
11+
"This is a simple walkthrough of how to use the `DetectSecrets` validator to check whether an LLM-generated code response contains secrets. It utilizes the `detect-secrets` library, which is a Python library that scans code files for secrets. The library is available on GitHub at [this link](https://github.com/Yelp/detect-secrets).\n"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": 1,
17+
"metadata": {},
18+
"outputs": [
19+
{
20+
"name": "stdout",
21+
"output_type": "stream",
22+
"text": [
23+
"\n",
24+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n",
25+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
26+
]
27+
}
28+
],
29+
"source": [
30+
"# Install the necessary packages\n",
31+
"! pip install detect-secrets -q"
32+
]
33+
},
34+
{
35+
"cell_type": "code",
36+
"execution_count": 2,
37+
"metadata": {},
38+
"outputs": [],
39+
"source": [
40+
"# Import the guardrails package\n",
41+
"# and the DetectSecrets validator\n",
42+
"import guardrails as gd\n",
43+
"from guardrails.validators import DetectSecrets\n",
44+
"from rich import print"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 3,
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"# Create a Guard object with this validator\n",
54+
"# Here, we set on_fail=\"fix\" so that the output is fixed\n",
55+
"# (the secrets are masked) if the validator detects any\n",
56+
"\n",
57+
"guard = gd.Guard.from_string(\n",
58+
" validators=[DetectSecrets(on_fail=\"fix\")],\n",
59+
" description=\"testmeout\",\n",
60+
")"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": 4,
66+
"metadata": {},
67+
"outputs": [
68+
{
69+
"data": {
70+
"text/html": [
71+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
72+
"import os\n",
73+
"import openai\n",
74+
"\n",
75+
"SECRET_TOKEN = <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span>\n",
76+
"\n",
77+
"ADMIN_CREDENTIALS = <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">\"username\"</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"admin\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"password\"</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span><span style=\"font-weight: bold\">}</span>\n",
78+
"\n",
79+
"\n",
80+
"openai.api_key = <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span>\n",
81+
"COHERE_API_KEY = <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span>\n",
82+
"\n",
83+
"</pre>\n"
84+
],
85+
"text/plain": [
86+
"\n",
87+
"import os\n",
88+
"import openai\n",
89+
"\n",
90+
"SECRET_TOKEN = \u001b[32m\"********\"\u001b[0m\n",
91+
"\n",
92+
"ADMIN_CREDENTIALS = \u001b[1m{\u001b[0m\u001b[32m\"username\"\u001b[0m: \u001b[32m\"admin\"\u001b[0m, \u001b[32m\"password\"\u001b[0m: \u001b[32m\"********\"\u001b[0m\u001b[1m}\u001b[0m\n",
93+
"\n",
94+
"\n",
95+
"openai.api_key = \u001b[32m\"********\"\u001b[0m\n",
96+
"COHERE_API_KEY = \u001b[32m\"********\"\u001b[0m\n",
97+
"\n"
98+
]
99+
},
100+
"metadata": {},
101+
"output_type": "display_data"
102+
}
103+
],
104+
"source": [
105+
"# Let's run the validator on a dummy code snippet\n",
106+
"# that contains a few secrets\n",
107+
"code_snippet = \"\"\"\n",
108+
"import os\n",
109+
"import openai\n",
110+
"\n",
111+
"SECRET_TOKEN = \"DUMMY_SECRET_TOKEN_abcdefgh\"\n",
112+
"\n",
113+
"ADMIN_CREDENTIALS = {\"username\": \"admin\", \"password\": \"dummy_admin_password\"}\n",
114+
"\n",
115+
"\n",
116+
"openai.api_key = \"sk-blT3BlbkFJo8bdtYwDLuZT\"\n",
117+
"COHERE_API_KEY = \"qdCUhtsCtnixTRfdrG\"\n",
118+
"\"\"\"\n",
119+
"\n",
120+
"# Parse the code snippet\n",
121+
"output = guard.parse(\n",
122+
" llm_output=code_snippet,\n",
123+
")\n",
124+
"\n",
125+
"# Print the output\n",
126+
"print(output)"
127+
]
128+
},
129+
{
130+
"cell_type": "markdown",
131+
"metadata": {},
132+
"source": [
133+
"As you can see here, our validator detected the secrets within the provided code snippet. The detected secrets were then masked with asterisks.\n"
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": 5,
139+
"metadata": {},
140+
"outputs": [
141+
{
142+
"data": {
143+
"text/html": [
144+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
145+
"import os\n",
146+
"import openai\n",
147+
"\n",
148+
"companies = <span style=\"font-weight: bold\">[</span><span style=\"color: #008000; text-decoration-color: #008000\">\"google\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"facebook\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"amazon\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"microsoft\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"apple\"</span><span style=\"font-weight: bold\">]</span>\n",
149+
"for company in companies:\n",
150+
" <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">print</span><span style=\"font-weight: bold\">(</span>company<span style=\"font-weight: bold\">)</span>\n",
151+
"\n",
152+
"</pre>\n"
153+
],
154+
"text/plain": [
155+
"\n",
156+
"import os\n",
157+
"import openai\n",
158+
"\n",
159+
"companies = \u001b[1m[\u001b[0m\u001b[32m\"google\"\u001b[0m, \u001b[32m\"facebook\"\u001b[0m, \u001b[32m\"amazon\"\u001b[0m, \u001b[32m\"microsoft\"\u001b[0m, \u001b[32m\"apple\"\u001b[0m\u001b[1m]\u001b[0m\n",
160+
"for company in companies:\n",
161+
" \u001b[1;35mprint\u001b[0m\u001b[1m(\u001b[0mcompany\u001b[1m)\u001b[0m\n",
162+
"\n"
163+
]
164+
},
165+
"metadata": {},
166+
"output_type": "display_data"
167+
}
168+
],
169+
"source": [
170+
"# Let's run the validator on a dummy code snippet\n",
171+
"# that does not contain any secrets\n",
172+
"code_snippet = \"\"\"\n",
173+
"import os\n",
174+
"import openai\n",
175+
"\n",
176+
"companies = [\"google\", \"facebook\", \"amazon\", \"microsoft\", \"apple\"]\n",
177+
"for company in companies:\n",
178+
" print(company)\n",
179+
"\"\"\"\n",
180+
"\n",
181+
"# Parse the code snippet\n",
182+
"output = guard.parse(\n",
183+
" llm_output=code_snippet,\n",
184+
")\n",
185+
"\n",
186+
"# Print the output\n",
187+
"print(output)"
188+
]
189+
},
190+
{
191+
"cell_type": "markdown",
192+
"metadata": {},
193+
"source": [
194+
"As you can see, the provided code snippet does not contain any secrets, so the validator returned it unchanged, with no false positives!\n"
195+
]
196+
},
197+
{
198+
"cell_type": "markdown",
199+
"metadata": {},
200+
"source": [
201+
"#### In this way, you can use the `DetectSecrets` validator to check whether an LLM-generated code response contains secrets. With Guardrails as wrapper, you can be assured that the secrets in the code will be detected and masked and not be exposed.\n"
202+
]
203+
}
204+
],
205+
"metadata": {
206+
"kernelspec": {
207+
"display_name": "guard-venv",
208+
"language": "python",
209+
"name": "python3"
210+
},
211+
"language_info": {
212+
"codemirror_mode": {
213+
"name": "ipython",
214+
"version": 3
215+
},
216+
"file_extension": ".py",
217+
"mimetype": "text/x-python",
218+
"name": "python",
219+
"nbconvert_exporter": "python",
220+
"pygments_lexer": "ipython3",
221+
"version": "3.11.6"
222+
}
223+
},
224+
"nbformat": 4,
225+
"nbformat_minor": 2
226+
}

guardrails/guard.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -400,15 +400,14 @@ def parse(
400400
) -> Union[str, Dict, Awaitable[str], Awaitable[Dict]]:
401401
"""Alternate flow to using Guard where the llm_output is known.
402402
403-
Args:
404-
llm_api: The LLM API to call
405-
(e.g. openai.Completion.create or
406-
openai.Completion.acreate)
407-
num_reasks: The max times to re-ask the LLM for invalid output.
408-
409-
Returns:
410-
The validated response. This is either a string or a dictionary, \
411-
determined by the object schema defined in the RAILspec.
403+
Args:
404+
llm_api: The LLM API to call
405+
(e.g. openai.Completion.create or openai.Completion.acreate)
406+
num_reasks: The max times to re-ask the LLM for invalid output.
407+
408+
Returns:
409+
The validated response. This is either a string or a dictionary,
410+
determined by the object schema defined in the RAILspec.
412411
"""
413412
num_reasks = (
414413
num_reasks if num_reasks is not None else 0 if llm_api is None else None

0 commit comments

Comments
 (0)