-
Notifications
You must be signed in to change notification settings - Fork 147
feat: Sensitive Data Detection in files like (.csv, .xlsx, .json) #761
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
2e77117
c449a9c
8b257a2
027a459
df82548
fd26523
d2314ac
ddc4e98
d9de766
f6798c5
868c074
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Psingle20 marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,3 @@ | ||
| #!/usr/bin/env sh | ||
| . "$(dirname -- "$0")/_/husky.sh" | ||
|
|
||
Psingle20 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| npx --no -- commitlint --edit ${1} && npm run lint | ||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,136 @@ | ||||||||
| const fs = require('fs'); | ||||||||
| const csv = require('csv-parser'); | ||||||||
| const XLSX = require('xlsx'); | ||||||||
| const path = require('path'); | ||||||||
| // const { exec: getDiffExec } = require('./getDiff'); | ||||||||
/**
 * Check whether a single value looks like it contains sensitive data.
 *
 * The value is stringified and tested against a small set of regular
 * expressions. When a pattern matches, the offending value is logged in red.
 *
 * Known limitations (raised in review): these patterns are coarse, will
 * produce false positives, and only cover US-style formats.
 *
 * @param {*} cell - Value to inspect; it is converted with `String(cell)`.
 * @returns {boolean} `true` if any pattern matches, otherwise `false`.
 */
const checkForSensitiveData = (cell) => {
  const sensitivePatterns = [
    // Word boundaries keep the SSN pattern from matching inside longer digit
    // runs (e.g. "9123-45-67890"), consistent with the patterns below.
    /\b\d{3}-\d{2}-\d{4}\b/, // Social Security Number (SSN)
    /\b\d{16}\b/, // Credit card numbers
    /\b\d{5}-\d{4}\b/, // ZIP+4 codes
    // Add more patterns as needed
  ];

  const text = String(cell);
  const matched = sensitivePatterns.some((pattern) => pattern.test(text));
  if (matched) {
    console.log(`\x1b[31mDetected sensitive data: ${cell}\x1b[0m`); // Log the detected sensitive data in red
  }
  return matched;
};
/**
 * Scan every cell of a CSV file for sensitive data.
 *
 * The file is streamed through `csv-parser`; each parsed row is an object
 * whose values are passed to `checkForSensitiveData`.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @returns {Promise<boolean>} Resolves with `true` if any cell matched,
 *   `false` otherwise. Rejects with the underlying error if the file cannot
 *   be read or parsed.
 */
const processCSV = async (filePath) => {
  return new Promise((resolve, reject) => {
    let sensitiveDataFound = false;
    let settled = false;
    const source = fs.createReadStream(filePath);
    const parser = csv();

    const fail = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      console.error(`Error reading CSV file: ${err.message}`);
      // Tear down both ends so no file descriptor is left open.
      source.destroy();
      parser.destroy();
      reject(err); // Reject the promise on error
    };

    // `pipe()` does not forward errors from the source to the destination,
    // so the read stream needs its own handler (e.g. for a missing file).
    source.on('error', fail);
    parser.on('error', fail);

    source
      .pipe(parser)
      .on('data', (row) => {
        for (const [key, value] of Object.entries(row)) {
          if (checkForSensitiveData(value)) {
            console.log(`\x1b[33mSensitive data found in CSV: ${key}: ${value}\x1b[0m`); // Log in yellow
            sensitiveDataFound = true;
          }
        }
      })
      .on('end', () => {
        if (settled) {
          return;
        }
        settled = true;
        if (!sensitiveDataFound) {
          console.log('No sensitive data found in CSV.');
        }
        resolve(sensitiveDataFound); // Resolve with the flag indicating if sensitive data was found
      });
  });
};
/**
 * Scan every worksheet of an XLSX workbook for sensitive data.
 *
 * Each sheet is converted to row objects with `XLSX.utils.sheet_to_json` and
 * every cell value is passed to `checkForSensitiveData`. All sheets listed in
 * `workbook.SheetNames` are inspected, not only the first one, so data on
 * secondary sheets is not missed.
 *
 * @param {string} filePath - Path of the workbook to read.
 * @returns {Promise<boolean>} Resolves with `true` if any cell matched,
 *   `false` otherwise. Rejects with the underlying error if the workbook
 *   cannot be read.
 */
const processXLSX = async (filePath) => {
  let sensitiveDataFound = false;
  try {
    const workbook = XLSX.readFile(filePath);
    for (const sheetName of workbook.SheetNames) {
      const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName]);
      for (const row of rows) {
        for (const [key, value] of Object.entries(row)) {
          if (checkForSensitiveData(value)) {
            console.log(`\x1b[33mSensitive data found in XLSX: ${key}: ${value}\x1b[0m`); // Log in yellow
            sensitiveDataFound = true;
          }
        }
      }
    }
  } catch (error) {
    console.error(`Error reading XLSX file: ${error.message}`);
    throw error; // Surfaces as a rejection of the returned promise
  }

  if (!sensitiveDataFound) {
    console.log('No sensitive data found in XLSX.');
  }
  return sensitiveDataFound;
};
/**
 * Scan a text file (used for `.log` and `.json`) for sensitive data.
 *
 * The whole file is read as UTF-8 and handed to `checkForSensitiveData` as a
 * single string.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<boolean>} Resolves with `true` if the content matched,
 *   `false` otherwise. Rejects with the read error if the file cannot be read.
 */
const checkLogJsonFiles = async (filePath) => {
  let contents;
  try {
    contents = await fs.promises.readFile(filePath, 'utf8');
  } catch (readError) {
    console.error(`Error reading file ${filePath}: ${readError.message}`);
    throw readError;
  }

  if (!checkForSensitiveData(contents)) {
    return false;
  }
  console.log(`\x1b[33mSensitive data found in ${filePath}\x1b[0m`);
  return true;
};
/**
 * Route a file to the scanner that matches its extension.
 *
 * The extension is compared case-insensitively. `.csv` goes to `processCSV`,
 * `.xlsx` to `processXLSX`, and `.log` / `.json` to `checkLogJsonFiles`.
 * Any other extension is skipped silently.
 *
 * @param {string} filePath - Path of the file to scan.
 * @returns {Promise<boolean>} The scanner's result, or `false` for
 *   unsupported file types.
 */
const parseFile = async (filePath) => {
  const extension = path.extname(filePath).toLowerCase();

  if (extension === '.csv') {
    return await processCSV(filePath);
  }
  if (extension === '.xlsx') {
    return await processXLSX(filePath);
  }
  if (extension === '.log' || extension === '.json') {
    return await checkLogJsonFiles(filePath);
  }

  // Skip unsupported file types without logging
  return false;
};
/**
 * Processor step: block the push when a changed file contains sensitive data.
 *
 * Looks up the step named `'diff'` on the action, reads `content.filePaths`
 * from it, and scans every listed file with `parseFile`. If any file reports
 * sensitive data, the action is flagged as blocked and errored.
 *
 * @param {object} req - Incoming request; not used by this step.
 * @param {object} action - Action being processed. `action.steps` is searched
 *   for the diff step; `pushBlocked`, `error` and `errorMessage` are set on it
 *   when sensitive data is detected.
 * @returns {Promise<object>} The same `action` object (returned for testing).
 *   Rejects if scanning any file fails.
 */
const exec = async (req, action) => {
  // Tolerate an action without a steps array instead of throwing on `.find`.
  const steps = Array.isArray(action.steps) ? action.steps : [];
  const diffStep = steps.find((s) => s.stepName === 'diff');

  if (!diffStep || !diffStep.content) {
    console.log('No diff content available.');
    return action;
  }

  console.log('Diff content:', diffStep.content);
  const filePaths = diffStep.content.filePaths || [];
  if (filePaths.length === 0) {
    console.log('No file paths provided in the diff step.');
    return action;
  }

  // Check for sensitive data in all files
  const results = await Promise.all(filePaths.map((filePath) => parseFile(filePath)));
  if (results.some(Boolean)) {
    action.pushBlocked = true; // Block the push
    action.error = true; // Set error flag
    action.errorMessage = 'Your push has been blocked due to sensitive data detection.'; // Set error message
    console.log(action.errorMessage);
  }

  return action; // Returning action for testing purposes
};
// Named after this module (exported as `checkSensitiveData`), not the
// unrelated `logFileChanges` name.
exec.displayName = 'checkSensitiveData.exec';
| exports.exec = exec; | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -1,6 +1,7 @@ | ||||
| const child = require('child_process'); | ||||
| const Step = require('../../actions').Step; | ||||
|
|
||||
|
|
||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Mistaken change?
Suggested change
|
||||
| const exec = async (req, action) => { | ||||
| const step = new Step('diff'); | ||||
|
|
||||
|
|
||||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -11,3 +11,4 @@ exports.checkCommitMessages = require('./checkCommitMessages').exec; | |||||||
| exports.checkAuthorEmails = require('./checkAuthorEmails').exec; | ||||||||
| exports.checkUserPushPermission = require('./checkUserPushPermission').exec; | ||||||||
| exports.clearBareClone = require('./clearBareClone').exec; | ||||||||
| exports.checkSensitiveData = require('./checkSensitiveData').exec; | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add missing newline at the end of the file.
Suggested change
|
||||||||
Uh oh!
There was an error while loading. Please reload this page.