diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..9c734d2 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +omit = + */tests* \ No newline at end of file diff --git a/.gitignore b/.gitignore index e495ff8..3583576 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,107 @@ -anon.sql +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# dump +*.sql diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8fc895f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,13 @@ +language: python + +python: + - "3.5" + - "3.6" + - "3.9" + - "3.10" + +install: "make setup" + +script: + - make test + - make coveralls diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cdd8bf4 --- /dev/null +++ 
b/Makefile @@ -0,0 +1,25 @@ +.SILENT: +PIP=.venv/bin/pip +PYTEST=.venv/bin/pytest +PYTHON=.venv/bin/python +COVERALLS=.venv/bin/coveralls + + +test:clean + PYTHONPATH=anonymize ${PYTEST} -s -v --cov=anonymize --cov-report term-missing tests/${path} + +venv: + virtualenv .venv --python=python3 + +setup:venv + ${PIP} install -U pip + ${PIP} install -r requirements_dev.txt + +clean: + find . -name "*.pyc" -exec rm -rf {} \; + +sample: + PYTHONPATH=anonymize ${PYTHON} anonymize/__init__.py + +coveralls: + ${COVERALLS} diff --git a/README.md b/README.md index 05661c9..9a18059 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,69 @@ ## Mysql Anonymous +[![Build Status](https://travis-ci.org/riquellopes/mysql-anonymous.svg?branch=master)](https://travis-ci.org/riquellopes/mysql-anonymous) +[![Coverage Status](https://coveralls.io/repos/github/riquellopes/mysql-anonymous/badge.svg?branch=master)](https://coveralls.io/github/riquellopes/mysql-anonymous?branch=master) + Contributors can benefit from having real data when they are -developing. This script can do a few things (see `anonymize.yml`): +developing. This script can do a few things (see `sample1.yml` or `sample2.yml`): * Truncate any tables (logs, and other cruft which may have sensitive data) * Nullify fields (emails, passwords, etc) * Fill in random/arbitrary data: * Random integers * Random IP addresses + * Random Cell Phone + * Random Phone + * Random [CPF](https://pt.wikipedia.org/wiki/Cadastro_de_pessoas_f%C3%ADsicas) + * Random [CNPJ](https://pt.wikipedia.org/wiki/Cadastro_Nacional_da_Pessoa_Jur%C3%ADdica) * Email addresses * Usernames + * Text [Loren Ipsum](https://www.lipsum.com/) + * Delete rows based on simple rules: e.g. ``DELETE FROM mytable WHERE private = "Yes"``: + ```yml database: tables: mytable: - delete: + nullify: private: Yes + ``` + +* Apply rules exception in some cases: e.g. 
+ ``UPDATE mytable SET cellphone=NULL WHERE id NOT IN(556, 889)``: + + ```yml + database: + tables: + mytable: + exception: + - 556 + - 889 + nullify: + - cellphone + ``` + +* Define an other name for primary key of table: e.g. + ``UPDATE mytable SET `email` = CONCAT(user_id, '@example.com')``: -### Usage + ```yml + database: + tables: + primary_key: user_id + mytable: + random_email: email + ``` - python anonymize.py > anon.sql - cat anon.sql | mysql +Installation +------------ +```sh +pip install https://github.com/riquellopes/mysql-anonymous/tarball/master +``` +CookBook +-------- +```sh + anonymize --sample-one + anonymize --sample-two + anonymize -y database.yml +``` diff --git a/anonymize.py b/anonymize.py deleted file mode 100755 index 76743da..0000000 --- a/anonymize.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -# This assumes an id on each field. -import logging -import hashlib -import random - - -log = logging.getLogger('anonymize') -common_hash_secret = "%016x" % (random.getrandbits(128)) - - -def get_truncates(config): - database = config.get('database', {}) - truncates = database.get('truncate', []) - sql = [] - for truncate in truncates: - sql.append('TRUNCATE `%s`' % truncate) - return sql - - -def get_deletes(config): - database = config.get('database', {}) - tables = database.get('tables', []) - sql = [] - for table, data in tables.iteritems(): - if 'delete' in data: - fields = [] - for f, v in data['delete'].iteritems(): - fields.append('`%s` = "%s"' % (f, v)) - statement = 'DELETE FROM `%s` WHERE ' % table + ' AND '.join(fields) - sql.append(statement) - return sql - -listify = lambda x: x if isinstance(x, list) else [x] - -def get_updates(config): - global common_hash_secret - - database = config.get('database', {}) - tables = database.get('tables', []) - sql = [] - for table, data in tables.iteritems(): - updates = [] - for operation, details in data.iteritems(): - if operation == 'nullify': - for field in listify(details): - 
updates.append("`%s` = NULL" % field) - elif operation == 'random_int': - for field in listify(details): - updates.append("`%s` = ROUND(RAND()*1000000)" % field) - elif operation == 'random_ip': - for field in listify(details): - updates.append("`%s` = INET_NTOA(RAND()*1000000000)" % field) - elif operation == 'random_email': - for field in listify(details): - updates.append("`%s` = CONCAT(id, '@mozilla.com')" - % field) - elif operation == 'random_username': - for field in listify(details): - updates.append("`%s` = CONCAT('_user_', id)" % field) - elif operation == 'hash_value': - for field in listify(details): - updates.append("`%(field)s` = MD5(CONCAT(@common_hash_secret, `%(field)s`))" - % dict(field=field)) - elif operation == 'hash_email': - for field in listify(details): - updates.append("`%(field)s` = CONCAT(MD5(CONCAT(@common_hash_secret, `%(field)s`)), '@mozilla.com')" - % dict(field=field)) - elif operation == 'delete': - continue - else: - log.warning('Unknown operation.') - if updates: - sql.append('UPDATE `%s` SET %s' % (table, ', '.join(updates))) - return sql - - -def anonymize(config): - database = config.get('database', {}) - - if 'name' in database: - print "USE `%s`;" % database['name'] - - print "SET FOREIGN_KEY_CHECKS=0;" - - sql = [] - sql.extend(get_truncates(config)) - sql.extend(get_deletes(config)) - sql.extend(get_updates(config)) - for stmt in sql: - print stmt + ';' - - print "SET FOREIGN_KEY_CHECKS=1;" - print - -if __name__ == '__main__': - - import yaml - import sys - - if len(sys.argv) > 1: - files = sys.argv[1:] - else: - files = [ 'anonymize.yml' ] - - for f in files: - print "--" - print "-- %s" %f - print "--" - print "SET @common_hash_secret=rand();" - print "" - cfg = yaml.load(open(f)) - if 'databases' not in cfg: - anonymize(cfg) - else: - databases = cfg.get('databases') - for name, sub_cfg in databases.items(): - print "USE `%s`;" % name - anonymize({'database': sub_cfg}) diff --git a/anonymize/__init__.py 
b/anonymize/__init__.py
new file mode 100644
index 0000000..950ad27
--- /dev/null
+++ b/anonymize/__init__.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# coding: utf-8
+"""Command-line front end: load a YAML scheme and print the anonymizing SQL."""
+from __future__ import print_function, absolute_import
+import os
+import logging
+from yaml import Loader, load
+from optparse import OptionParser
+from .anonymize import AnonymizeSchemes
+
+logging.basicConfig(
+    filename='anonymize.log',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.DEBUG)
+
+
+class Anonymize(object):
+    """Resolve which YAML scheme to use and print the SQL generated from it.
+
+    :param file_name: path given with ``-y/--yaml``, or ``None``.
+    :param sample: number of a bundled ``sample<N>.yml``, or ``None``; it is
+        only used when ``file_name`` is ``None``.
+    """
+
+    def __init__(self, file_name, sample):
+        self._file_name = file_name
+        self._sample = sample
+        self._run = True
+
+        self.validate()
+
+    def run(self):
+        """Build and print the SQL unless ``validate`` rejected the options."""
+        if self._run:
+            with open(self.file_name) as handle:
+                # NOTE(review): ``Loader`` can construct arbitrary Python
+                # objects from tagged YAML; only load scheme files you trust.
+                a = AnonymizeSchemes(load(handle, Loader=Loader))
+                a.build()
+
+    @property
+    def file_name(self):
+        """Path of the scheme: the bundled sample, or the path supplied."""
+        if self.is_sample():
+            return os.path.join(
+                os.path.dirname(os.path.dirname(__file__)),
+                'anonymize',
+                "sample{}.yml".format(self._sample))
+        return self._file_name
+
+    def is_sample(self):
+        """Truthy when no file was given but a sample number was."""
+        return self._file_name is None and self._sample
+
+    def validate(self):
+        """Disable ``run`` and report when neither option was supplied."""
+        if self._sample is None and self._file_name is None:
+            self._run = False
+            # Reset the colour afterwards so the terminal does not stay red.
+            print('\033[91m' + "Invalid option." + '\033[0m')
+
+
+def main():
+    """Parse the command line and run the selected scheme."""
+    parser = OptionParser()
+    parser.add_option('-y', '--yaml', help="YAML file to read data from.")
+    parser.add_option('--sample-one', const=1, action="store_const")
+    parser.add_option('--sample-two', const=2, action="store_const")
+
+    (options, __) = parser.parse_args()
+
+    anonymize = Anonymize(**{
+        "file_name": options.yaml,
+        "sample": options.sample_one or options.sample_two
+    })
+    anonymize.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/anonymize/anonymize.py b/anonymize/anonymize.py
new file mode 100755
index 0000000..b7dfc07
--- /dev/null
+++ b/anonymize/anonymize.py
@@ -0,0 +1,122 @@
+from __future__ import print_function
+import itertools
+import random
+from .field import AnonymizeField
+from collections import OrderedDict
+
+
+common_hash_secret = "%016x" % (random.getrandbits(128))
+
+
+class AnonymizeBaseAction(list):
+    """List of SQL statements, filled in by the subclass's ``create()``."""
+    def __init__(self, scheme):
+        self._scheme = scheme
+        self.create()
+
+
+class AnonymizeTruncate(AnonymizeBaseAction):
+    """One ``TRUNCATE`` per table listed under the ``truncate`` key."""
+    def create(self):
+        for truncate in self._scheme.database.get("truncate", []):
+            self.append('TRUNCATE `{}`'.format(truncate))
+
+
+class AnonymizeDelete(AnonymizeBaseAction):
+    """One ``DELETE`` per table that declares ``delete`` conditions."""
+    def create(self):
+        for table, data in self._scheme.tables.items():
+            if 'delete' in data:
+                self.append('DELETE FROM `{}` WHERE '.format(table) + ' AND '.join(
+                    ['`{}` = "{}"'.format(f, v) for f, v in data['delete'].items()]
+                ))
+
+
+class AnonymizeUpdate(AnonymizeBaseAction):
+    """One ``UPDATE`` per table that has at least one field operation."""
+    def create(self):
+        # Use get(), not pop(): the parsed config must not be modified here.
+        reserved = ('primary_key', 'exception')
+        for table, data in self._scheme.tables.items():
+            updates = []
+            primary_key, exception = data.get('primary_key', "id"), data.get('exception', [])
+            data = OrderedDict((k, data[k]) for k in sorted(data) if k not in reserved)
+
+            anon = AnonymizeField(data, primary_key)
+
+            for n in anon.build():
+                updates.append(n.render())
+
+            if updates:
+                self.append(
+                    'UPDATE `{}` SET {}{}'.format(
+                        table,
+                        ', '.join(updates),
+                        self._sql_exception(primary_key, exception)))
+
+    def _sql_exception(self, primary_key, exception):
+        where = ""
+        if exception:
+            where = " WHERE {primary_key} NOT IN({ids})".format(
+                primary_key=primary_key, ids=", ".join(map(str, exception)))
+        return where
+
+
+class AnonymizeScheme(object):
+    """Print the statements for one database configuration mapping."""
+    def __init__(self, name, cfg):
+        self._name = name
+        self._cfg = cfg
+
+    def create(self):
+        if self._print_use():
+            print("USE `{}`".format(self.name))
+        print("SET FOREIGN_KEY_CHECKS=0;")
+
+        for action in self._actions():
+            print("{};".format(action))
+        print("SET FOREIGN_KEY_CHECKS=1;")
+        print()
+
+    @property
+    def database(self):
+        return self._cfg
+
+    @property
+    def tables(self):
+        return self.database.get("tables", {})
+
+    def _print_use(self):
+        return "name" in self.database
+
+    @property
+    def name(self):
+        return self.database['name'] or self._name
+
+    def _actions(self):
+        return itertools.chain(
+            AnonymizeTruncate(self), AnonymizeDelete(self), AnonymizeUpdate(self))
+
+
+class AnonymizeSchemes(object):
+    """Print the SQL for every database found in the loaded YAML config."""
+    def __init__(self, cfg):
+        self._cfg = cfg
+        self._print_use = False
+
+    def build(self):
+        print("--")
+        print("SET @common_hash_secret=rand();")
+        print("")
+
+        for name, cfg in self._databases().items():
+            if self._print_use:
+                print("USE `{}`;".format(name))
+            a = AnonymizeScheme(name, cfg)
+            a.create()
+
+    def _databases(self):
+        if "databases" in self._cfg:
+            self._print_use = True
+            return self._cfg.get("databases")
+        return {"default": self._cfg.get("database") or {}}
diff --git a/anonymize/field.py b/anonymize/field.py
new file mode 100644
index 0000000..b03c33a
--- /dev/null
+++ b/anonymize/field.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+"""SQL ``SET`` fragments for each anonymizing operation."""
+import logging
+logger = logging.getLogger('anonymize')
+
+
+class Field(object):
+    """One ``SET`` assignment; subclasses provide the ``sql_field`` template.
+
+    :param field: column to overwrite.
+    :param primary_key: column substituted for ``{primary_key}``.
+    """
+
+    def __init__(self, field, primary_key="id"):
+        self._field = field
+        self._primary_key = primary_key
+
+    def render(self):
+        """Return the template with the column names filled in."""
+        return self.sql_field.format(
+            field=self._field, primary_key=self._primary_key)
+
+
+class Nullify(Field):
+    sql_field = "`{field}` = NULL"
+
+
+class RandomInt(Field):
+    sql_field = "`{field}` = ROUND(RAND()*1000000)"
+
+
+class RandomIp(Field):
+    sql_field = "`{field}` = INET_NTOA(RAND()*1000000000)"
+
+
+class RandomEmail(Field):
+    sql_field = "`{field}` = CONCAT({primary_key}, '@example.com')"
+
+
+class RandomUsername(Field):
+    sql_field = "`{field}` = CONCAT('_user_', {primary_key})"
+
+
+class RandomCellPhone(Field):
+    sql_field = "`{field}` = LPAD({primary_key}, 13, 5)"
+
+
+class RandomPhone(Field):
+    sql_field = "`{field}` = LPAD({primary_key}, 12, 5)"
+
+
+class RandomCpf(Field):
+    sql_field = "`{field}` = LPAD({primary_key}, 11, 5)"
+
+
+class RandomCnpj(Field):
+    sql_field = "`{field}` = LPAD({primary_key}, 14, 5)"
+
+
+class HashValue(Field):
+    sql_field = "`{field}` = MD5(CONCAT(@common_hash_secret, `{field}`))"
+
+
+class HashEmail(Field):
+    sql_field = "`{field}` = CONCAT(MD5(CONCAT(@common_hash_secret, `{field}`)), '@example.com')"
+
+
+class LoremIpsum(Field):
+    # NOTE(review): the text is wrapped in backticks, which MySQL reads as an
+    # identifier rather than a string literal, so this assignment looks broken.
+    # tests/test_get_updates.py pins the current output, so the quoting is left
+    # untouched here and has to be fixed together with that test.
+    sql_field = """`{field}` = `Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum`"""
+
+
+class AnonymizeField(object):
+    """Turn the operations configured for a table into ``Field`` objects.
+
+    :param data: mapping of operation name to a column or a list of columns.
+    :param primary_key: primary key column handed to every ``Field``.
+    """
+
+    def __init__(self, data, primary_key):
+        self._data = data
+        self._primary_key = primary_key
+
+        self._fields = {
+            "nullify": Nullify,
+            "random_int": RandomInt,
+            "random_ip": RandomIp,
+            "random_email": RandomEmail,
+            "random_username": RandomUsername,
+            "random_cell_phone": RandomCellPhone,
+            "random_phone": RandomPhone,
+            "random_cpf": RandomCpf,
+            "random_cnpj": RandomCnpj,
+            "hash_value": HashValue,
+            "hash_email": HashEmail,
+            'text_lorem_ipsum': LoremIpsum
+        }
+
+    def build(self):
+        """Yield a ``Field`` for every configured column.
+
+        ``delete`` entries are skipped silently because ``AnonymizeDelete``
+        turns them into statements; other unknown operations are logged.
+        """
+        for operation, details in self._data.items():
+            if self._valid_operation(operation):
+                for field in self._listify(details):
+                    yield self.get_field(operation, field)
+            elif not self._delete_operation(operation):
+                logger.warning("Unknown {} operation.".format(operation))
+
+    def _valid_operation(self, operation):
+        return operation in self._fields
+
+    def _delete_operation(self, operation):
+        return operation == "delete"
+
+    def _listify(self, values):
+        if isinstance(values, list):
+            return values
+        return [values, ]
+
+    def get_field(self, operation, field):
+        return self._fields[operation](field, self._primary_key)
diff --git a/anonymize.yml b/anonymize/sample1.yml
similarity index 97%
rename from anonymize.yml
rename to anonymize/sample1.yml index e078d17..36c8f4b 100644 --- a/anonymize.yml +++ b/anonymize/sample1.yml @@ -52,6 +52,9 @@ database: - versioncomments tables: addons: + exception: + - 556 + - 889 nullify: [nominationmessage, paypal_id, charity_id] random_int: - average_daily_downloads diff --git a/developer_mozilla_org.yml b/anonymize/sample2.yml similarity index 100% rename from developer_mozilla_org.yml rename to anonymize/sample2.yml diff --git a/requirements.txt b/requirements.txt index c3726e8..bee6c14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -pyyaml +PyYAML==6.0 diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..e67a04a --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,8 @@ +-r requirements.txt + +pytest==6.2.5 +pytest-mock==3.7.0 +pytest-cov==3.0.0 +ipdb==0.13.9 +coverage==6.3.1 +python-coveralls==2.9.3 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..fd65e38 --- /dev/null +++ b/setup.py @@ -0,0 +1,45 @@ +""" + Contributors can benefit from having real data when they are developing. This script can do a few things (see anonymize.yml): + + Truncate any tables (logs, and other cruft which may have sensitive data) + + Nullify fields (emails, passwords, etc) + + Fill in random/arbitrary data: + + Random integers + Random IP addresses + Email addresses + Usernames + Delete rows based on simple rules: e.g. 
DELETE FROM mytable WHERE private = "Yes":
+
+    database: tables: mytable: delete: private: Yes
+"""
+
+from setuptools import setup
+
+setup_params = {
+    "entry_points": {
+        "console_scripts": [
+            "anonymize=anonymize:main"
+        ]
+    }
+}
+
+
+setup(
+    author="Dave Dash",
+    author_email="dd+github@davedash.com, contato@henriquelopes.com.br",
+    version='0.2',
+    name="Mysql Anonymous",
+    url="https://github.com/davedash/mysql-anonymous",
+    packages=["anonymize"],
+    platforms=['python >= 2.7'],
+    description=__doc__,
+    long_description=__doc__,
+    install_requires=["PyYAML>=5.1"],  # a range, so it agrees with requirements.txt
+    py_modules=["anonymize"],
+    package_data={'': ['sample1.yml', 'sample2.yml']},
+    include_package_data=True,
+    **setup_params
+)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..ba7e1fb
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,24 @@
+import sys
+import pytest
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+
+@pytest.fixture
+def stdout():
+    """Swap ``sys.stdout`` for an in-memory buffer during one test.
+
+    Yields ``(old_stdout, buffer)``. The original stream is put back on
+    teardown, so a test that fails before restoring it by hand does not
+    leave the redirection in place for later tests.
+    """
+    old_stdout = sys.stdout
+
+    result = StringIO()
+    sys.stdout = result
+    try:
+        yield old_stdout, result
+    finally:
+        sys.stdout = old_stdout
diff --git a/tests/test_anonymize.py b/tests/test_anonymize.py
new file mode 100644
index 0000000..ad00704
--- /dev/null
+++ b/tests/test_anonymize.py
@@ -0,0 +1,19 @@
+import os
+import sys
+import pytest
+from anonymize import Anonymize
+
+
+@pytest.fixture
+def sample():
+    BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+    return os.path.join(BASE_DIR, 'anonymize', "sample1.yml")
+
+
+def test_should_get_trucate_at_output(sample, stdout):
+    a = Anonymize(file_name=sample, sample="")
+    a.run()
+
+    sys.stdout, result = stdout
+    assert "TRUNCATE `stats_collections_counts`;" in result.getvalue()
+    assert " WHERE id NOT IN(556, 889)" in result.getvalue()
diff --git a/tests/test_anonymize_scheme.py b/tests/test_anonymize_scheme.py
new file mode 100644
index 0000000..f632691
--- /dev/null
+++
b/tests/test_anonymize_scheme.py @@ -0,0 +1,40 @@ +import sys +from anonymize.anonymize import AnonymizeScheme, AnonymizeSchemes + + +def test_property_print_use_should_get_true(): + s = AnonymizeScheme("anything", { + "name": {} + }) + + assert s._print_use() + + +def test_property_print_use_should_get_false(): + s = AnonymizeScheme("anything", { + "names": {} + }) + + assert s._print_use() is False + + +def test_should_start_with_the_correct_header(stdout): + s = AnonymizeScheme("anything", { + "name": {} + }) + + s.create() + sys.stdout, result = stdout + + assert "USE `anything`\nSET FOREIGN_KEY_CHECKS=0;\nSET FOREIGN_KEY_CHECKS=1;\n\n" in result.getvalue() + + +def test_shoulg_get_true_and_empty_dict(): + s = AnonymizeSchemes({ + "databases": {} + }) + + databases = s._databases() + + assert s._print_use + assert databases == {} diff --git a/tests/test_field_methods.py b/tests/test_field_methods.py new file mode 100644 index 0000000..6d0d6bf --- /dev/null +++ b/tests/test_field_methods.py @@ -0,0 +1,62 @@ +import pytest +from anonymize.field import ( + AnonymizeField, Nullify, RandomCellPhone, RandomPhone, RandomCpf, RandomCnpj, LoremIpsum) + + +@pytest.fixture +def anon(): + return AnonymizeField({}, "id") + + +def test_should_get_a_list_item(anon): + assert anon._listify("") == [""] + + +def test_should_get_a_list_none(anon): + assert anon._listify(None) == [None, ] + + +def test_should_get_true(anon): + assert anon._valid_operation("nullify") + + +def test_should_get_false(anon): + assert anon._valid_operation("nullifys") is False + + +def test_should_get_true_for_delete_operations(anon): + assert anon._delete_operation("delete") + + +def test_should_get_Nullify_instance(anon): + assert isinstance(anon.get_field("nullify", "id"), Nullify) + + +def test_should_get_RandomCellPhone_instance(anon): + cell_phone = anon.get_field("random_cell_phone", "id") + assert isinstance(cell_phone, RandomCellPhone) + assert cell_phone.render() == "`id` = LPAD(id, 13, 5)" + 
+ +def test_should_get_RandomPhone_instance(anon): + phone = anon.get_field("random_phone", "id") + assert isinstance(phone, RandomPhone) + assert phone.render() == "`id` = LPAD(id, 12, 5)" + + +def test_should_get_RandomCpf_instance(anon): + cpf = anon.get_field("random_cpf", "id") + assert isinstance(cpf, RandomCpf) + assert cpf.render() == "`id` = LPAD(id, 11, 5)" + + +def test_should_get_RandomCnpj_instance(anon): + cnpj = anon.get_field("random_cnpj", "cnpj") + assert isinstance(cnpj, RandomCnpj) + assert cnpj.render() == "`cnpj` = LPAD(id, 14, 5)" + + +def test_should_get_LoremIpsum_instance(anon): + lipsum = anon.get_field("text_lorem_ipsum", "text") + assert isinstance(lipsum, LoremIpsum) + assert "Lorem Ipsum" in lipsum.render() diff --git a/tests/test_get_deletes.py b/tests/test_get_deletes.py new file mode 100644 index 0000000..fc0df2a --- /dev/null +++ b/tests/test_get_deletes.py @@ -0,0 +1,19 @@ +from anonymize.anonymize import AnonymizeDelete, AnonymizeScheme + + +def test_should_get_the_empty_list(): + truncates = AnonymizeDelete(AnonymizeScheme("default", {})) + + assert truncates == [] + + +def test_should_get_the_list_of_delete_itens(): + deleted = AnonymizeDelete(AnonymizeScheme("default", { + "tables": { + "user": { + "delete": {"id": 1} + } + } + })) + + assert deleted == ['DELETE FROM `user` WHERE `id` = "1"'] diff --git a/tests/test_get_updates.py b/tests/test_get_updates.py new file mode 100644 index 0000000..f07f43e --- /dev/null +++ b/tests/test_get_updates.py @@ -0,0 +1,44 @@ +from anonymize.anonymize import AnonymizeUpdate, AnonymizeScheme + + +def test_should_get_the_update_list(): + data = AnonymizeUpdate(AnonymizeScheme("default", { + "tables": { + "user": { + "nullify": ["phone", ], + "random_email": ["email", ], + "random_ip": ['ip', ] + } + } + })) + + r = ["UPDATE `user` SET `phone` = NULL, `email` = CONCAT(id, '@example.com'), `ip` = INET_NTOA(RAND()*1000000000)"] + assert data == r + + +def 
test_should_get_the_update_list_with_cnpj(): + data = AnonymizeUpdate(AnonymizeScheme("default", { + "tables": { + "user": { + "nullify": ["phone", ], + "random_cnpj": ["cnpj", ] + } + } + })) + + r = ['UPDATE `user` SET `phone` = NULL, `cnpj` = LPAD(id, 14, 5)'] + assert data == r + + +def test_should_get_the_update_list_with_lipsum(): + data = AnonymizeUpdate(AnonymizeScheme("default", { + "tables": { + "user": { + "text_lorem_ipsum": ["text", ], + } + } + })) + + r = ["UPDATE `user` SET `text` = `Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum`"] + + assert data == r diff --git a/tests/test_truncates.py b/tests/test_truncates.py new file mode 100644 index 0000000..c4f3739 --- /dev/null +++ b/tests/test_truncates.py @@ -0,0 +1,18 @@ +from anonymize.anonymize import AnonymizeTruncate, AnonymizeScheme + + +def test_should_get_the_empty_list(): + truncates = AnonymizeTruncate(AnonymizeScheme("default", {})) + + assert truncates == [] + + +def test_should_get_the_list_of_truncate_tables(): + truncates = AnonymizeTruncate(AnonymizeScheme("default", { + "truncate": [ + "user", + "subscribers" + ] + })) + + assert truncates == ['TRUNCATE `user`', 'TRUNCATE `subscribers`']