+[{"layout":null,"template":null,"templateConfig":null,"name":"OML4Py Data Cleaning Duplicates Removal","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# OML4Py Data Cleaning: Duplicates Removal","In this notebook, we demonstrate how to remove duplicate records using OML4Py.","","We use the customer insurance lifetime value data set which contains customer financial information, lifetime value, and whether or not the customer bought insurance. ","","The dataset `CUSTOMER_INSURANCE_LTV_PY` is generated by the `\"OML Run-me-first\"` notebook, which `MUST` be run before this notebook.","","---","","###### IMPORTANT: The `\"OML Run-me-first\"` notebook is available under the menu `Templates -> Examples` and is a prerequisite to the current notebook.","","---","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! 
window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information ...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADW Documentation<\/a>","* <a href=\"https://github.com/oracle/oracle-db-examples/tree/master/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4py/1/mlpug/clean-data.html\" target=\"_blank\">OML4Py Data Cleaning<\/a>","","","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import python libraries ","message":["%python","","import warnings","warnings.filterwarnings('ignore')","","import pandas as pd","import oml"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Get proxy object for CUSTOMER_INSURANCE_LTV_PY table","message":["%python","","CUST_DF = oml.sync(table = 
'CUSTOMER_INSURANCE_LTV_PY')"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Count number of unique customer IDs","message":["%python","","CUST_DF['CUSTOMER_ID'].nunique()"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":5,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Show table dimensions - note there are more rows than customer IDs","message":["%python","","CUST_DF.shape"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":7,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Remove duplicate rows","message":["%python","","CUST_DF = CUST_DF.drop_duplicates()"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Check number of unique customer IDs","message":["%python","","CUST_DF['CUSTOMER_ID'].nunique()"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":5,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Check dimension of the OML dataframe - duplicated rows 
removed","message":["%python","","CUST_DF.shape"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":7,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","## End of Script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]
0 commit comments