diff --git a/app/api/api_v1/routers/dataset.py b/app/api/api_v1/routers/dataset.py index 486eea3..70e5899 100644 --- a/app/api/api_v1/routers/dataset.py +++ b/app/api/api_v1/routers/dataset.py @@ -16,14 +16,7 @@ from fastapi.templating import Jinja2Templates from app.core.config import Settings - -# from app.models.date_strftime_pattern import DateStrftimePattern from app.models.enums import ExpectationResultFormat, ExpectationResultType - -# from app.models.expect_column_values_to_be_in_set import ColumnValuesToBeInSet -# from app.models.general import GeneralTableExpectation -# from app.models.regex_list_pattern import RegexMatchList -# from app.models.regex_pattern import RegexPatternExpectation from app.utils.dataset import ( datasets_expectation, datasets_expectation_from_url, diff --git a/app/api/api_v1/routers/dictionary.py b/app/api/api_v1/routers/dictionary.py index c76030d..e1789b6 100644 --- a/app/api/api_v1/routers/dictionary.py +++ b/app/api/api_v1/routers/dictionary.py @@ -1,4 +1,7 @@ +import io + import pandas as pd +import requests from fastapi import APIRouter, HTTPException, status from fastapi.encoders import jsonable_encoder from fastapi.responses import JSONResponse @@ -11,14 +14,40 @@ dictionary_router = router = APIRouter() +# reading sheet name from env +google_spread_sheet_sheet_name = settings.GOOGLE_SPREAD_SHEET_SHEET_NAME +google_sheet_id = settings.GOOGLE_SHEET_ID + +g_sheet_session = requests.Session() +common_g_sheet_link_format = "https://docs.google.com/spreadsheets/d/" +g_sheet_id = f"{google_sheet_id}" +download_sheet_name = ( + f"/gviz/tq?tqx=out:csv&sheet={google_spread_sheet_sheet_name}" +) +url_name = common_g_sheet_link_format + g_sheet_id + download_sheet_name +g_sheet_response = g_sheet_session.get(url_name) +g_sheet_bytes_data = g_sheet_response.content +data = pd.read_csv(io.StringIO(g_sheet_bytes_data.decode("utf-8"))) + +standard_data_values = data.copy() +standard_data_values.rename( + columns={ + "country_standard_name": "country", + "unique_standard_airline_name": "airline", + "standard_disease_name": "diseases", + "psu_companies": "psu", + "standard_district_name": "district", + "standard_states": "state", + "insurance_standard_names": "insurance_companies", + }, + inplace=True, +) + @router.get("/", summary="Get all Saved Entities csv file name") async def get_entity_names(): # List down all the csv files present in the config folder - return [ - csv_file.name.replace(".csv", "") - for csv_file in CORE_FOLDER.glob("**/*.csv") - ] + return data.columns.tolist() @router.get( @@ -27,9 +56,9 @@ async def get_entity_names(): response_class=JSONResponse, ) async def get_entity_data(entity: str): - entity_df = pd.read_csv(CORE_FOLDER / f"{entity}.csv") + entity_df = data[[entity]].dropna() # to avoid json conversion error - entity_df = entity_df.fillna("") + # entity_df = entity_df.fillna("") # convert to json json_compatible_item_data = jsonable_encoder( diff --git a/app/core/config.py b/app/core/config.py index 07d8dee..b8532f6 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -63,6 +63,12 @@ class Settings(BaseSettings): # Metadata File Parameters METADATA_COLUMN_ORDER_STRING = "" + # Google spread-sheet sheet name + GOOGLE_SPREAD_SHEET_SHEET_NAME: str = "" + + # Google sheet id + GOOGLE_SHEET_ID: str = "" + class Config: env_file = ".env" @@ -502,26 +508,6 @@ class MetadataSettings(BaseSettings): ], } - # SHORT_FORM_EXPECTATION = { - # "data_asset_type": None, - # "expectation_suite_name": "short_form_expectation_suite", - # "expectations": [ - # { - # "expectation_type": "expect_column_values_to_be_in_set", - # "kwargs": { - # "column": "short_form", - # "value_set": [], - # "result_format": "SUMMARY", - # }, - # "meta": { - # "expectation_name": "Short Form in set of values", - # "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg", - # "expectation_error_message": "Short Form should be from the Data Dictionary", - # }, - # } - # ], - # } - FREQUENCY_OF_UPDATE_EXPECTATION = { "data_asset_type": None, "expectation_suite_name": "frequency_of_update_expectation_suite", @@ -595,25 +581,6 @@ class MetadataSettings(BaseSettings): TIME_SAVED_IN_HOURS_MSG: str = ( "Null values should not present in these columns" ) - # TIME_SAVED_IN_HOURS_EXPECTATION = { - # "data_asset_type": None, - # "expectation_suite_name": "time_saved_in_hours_expectation_suite", - # "expectations": [ - # { - # "expectation_type": "expect_column_values_to_be_in_set", - # "kwargs": { - # "column": "time_saved_in_hours", - # "value_set": [], - # "result_format": "SUMMARY", - # }, - # "meta": { - # "expectation_name": "Time Saved In Hours", - # "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg", - # "expectation_error_message": "Time Saved in Hours should be from the range of 2 to 6 hours", - # }, - # } - # ], - # } class TagsSettings(BaseSettings): diff --git a/app/models/date_strftime_pattern.py b/app/models/date_strftime_pattern.py index b1d6932..4c2839c 100644 --- a/app/models/date_strftime_pattern.py +++ b/app/models/date_strftime_pattern.py @@ -4,69 +4,6 @@ from pydantic import BaseModel -# class _Kwargs(BaseModel): -# column: str -# strftime_format: str -# result_format: str - -# class Config: -# underscore_attrs_are_private = True - -# class ExpectationConfig(BaseModel): -# _expectation_type: str -# _kwargs: _Kwargs -# _raw_kwargs: Any -# meta: Dict[str, Any] -# success_on_last_run: Any -# _ge_cloud_id: Any -# _expectation_context: Any - -# class Config: -# underscore_attrs_are_private = True - -# class PartialUnexpectedCount(BaseModel): -# value: str -# count: int - -# class Config: -# underscore_attrs_are_private = True - - -# class Result(BaseModel): -# element_count: Optional[int] -# missing_count: Optional[int] -# missing_percent: Optional[int] -# unexpected_count: Optional[int] -# unexpected_percent: Optional[int] -# unexpected_percent_total: Optional[int] -# unexpected_percent_nonmissing: Optional[int] -# partial_unexpected_list: Optional[List[str]] -# partial_unexpected_index_list: Optional[List[int]] -# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]] -# unexpected_list: Optional[List[str]] -# unexpected_index_list: Optional[List[int]] - - -# class ExceptionInfo(BaseModel): -# raised_exception: Optional[bool] -# exception_message: Optional[Any] -# exception_traceback: Optional[Any] - -# class Config: -# underscore_attrs_are_private = True - - -# class DateStrftimePattern(BaseModel): -# success: bool -# expectation_config: Optional[ExpectationConfig] -# result: Optional[Result] -# _meta: Optional[Dict[str, Any]] -# exception_info: Optional[ExceptionInfo] - -# class Config: -# underscore_attrs_are_private = True -# # response_model_exclude_unset = True - class _Kwargs(BaseModel): column: str diff --git a/app/models/expect_column_values_to_be_in_set.py b/app/models/expect_column_values_to_be_in_set.py index 183e251..c2801e4 100644 --- a/app/models/expect_column_values_to_be_in_set.py +++ b/app/models/expect_column_values_to_be_in_set.py @@ -4,42 +4,6 @@ from pydantic import BaseModel -# class PartialUnexpectedCount(BaseModel): -# value: str -# count: int - - -# class Result(BaseModel): -# element_count: Optional[int] -# missing_count: Optional[int] -# missing_percent: Optional[int] -# unexpected_count: Optional[int] -# unexpected_percent: Optional[int] -# unexpected_percent_total: Optional[int] -# unexpected_percent_nonmissing: Optional[int] -# partial_unexpected_list: Optional[List[str]] -# partial_unexpected_index_list: Optional[List[int]] -# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]] -# unexpected_list: Optional[List[str]] -# unexpected_index_list: Optional[List[int]] - - -# class ExceptionInfo(BaseModel): -# raised_exception: bool -# exception_message: Optional[Any] -# exception_traceback: Optional[Any] - - -# class ColumnValuesToBeInSet(BaseModel): -# success: bool -# _expectation_config: Optional[Any] -# result: Optional[Result] -# meta: Optional[Dict[str, Any]] -# _exception_info: Optional[ExceptionInfo] - -# class Config: -# underscore_attrs_are_private = True - class _Kwargs(BaseModel): column: str diff --git a/app/models/regex_list_pattern.py b/app/models/regex_list_pattern.py index 950c20d..8941fee 100644 --- a/app/models/regex_list_pattern.py +++ b/app/models/regex_list_pattern.py @@ -4,59 +4,6 @@ from pydantic import BaseModel -# class _Kwargs(BaseModel): -# column: str -# regex_list: List[str] -# match_on: str -# result_format: str - - -# class ExpectationConfig(BaseModel): -# _expectation_type: str -# _kwargs: _Kwargs -# _raw_kwargs: Any -# meta: Dict[str, Any] -# success_on_last_run: Any -# _ge_cloud_id: Any -# _expectation_context: Any - - -# class PartialUnexpectedCount(BaseModel): -# value: str -# count: int - - -# class Result(BaseModel): -# element_count: Optional[int] -# missing_count: Optional[int] -# missing_percent: Optional[int] -# unexpected_count: Optional[int] -# unexpected_percent: Optional[int] -# unexpected_percent_total: Optional[int] -# unexpected_percent_nonmissing: Optional[int] -# partial_unexpected_list: Optional[List[str]] -# partial_unexpected_index_list: Optional[List[int]] -# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]] -# unexpected_list: Optional[List[str]] -# unexpected_index_list: Optional[List[int]] - - -# class ExceptionInfo(BaseModel): -# raised_exception: Optional[bool] -# exception_message: Optional[Any] -# exception_traceback: Optional[Any] - - -# class RegexMatchList(BaseModel): -# success: Optional[bool] -# expectation_config: Optional[ExpectationConfig] -# result: Optional[Result] -# meta: Optional[Dict[str, Any]] -# exception_info: Optional[ExceptionInfo] - -# class Config: -# underscore_attrs_are_private = True - class _Kwargs(BaseModel): column: str diff --git a/app/models/regex_pattern.py b/app/models/regex_pattern.py index 949ae5c..466ec00 100644 --- a/app/models/regex_pattern.py +++ b/app/models/regex_pattern.py @@ -4,58 +4,6 @@ from pydantic import BaseModel -# class _Kwargs(BaseModel): -# column: str -# regex: str -# result_format: str - - -# class ExpectationConfig(BaseModel): -# _expectation_type: str -# _kwargs: _Kwargs -# _raw_kwargs: Any -# meta: Dict[str, Any] -# success_on_last_run: Any -# _ge_cloud_id: Any -# _expectation_context: Any - - -# class PartialUnexpectedCount(BaseModel): -# value: str -# count: int - - -# class Result(BaseModel): -# element_count: Optional[int] -# missing_count: Optional[int] -# missing_percent: Optional[int] -# unexpected_count: Optional[int] -# unexpected_percent: Optional[int] -# unexpected_percent_total: Optional[int] -# unexpected_percent_nonmissing: Optional[int] -# partial_unexpected_list: Optional[List[str]] -# partial_unexpected_index_list: Optional[List[int]] -# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]] -# unexpected_list: Optional[List[str]] -# unexpected_index_list: Optional[List[int]] - - -# class ExceptionInfo(BaseModel): -# raised_exception: Optional[bool] -# exception_message: Optional[Any] -# exception_traceback: Optional[Any] - - -# class RegexPatternExpectation(BaseModel): -# success: bool -# expectation_config: Optional[ExpectationConfig] -# result: Optional[Result] -# meta: Optional[Dict[str, Any]] -# exception_info: Optional[ExceptionInfo] - -# class Config: -# underscore_attrs_are_private = True - class _Kwargs(BaseModel): column: str diff --git a/app/utils/airline.py b/app/utils/airline.py index 6578e3f..a3c2c3c 100644 --- a/app/utils/airline.py +++ b/app/utils/airline.py @@ -1,9 +1,10 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder -from app.core.config import APP_DIR, AirlineSettings, Settings +from app.api.api_v1.routers.dictionary import standard_data_values +from app.core.config import AirlineSettings, Settings from app.utils.column_mapping import find_airline_name_columns -from app.utils.common import modify_values_to_be_in_set, read_pandas_dataset +from app.utils.common import modify_values_to_be_in_set settings = Settings() airline_settings = AirlineSettings() @@ -14,10 +15,8 @@ async def modify_airline_name_expectation_suite( ): default_expectation_suite = airline_settings.AIRLINE_NAME_EXPECTATION - airline_names_dataset = await read_pandas_dataset( - APP_DIR / "core" / "airline_names.csv" - ) - airline_names_list = airline_names_dataset["airline_names"].tolist() + airline_names_dataset = standard_data_values[["airline"]].dropna().copy() + airline_names_list = airline_names_dataset["airline"].tolist() changed_config = { "expect_column_values_to_be_in_set": { diff --git a/app/utils/column_mapping.py b/app/utils/column_mapping.py index a619ae4..28f36bf 100644 --- a/app/utils/column_mapping.py +++ b/app/utils/column_mapping.py @@ -174,9 +174,7 @@ async def find_metadata_columns(columns: set): organization_pattern = re.compile( r".*({}).*".format(metadata_settings.ORGANIZATION_KEYWORD) ) - # short_form_pattern = re.compile( - # r".*({}).*".format(metadata_settings.SHORT_FORM_KEYWORD) - # ) + description_pattern = re.compile( r".*({}).*".format(metadata_settings.DESCRIPTION_KEYWORD) ) @@ -221,9 +219,7 @@ async def find_metadata_columns(columns: set): organization_column, columns = extract_pattern_from_columns( columns, organization_pattern ) - # short_form_column, columns = extract_pattern_from_columns( - # columns, short_form_pattern - # ) + description_column, columns = extract_pattern_from_columns( columns, description_pattern ) @@ -265,7 +261,6 @@ async def find_metadata_columns(columns: set): return { "sector": list(sector_column), "organization": list(organization_column), - # "short_form": list(short_form_column), "description": list(description_column), "tags": list(tags_column), "temporal_coverage": list(temporal_coverage_column), @@ -275,7 +270,6 @@ async def find_metadata_columns(columns: set): "file_path": list(file_path_column), "frequency_of_update": list(frequency_of_update_column), "source_link": list(source_link_column), - # "archive": list(archive_column), "spacial_coverage": list(spacial_coverage_column), "variable_measured": list(variable_measured_column), "data_next_update": list(data_next_update_column), @@ -309,5 +303,4 @@ async def find_mapped_columns(columns): list(chain.from_iterable(mapped_columns.values())) ) ) - print({**mapped_columns, "unmapped": not_mapped_columns}) return {**mapped_columns, "unmapped": not_mapped_columns} diff --git a/app/utils/common.py b/app/utils/common.py index 58d3523..2c958ae 100644 --- a/app/utils/common.py +++ b/app/utils/common.py @@ -8,7 +8,7 @@ from charset_normalizer import from_bytes from fastapi.logger import logger -from app.core.config import APP_DIR, GeographySettings +from app.core.config import GeographySettings logging.basicConfig(level=logging.INFO) geographic_settings = GeographySettings() @@ -79,14 +79,6 @@ async def read_pandas_dataset(source: str, **kwargs): return dataset -async def load_values_to_be_in_set(domain: str): - # this function is used to load csv files, consisting values - # for states or country that are required to be in specific set - set_values_file = APP_DIR / "core" / f"{domain}.csv" - set_values = pd.read_csv(set_values_file)[f"{domain}"].unique() - return set_values - - async def modify_column_names_to_expectation_suite( expectation_suite: dict, expectation_config: dict ): diff --git a/app/utils/general.py b/app/utils/general.py index da5a956..abbbd7d 100644 --- a/app/utils/general.py +++ b/app/utils/general.py @@ -157,9 +157,6 @@ async def null_not_in_columns(dataset, result_format, column, column_type): catch_exceptions=True, result_format=result_format, ) - # expectation = ge_pandas_dataset.expect_column_values_to_not_be_null( - # column=column, result_format=result_format, catch_exceptions=True - # ) expectation_dict = expectation.to_json_dict() expectation_dict["expectation_config"]["meta"] = { @@ -422,7 +419,6 @@ async def general_table_expectation_suite(dataset, result_format): multispaces_between_text_expectation_suite(dataset, result_format), bracket_values_expectation_suite(dataset, result_format), special_character_expectation_suite(dataset, result_format), - # null_not_in_columns(dataset, result_format, "price"), *[ null_not_in_columns(dataset, result_format, col, "numeric") for col in numeric_columns diff --git a/app/utils/geography.py b/app/utils/geography.py index cceeacf..8722d01 100644 --- a/app/utils/geography.py +++ b/app/utils/geography.py @@ -4,13 +4,10 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder -from app.core.config import APP_DIR, GeographySettings, Settings +from app.api.api_v1.routers.dictionary import standard_data_values +from app.core.config import GeographySettings, Settings from app.utils.column_mapping import find_geography_columns -from app.utils.common import ( - modify_values_to_be_in_set, - read_dataset, - read_pandas_dataset, -) +from app.utils.common import modify_values_to_be_in_set, read_dataset settings = Settings() geograhy_setting = GeographySettings() @@ -19,7 +16,7 @@ async def modify_city_expectation_suite(column_name: str, result_format: str): default_expectation_suite = geograhy_setting.STATE_EXPECTATION - city_dataset = await read_pandas_dataset(APP_DIR / "core" / "district.csv") + city_dataset = standard_data_values[["district"]].dropna().copy() city_list = city_dataset["districts"].tolist() changed_config = { @@ -65,7 +62,7 @@ async def city_expectation_suite(dataset, result_format): async def modify_state_expectation_suite(column_name: str, result_format: str): default_expectation_suite = geograhy_setting.STATE_EXPECTATION - state_dataset = await read_pandas_dataset(APP_DIR / "core" / "state.csv") + state_dataset = standard_data_values[["state"]].dropna().copy() state_list = state_dataset["state"].tolist() changed_config = { @@ -112,9 +109,7 @@ async def modify_country_expectation_suite( ): default_expectation_suite = geograhy_setting.COUNTRY_EXPECTATION - country_dataset = await read_pandas_dataset( - APP_DIR / "core" / "country.csv" - ) + country_dataset = standard_data_values[["country"]].dropna().copy() country_list = country_dataset["country"].tolist() changed_config = { diff --git a/app/utils/insurance.py b/app/utils/insurance.py index c78dad9..c87fb03 100644 --- a/app/utils/insurance.py +++ b/app/utils/insurance.py @@ -1,9 +1,10 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder -from app.core.config import APP_DIR, InsuranceCompanySettings, Settings +from app.api.api_v1.routers.dictionary import standard_data_values +from app.core.config import InsuranceCompanySettings, Settings from app.utils.column_mapping import find_insurance_company_columns -from app.utils.common import modify_values_to_be_in_set, read_pandas_dataset +from app.utils.common import modify_values_to_be_in_set settings = Settings() insurance_company_settings = InsuranceCompanySettings() @@ -16,9 +17,9 @@ async def modify_insurance_company_name_expectation_suite( insurance_company_settings.INSURANCE_COMPANY_NAME_EXPECTATION ) - insurance_company_names_dataset = await read_pandas_dataset( - APP_DIR / "core" / "insurance_companies.csv" - ) + insurance_company_names_dataset = standard_data_values[ + ["insurance_companies"] + ] insurance_company_names_list = insurance_company_names_dataset[ "insurance_companies" ].tolist() diff --git a/app/utils/metadata.py b/app/utils/metadata.py index 07ba034..eeea191 100644 --- a/app/utils/metadata.py +++ b/app/utils/metadata.py @@ -4,14 +4,14 @@ import great_expectations as ge from fastapi.encoders import jsonable_encoder -from app.core.config import APP_DIR, MetadataSettings, Settings +from app.api.api_v1.routers.dictionary import standard_data_values +from app.core.config import MetadataSettings, Settings from app.utils.column_mapping import find_metadata_columns -from app.utils.common import ( # modify_column_order_expectation_suite, +from app.utils.common import ( modify_values_length_to_be_between, modify_values_to_be_in_set, modify_values_to_match_regex_list, read_dataset, - read_pandas_dataset, ) from app.utils.general import general_metadata_expectation_suite from app.utils.tags import tags_expectation_suite @@ -162,8 +162,8 @@ async def modify_sector_expectation_suite( default_expectation_suite = meta_data_setting.SECTOR_EXPECTATION - sector_dataset = await read_pandas_dataset(APP_DIR / "core" / "sector.csv") - sector_list = sector_dataset["sector"].tolist() + sector_dataset = standard_data_values[["sectors"]].dropna().copy() + sector_list = sector_dataset["sectors"].tolist() changed_config = { "expect_column_values_to_be_in_set": { @@ -222,10 +222,10 @@ async def modify_organization_expectation_suite( ): default_expectation_suite = meta_data_setting.ORGANIZATION_EXPECTATION - organization_dataset = await read_pandas_dataset( - APP_DIR / "core" / "organization.csv" + organization_dataset = ( + standard_data_values[["organisation"]].dropna().copy() ) - organization_list = organization_dataset["organization"].tolist() + organization_list = organization_dataset["organisation"].tolist() changed_config = { "expect_column_values_to_be_in_set": { @@ -275,7 +275,6 @@ async def organization_expectation_suite(dataset, result_format): + validation["results"][0]["expectation_config"]["_kwargs"]["column"] ) results[validation_ui_name] = validation - # print(jsonable_encoder(results)) return jsonable_encoder(results) @@ -284,10 +283,9 @@ async def modify_short_form_expectation_suite( ): default_expectation_suite = meta_data_setting.SHORT_FORM_EXPECTATION - short_form_dataset = await read_pandas_dataset( - APP_DIR / "core" / "short_form.csv" - ) - short_form_list = short_form_dataset["short_form"].tolist() + # NOTE: Modify the short_form_expectation_suite to use short_form + short_form_dataset = {"short_form": ""} + short_form_list = short_form_dataset["short_form"] changed_config = { "expect_column_values_to_be_in_set": { @@ -302,43 +300,6 @@ async def modify_short_form_expectation_suite( return changed_expectation_suite -# async def short_form_expectation_suite(dataset, result_format): -# """Expectation to check if Short Form values are in short_form.csv - -# Expectation is on whether every value present in short form column of metadata -# csv is in short_form.csv file or not - -# Args: -# dataset (Dataframe): Read metadata csv using Pandas Dataframe -# result_format (str): SUMMARY - -# Returns: -# Dict: Dictionary of Expectations -# """ -# results = {} -# mapped_columns = await find_metadata_columns(set(dataset.columns)) -# short_form_column = mapped_columns["short_form"][0] - -# expectation_suite = await modify_short_form_expectation_suite( -# short_form_column, result_format -# ) -# # convert pandas dataset to great_expectations dataset -# ge_pandas_dataset = ge.from_pandas( -# dataset, expectation_suite=expectation_suite -# ) -# validation = ge_pandas_dataset.validate() -# validation_ui_name = ( -# validation["results"][0]["expectation_config"]["meta"][ -# "expectation_name" -# ] -# + " - " -# + validation["results"][0]["expectation_config"]["_kwargs"]["column"] -# ) -# results[validation_ui_name] = validation - -# return jsonable_encoder(results) - - async def modify_frequency_of_update_expectation_suite( column_name: str, result_format: str ): @@ -346,8 +307,8 @@ async def modify_frequency_of_update_expectation_suite( meta_data_setting.FREQUENCY_OF_UPDATE_EXPECTATION ) - frequency_of_update_dataset = await read_pandas_dataset( - APP_DIR / "core" / "frequency_of_update.csv" + frequency_of_update_dataset = ( + standard_data_values[["frequency_of_update"]].dropna().copy() ) frequency_of_update_list = frequency_of_update_dataset[ "frequency_of_update" @@ -571,7 +532,6 @@ async def metadata_expectation_suite( """ if isinstance(dataset, str): dataset = await read_dataset(dataset) - # print(dir(dataset)) # Dataset modification for sector expectation suite dataset_sector = dataset.copy() # explode the dataset based on sector column @@ -585,7 +545,6 @@ async def metadata_expectation_suite( check_column_order(dataset), sector_expectation_suite(dataset_sector, result_format), organization_expectation_suite(dataset, result_format), - # short_form_expectation_suite(dataset, result_format), description_expectation_suite(dataset, result_format), dataset_name_for_factly_expectation_suite(dataset, result_format), unit_expectation_suite(dataset, result_format), diff --git a/app/utils/minio_transfer.py b/app/utils/minio_transfer.py index 34849a4..d4a7633 100644 --- a/app/utils/minio_transfer.py +++ b/app/utils/minio_transfer.py @@ -103,8 +103,3 @@ async def get_files_inside_folder(folder_name: str): raise Exception(f"Could not get files inside folder: {e}") else: return file_keys - - -# async def save_expectation_to_minio_folder(expectation, s3_folder: str): - -# pass diff --git a/app/utils/tags.py b/app/utils/tags.py index 62cf155..47e39f8 100644 --- a/app/utils/tags.py +++ b/app/utils/tags.py @@ -4,8 +4,6 @@ from fastapi.encoders import jsonable_encoder from app.core.config import TagsSettings - -# from app.utils.column_mapping import find_tags_columns from app.utils.column_mapping import find_metadata_columns from app.utils.common import modify_values_to_match_regex_list diff --git a/docker-compose.yaml b/docker-compose.yaml index 41bf14c..7d83715 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -18,6 +18,7 @@ services: " volumes: - .:/app + - /Users/paul/factly/factly-datasets/projects/assembly_elections/data/processed/assembly-elections/statistical-reports:/data env_file: - .env networks: