Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions app/api/api_v1/routers/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,7 @@
from fastapi.templating import Jinja2Templates

from app.core.config import Settings

# from app.models.date_strftime_pattern import DateStrftimePattern
from app.models.enums import ExpectationResultFormat, ExpectationResultType

# from app.models.expect_column_values_to_be_in_set import ColumnValuesToBeInSet
# from app.models.general import GeneralTableExpectation
# from app.models.regex_list_pattern import RegexMatchList
# from app.models.regex_pattern import RegexPatternExpectation
from app.utils.dataset import (
datasets_expectation,
datasets_expectation_from_url,
Expand Down
41 changes: 35 additions & 6 deletions app/api/api_v1/routers/dictionary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import io

import pandas as pd
import requests
from fastapi import APIRouter, HTTPException, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
Expand All @@ -11,14 +14,40 @@

dictionary_router = router = APIRouter()

# Load the data dictionary from a Google spreadsheet once, at import time.
# The sheet (tab) name and the spreadsheet id both come from the settings
# object, which is populated from the environment.
google_spread_sheet_sheet_name = settings.GOOGLE_SPREAD_SHEET_SHEET_NAME
google_sheet_id = settings.GOOGLE_SHEET_ID

# Build the CSV-export URL for the configured sheet.
g_sheet_session = requests.Session()
common_g_sheet_link_format = "https://docs.google.com/spreadsheets/d/"
g_sheet_id = f"{google_sheet_id}"
download_sheet_name = (
    f"/gviz/tq?tqx=out:csv&sheet={google_spread_sheet_sheet_name}"
)
url_name = common_g_sheet_link_format + g_sheet_id + download_sheet_name

# requests never times out by default; without an explicit timeout an
# unresponsive endpoint would block the import of this module (and therefore
# application start-up) forever.
# NOTE(review): the HTTP status is not checked here, so an error page would be
# handed to the CSV parser as-is -- confirm whether that should fail loudly.
g_sheet_response = g_sheet_session.get(url_name, timeout=30)
g_sheet_bytes_data = g_sheet_response.content
data = pd.read_csv(io.StringIO(g_sheet_bytes_data.decode("utf-8")))

# Same data with the sheet's column headers mapped to shorter entity names.
# ``rename`` returns a new DataFrame, so ``data`` itself is left untouched.
standard_data_values = data.rename(
    columns={
        "country_standard_name": "country",
        "unique_standard_airline_name": "airline",
        "standard_disease_name": "diseases",
        "psu_companies": "psu",
        "standard_district_name": "district",
        "standard_states": "state",
        "insurance_standard_names": "insurance_companies",
    },
)


@router.get("/", summary="Get all Saved Entities csv file name")
async def get_entity_names():
    """Return the names of all available entities.

    The entity names are the column headers of the module-level ``data``
    DataFrame, returned as a plain list.
    """
    return data.columns.tolist()


@router.get(
Expand All @@ -27,9 +56,9 @@ async def get_entity_names():
response_class=JSONResponse,
)
async def get_entity_data(entity: str):
entity_df = pd.read_csv(CORE_FOLDER / f"{entity}.csv")
entity_df = data[[entity]].dropna()
# to avoid json conversion error
entity_df = entity_df.fillna("")
# entity_df = entity_df.fillna("")

# convert to json
json_compatible_item_data = jsonable_encoder(
Expand Down
45 changes: 6 additions & 39 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ class Settings(BaseSettings):
# Metadata File Parameters
METADATA_COLUMN_ORDER_STRING = ""

# Name of the sheet (tab) to read within the Google spreadsheet
GOOGLE_SPREAD_SHEET_SHEET_NAME: str = ""

# ID of the Google spreadsheet (the part of its URL after /spreadsheets/d/)
GOOGLE_SHEET_ID: str = ""

class Config:
env_file = ".env"

Expand Down Expand Up @@ -502,26 +508,6 @@ class MetadataSettings(BaseSettings):
],
}

# SHORT_FORM_EXPECTATION = {
# "data_asset_type": None,
# "expectation_suite_name": "short_form_expectation_suite",
# "expectations": [
# {
# "expectation_type": "expect_column_values_to_be_in_set",
# "kwargs": {
# "column": "short_form",
# "value_set": [],
# "result_format": "SUMMARY",
# },
# "meta": {
# "expectation_name": "Short Form in set of values",
# "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg",
# "expectation_error_message": "Short Form should be from the Data Dictionary",
# },
# }
# ],
# }

FREQUENCY_OF_UPDATE_EXPECTATION = {
"data_asset_type": None,
"expectation_suite_name": "frequency_of_update_expectation_suite",
Expand Down Expand Up @@ -595,25 +581,6 @@ class MetadataSettings(BaseSettings):
TIME_SAVED_IN_HOURS_MSG: str = (
"Null values should not present in these columns"
)
# TIME_SAVED_IN_HOURS_EXPECTATION = {
# "data_asset_type": None,
# "expectation_suite_name": "time_saved_in_hours_expectation_suite",
# "expectations": [
# {
# "expectation_type": "expect_column_values_to_be_in_set",
# "kwargs": {
# "column": "time_saved_in_hours",
# "value_set": [],
# "result_format": "SUMMARY",
# },
# "meta": {
# "expectation_name": "Time Saved In Hours",
# "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg",
# "expectation_error_message": "Time Saved in Hours should be from the range of 2 to 6 hours",
# },
# }
# ],
# }


class TagsSettings(BaseSettings):
Expand Down
63 changes: 0 additions & 63 deletions app/models/date_strftime_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,69 +4,6 @@

from pydantic import BaseModel

# class _Kwargs(BaseModel):
# column: str
# strftime_format: str
# result_format: str

# class Config:
# underscore_attrs_are_private = True

# class ExpectationConfig(BaseModel):
# _expectation_type: str
# _kwargs: _Kwargs
# _raw_kwargs: Any
# meta: Dict[str, Any]
# success_on_last_run: Any
# _ge_cloud_id: Any
# _expectation_context: Any

# class Config:
# underscore_attrs_are_private = True

# class PartialUnexpectedCount(BaseModel):
# value: str
# count: int

# class Config:
# underscore_attrs_are_private = True


# class Result(BaseModel):
# element_count: Optional[int]
# missing_count: Optional[int]
# missing_percent: Optional[int]
# unexpected_count: Optional[int]
# unexpected_percent: Optional[int]
# unexpected_percent_total: Optional[int]
# unexpected_percent_nonmissing: Optional[int]
# partial_unexpected_list: Optional[List[str]]
# partial_unexpected_index_list: Optional[List[int]]
# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]]
# unexpected_list: Optional[List[str]]
# unexpected_index_list: Optional[List[int]]


# class ExceptionInfo(BaseModel):
# raised_exception: Optional[bool]
# exception_message: Optional[Any]
# exception_traceback: Optional[Any]

# class Config:
# underscore_attrs_are_private = True


# class DateStrftimePattern(BaseModel):
# success: bool
# expectation_config: Optional[ExpectationConfig]
# result: Optional[Result]
# _meta: Optional[Dict[str, Any]]
# exception_info: Optional[ExceptionInfo]

# class Config:
# underscore_attrs_are_private = True
# # response_model_exclude_unset = True


class _Kwargs(BaseModel):
column: str
Expand Down
36 changes: 0 additions & 36 deletions app/models/expect_column_values_to_be_in_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,6 @@

from pydantic import BaseModel

# class PartialUnexpectedCount(BaseModel):
# value: str
# count: int


# class Result(BaseModel):
# element_count: Optional[int]
# missing_count: Optional[int]
# missing_percent: Optional[int]
# unexpected_count: Optional[int]
# unexpected_percent: Optional[int]
# unexpected_percent_total: Optional[int]
# unexpected_percent_nonmissing: Optional[int]
# partial_unexpected_list: Optional[List[str]]
# partial_unexpected_index_list: Optional[List[int]]
# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]]
# unexpected_list: Optional[List[str]]
# unexpected_index_list: Optional[List[int]]


# class ExceptionInfo(BaseModel):
# raised_exception: bool
# exception_message: Optional[Any]
# exception_traceback: Optional[Any]


# class ColumnValuesToBeInSet(BaseModel):
# success: bool
# _expectation_config: Optional[Any]
# result: Optional[Result]
# meta: Optional[Dict[str, Any]]
# _exception_info: Optional[ExceptionInfo]

# class Config:
# underscore_attrs_are_private = True


class _Kwargs(BaseModel):
column: str
Expand Down
53 changes: 0 additions & 53 deletions app/models/regex_list_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,59 +4,6 @@

from pydantic import BaseModel

# class _Kwargs(BaseModel):
# column: str
# regex_list: List[str]
# match_on: str
# result_format: str


# class ExpectationConfig(BaseModel):
# _expectation_type: str
# _kwargs: _Kwargs
# _raw_kwargs: Any
# meta: Dict[str, Any]
# success_on_last_run: Any
# _ge_cloud_id: Any
# _expectation_context: Any


# class PartialUnexpectedCount(BaseModel):
# value: str
# count: int


# class Result(BaseModel):
# element_count: Optional[int]
# missing_count: Optional[int]
# missing_percent: Optional[int]
# unexpected_count: Optional[int]
# unexpected_percent: Optional[int]
# unexpected_percent_total: Optional[int]
# unexpected_percent_nonmissing: Optional[int]
# partial_unexpected_list: Optional[List[str]]
# partial_unexpected_index_list: Optional[List[int]]
# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]]
# unexpected_list: Optional[List[str]]
# unexpected_index_list: Optional[List[int]]


# class ExceptionInfo(BaseModel):
# raised_exception: Optional[bool]
# exception_message: Optional[Any]
# exception_traceback: Optional[Any]


# class RegexMatchList(BaseModel):
# success: Optional[bool]
# expectation_config: Optional[ExpectationConfig]
# result: Optional[Result]
# meta: Optional[Dict[str, Any]]
# exception_info: Optional[ExceptionInfo]

# class Config:
# underscore_attrs_are_private = True


class _Kwargs(BaseModel):
column: str
Expand Down
52 changes: 0 additions & 52 deletions app/models/regex_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,58 +4,6 @@

from pydantic import BaseModel

# class _Kwargs(BaseModel):
# column: str
# regex: str
# result_format: str


# class ExpectationConfig(BaseModel):
# _expectation_type: str
# _kwargs: _Kwargs
# _raw_kwargs: Any
# meta: Dict[str, Any]
# success_on_last_run: Any
# _ge_cloud_id: Any
# _expectation_context: Any


# class PartialUnexpectedCount(BaseModel):
# value: str
# count: int


# class Result(BaseModel):
# element_count: Optional[int]
# missing_count: Optional[int]
# missing_percent: Optional[int]
# unexpected_count: Optional[int]
# unexpected_percent: Optional[int]
# unexpected_percent_total: Optional[int]
# unexpected_percent_nonmissing: Optional[int]
# partial_unexpected_list: Optional[List[str]]
# partial_unexpected_index_list: Optional[List[int]]
# partial_unexpected_counts: Optional[List[PartialUnexpectedCount]]
# unexpected_list: Optional[List[str]]
# unexpected_index_list: Optional[List[int]]


# class ExceptionInfo(BaseModel):
# raised_exception: Optional[bool]
# exception_message: Optional[Any]
# exception_traceback: Optional[Any]


# class RegexPatternExpectation(BaseModel):
# success: bool
# expectation_config: Optional[ExpectationConfig]
# result: Optional[Result]
# meta: Optional[Dict[str, Any]]
# exception_info: Optional[ExceptionInfo]

# class Config:
# underscore_attrs_are_private = True


class _Kwargs(BaseModel):
column: str
Expand Down
Loading