diff --git a/src/qa_training/domain/service_make_features.py b/src/qa_training/domain/service_make_features.py index f3d8280..003d973 100644 --- a/src/qa_training/domain/service_make_features.py +++ b/src/qa_training/domain/service_make_features.py @@ -52,7 +52,7 @@ def _make_y( def _handle_missing_values(self, df_customer_info) -> pd.DataFrame: """欠損値処理する.""" - df_customer_info["Age"] = df_customer_info["Age"].fillna(10) + df_customer_info["Age"] = df_customer_info["Age"].fillna(20) df_customer_info["Cabin"] = df_customer_info["Cabin"].fillna("S") df_customer_info = df_customer_info.dropna() return df_customer_info diff --git a/tests/qa_training/domain/service_make_features/missing_values/df_customer_info.csv b/tests/qa_training/domain/service_make_features/missing_values/df_customer_info.csv new file mode 100644 index 0000000..7affe30 --- /dev/null +++ b/tests/qa_training/domain/service_make_features/missing_values/df_customer_info.csv @@ -0,0 +1,7 @@ +PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked +1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,C85,S +2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C +3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,C85,S +4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S +5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,C85,S +6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,C85,Q diff --git a/tests/qa_training/domain/service_make_features/missing_values/df_filled_expected.csv b/tests/qa_training/domain/service_make_features/missing_values/df_filled_expected.csv new file mode 100644 index 0000000..6c539d2 --- /dev/null +++ b/tests/qa_training/domain/service_make_features/missing_values/df_filled_expected.csv @@ -0,0 +1,7 @@ +PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked +1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,C85,S +2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C +3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,C85,S +4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S +5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,C85,S +6,0,3,"Moran, Mr. James",male,20,0,0,330877,8.4583,C85,Q diff --git a/tests/qa_training/domain/service_make_features/missing_values/test_missing_values_in_age.py b/tests/qa_training/domain/service_make_features/missing_values/test_missing_values_in_age.py new file mode 100644 index 0000000..c23fc67 --- /dev/null +++ b/tests/qa_training/domain/service_make_features/missing_values/test_missing_values_in_age.py @@ -0,0 +1,27 @@ + + + +import pandas as pd +import pytest +from qa_training.domain.service_make_features import ServiceMakeFeatures +from qa_training.utils.my_assert_frame_equal import MyAssert + + +@pytest.fixture() +def fixture_run(): + service = ServiceMakeFeatures() + df_customer_info = pd.read_csv("tests/qa_training/domain/service_make_features/missing_values/df_customer_info.csv") + df_filled_expected = pd.read_csv("tests/qa_training/domain/service_make_features/missing_values/df_filled_expected.csv") + + return service, df_customer_info, df_filled_expected + + +def test_run(fixture_run): + # Arrange + service, df_customer_info, df_filled_expected = fixture_run + + # Act + df_filled = service._handle_missing_values(df_customer_info=df_customer_info) + + # Assert + MyAssert().assert_df(df_filled, df_filled_expected) diff --git a/tests/qa_training/domain/test_service_make_features.py b/tests/qa_training/domain/test_service_make_features.py deleted file mode 100644 index d5c0d8f..0000000 --- a/tests/qa_training/domain/test_service_make_features.py +++ /dev/null @@ -1,49 +0,0 @@ -import pandas as pd -import pytest -from qa_training.domain.service_make_features import ServiceMakeFeatures -from qa_training.utils.my_assert_frame_equal import MyAssert - - -@pytest.fixture -def fixture_run(): - service_make_features = ServiceMakeFeatures() - - df_customer_info = pd.read_csv( - "tests/common_data/df_customer_info.csv", - ) - df_id_expected = pd.read_csv( - "tests/common_data/df_id.csv", - ) - df_X_expected = pd.read_csv( - "tests/common_data/df_X.csv", - ) - df_y_expected = pd.read_csv( - "tests/common_data/df_y.csv", - ) - return ( - service_make_features, - df_customer_info, - df_id_expected, - df_X_expected, - df_y_expected, - ) - - -def test_run( - fixture_run: tuple[ - ServiceMakeFeatures, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame - ] -): - ( - service_make_features, - df_customer_info, - df_id_expected, - df_X_expected, - df_y_expected, - ) = fixture_run - - df_id, df_X, df_y = service_make_features.run(df_customer_info) - - MyAssert().assert_df(df_id, df_id_expected) - MyAssert().assert_df(df_X, df_X_expected) - MyAssert().assert_df(df_y, df_y_expected)