@@ -92,3 +92,281 @@ cal_house:
9292    - Sold Price 
9393  metric : rmse 
9494  problem_type : regression 
# Shared defaults for the dataset entries below. Each entry pulls these in
# with `<<: *base`; keys written explicitly in an entry override the merged
# ones (the merge is shallow, so lists are replaced, not extended).
base: &base
  # {name} and {split} are left as literal placeholders in this file.
  # NOTE(review): presumably substituted by the loader — confirm.
  url: s3://automl-mm-bench/{name}/{split}.csv
  test_split_name: test
  splits:
    - train
    - test
  feature_columns:
    - ImageID
  label_columns:
    - LabelName
  # Explicit nulls: same parsed value as a bare `key:`, but unambiguous.
  image_columns: null
  text_columns: null
  # NOTE(review): no entry that merges this anchor sets `columns_to_drop`;
  # they all use `ignore_columns`. Kept for compatibility — confirm which
  # key the consumer actually reads and retire the other.
  columns_to_drop: null
  # Default for entries that do not list columns to ignore. Several entries
  # already set this key to null explicitly, so null is an accepted value.
  ignore_columns: null
  metric: acc
  problem_type: multiclass

# Merges `base`, then replaces the url, test split name, feature columns
# and label columns. metric / problem_type are not set here, so the merged
# base values (acc, multiclass) apply.
prod:
  <<: *base
  url: s3://automl-mm-bench/machine_hack_product_sentiment/{split}.csv
  # Replaces the merged `test_split_name: test`.
  test_split_name: dev
  feature_columns:
    - Product_Description
    - Product_Type
  label_columns:
    - Sentiment

# Merges `base`; data is read from .pq files instead of the base .csv url.
# metric / problem_type are not set here, so the merged base values
# (acc, multiclass) apply.
airbnb:
  <<: *base
  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - price_label
  ignore_columns:
    - id
    - listing_url
    - scrape_id
    - last_scraped
    - picture_url
    - host_id
    - host_url
    - host_name
    - host_thumbnail_url
    - host_picture_url
    - monthly_price
    - weekly_price
    - price
    - calendar_last_scraped

# Merges `base`; keeps the merged metric / problem_type (acc, multiclass).
channel:
  <<: *base
  url: s3://automl-mm-bench/news_channel/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - channel
  ignore_columns: null

# Merges `base`; keeps the merged metric / problem_type (acc, multiclass).
wine:
  <<: *base
  url: s3://automl-mm-bench/wine_reviews/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - variety
  ignore_columns: null

# Merges `base`, then switches the task to binary classification scored
# with roc_auc (base defaults are multiclass / acc).
imdb:
  <<: *base
  url: s3://automl-mm-bench/imdb_genre_prediction/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - Genre_is_Drama
  ignore_columns: null
  metric: roc_auc
  problem_type: binary

# Merges `base`; data is read from .pq files. The task is binary
# classification scored with roc_auc (base defaults are multiclass / acc).
# No ignore_columns key is set in this entry.
jigsaw:
  <<: *base
  url: s3://automl-mm-bench/jigsaw_unintended_bias100K/{split}.pq
  # Explicit feature list; replaces the one merged from base.
  feature_columns:
    - comment_text
    - asian
    - atheist
    - bisexual
    - black
    - buddhist
    - christian
    - female
    - heterosexual
    - hindu
    - homosexual_gay_or_lesbian
    - intellectual_or_learning_disability
    - jewish
    - latino
    - male
    - muslim
    - other_disability
    - other_gender
    - other_race_or_ethnicity
    - other_religion
    - other_sexual_orientation
    - physical_disability
    - psychiatric_or_mental_illness
    - transgender
    - white
    - funny
    - wow
    - sad
    - likes
    - disagree
  label_columns:
    - target
  metric: roc_auc
  problem_type: binary

# Merges `base`, then switches the task to binary classification scored
# with roc_auc (base defaults are multiclass / acc).
fake:
  <<: *base
  url: s3://automl-mm-bench/fake_job_postings2/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - fraudulent
  ignore_columns: null
  metric: roc_auc
  problem_type: binary

# Merges `base`, then switches the task to binary classification scored
# with roc_auc (base defaults are multiclass / acc).
kick:
  <<: *base
  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - final_status
  ignore_columns: null
  metric: roc_auc
  problem_type: binary

# Merges `base`; data is read from .pq files. The task is regression
# scored with r2 (base defaults are multiclass / acc).
ae:
  <<: *base
  url: s3://automl-mm-bench/ae_price_prediction/{split}.pq
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - price
  ignore_columns:
    - mrp
    - pdp_url
  metric: r2
  problem_type: regression

# Merges `base`. Uses the same google_quest_qa url and feature columns as
# the `qaq` entry; only the label column differs. Regression scored with r2.
qaa:
  <<: *base
  url: s3://automl-mm-bench/google_quest_qa/{split}.pq
  # Replaces the merged `test_split_name: test`.
  test_split_name: dev
  feature_columns:
    - question_title
    - question_body
    - answer
    - category
  label_columns:
    - answer_type_reason_explanation
  metric: r2
  problem_type: regression

# Merges `base`. Uses the same google_quest_qa url and feature columns as
# the `qaa` entry; only the label column differs. Regression scored with r2.
qaq:
  <<: *base
  url: s3://automl-mm-bench/google_quest_qa/{split}.pq
  # Replaces the merged `test_split_name: test`.
  test_split_name: dev
  feature_columns:
    - question_title
    - question_body
    - answer
    - category
  label_columns:
    - question_type_reason_explanation
  metric: r2
  problem_type: regression

# Merges `base`; data is read from .pq files. The task is regression
# scored with r2 (base defaults are multiclass / acc).
cloth:
  <<: *base
  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
  # Column names containing spaces are valid plain scalars as written.
  feature_columns:
    - Title
    - Review Text
    - Age
    - Division Name
    - Department Name
    - Class Name
  label_columns:
    - Rating
  metric: r2
  problem_type: regression

# Merges `base`; data is read from .pq files. The task is regression
# scored with r2 (base defaults are multiclass / acc).
mercari:
  <<: *base
  url: s3://automl-mm-bench/mercari_price_suggestion100K/{split}.pq
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - log_price
  ignore_columns:
    - train_id
    - price
  metric: r2
  problem_type: regression

# Merges `base`, then switches the task to regression scored with r2
# (base defaults are multiclass / acc).
jc:
  <<: *base
  url: s3://automl-mm-bench/jc_penney_products/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - sale_price
  ignore_columns: null
  metric: r2
  problem_type: regression

# Merges `base`, then switches the task to regression scored with r2
# (base defaults are multiclass / acc).
pop:
  <<: *base
  url: s3://automl-mm-bench/news_popularity2/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - log_shares
  ignore_columns: null
  metric: r2
  problem_type: regression

# Merges `base`, then switches the task to regression scored with r2
# (base defaults are multiclass / acc).
book:
  <<: *base
  url: s3://automl-mm-bench/machine_hack_competitions/predict_the_price_of_books/{split}.csv
  # Explicit feature list; replaces the one merged from base.
  feature_columns:
    - Title
    - Author
    - Edition
    - Reviews
    - Ratings
    - Synopsis
    - Genre
    - BookCategory
  label_columns:
    - Price
  ignore_columns: null
  metric: r2
  problem_type: regression

# Merges `base`. metric and problem_type are restated here with the same
# values the base anchor already provides (acc, multiclass).
salary:
  <<: *base
  url: s3://automl-mm-bench/machine_hack_competitions/predict_the_data_scientists_salary_in_india_hackathon/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - salary
  ignore_columns: null
  metric: acc
  problem_type: multiclass

# Merges `base`, then switches the task to regression scored with r2
# (base defaults are multiclass / acc). No ignore_columns key is set in
# this entry.
house:
  <<: *base
  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
  # null replaces the feature list merged from base.
  feature_columns: null
  label_columns:
    - Sold Price
  metric: r2
  problem_type: regression
0 commit comments