Skip to content

Commit 8b46e8a

Browse files
committed
update
1 parent 8bc504a commit 8b46e8a

File tree

8 files changed

+724
-21
lines changed

8 files changed

+724
-21
lines changed

bench_all.py renamed to sample_configs/bench_all.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,24 @@
55
seeds.append(random.randint(0, 100))
66

77
seeds = [22, 92, 54, 86, 41]
8-
8+
seeds = [22]
99
config_paths = [
1010
"sample_configs/paper_image_cloud_configs.yaml",
1111
"sample_configs/paper_text_tabular_cloud_configs.yaml",
1212
"sample_configs/paper_text_cloud_configs.yaml",
13-
]
14-
frameworks = [
15-
# "AutoGluon_best_master",
16-
# "autokeras_master",
17-
"ablation_base",
18-
"ablation_greedy_soup",
19-
"ablation_gradient_clip",
20-
"ablation_warmup_steps",
21-
# "ablation_cosine_decay",
22-
# "ablation_weight_decay",
23-
# "ablation_lr_decay",
24-
25-
]
13+
14+
frameworks = ['AutoGluon_best_master', 'ablation_base', 'ablation_add_greedy', 'ablation_add_grad_clip', 'ablation_add_warmup_steps', 'ablation_add_cosine_decay', 'ablation_add_weight_decay', 'ablation_add_lr_decay', 'AutoGluon_del_greedy', 'AutoGluon_del_grad_clip', 'AutoGluon_del_warmup_steps', 'AutoGluon_del_cosine_decay', 'AutoGluon_del_weight_decay', 'AutoGluon_del_lr_decay']
15+
16+
#frameworks = [
17+
# "ablation_base",
18+
# "ablation_greedy_soup",
19+
# "ablation_gradient_clip",
20+
# "ablation_warmup_steps",
21+
# "ablation_cosine_decay",
22+
# "ablation_weight_decay",
23+
# "ablation_lr_decay",
24+
# "autokeras_master",
25+
#]
2626
constraints = [
2727
"g4_12x"
2828
]

sample_configs/dataloaders/paper_text_datasets.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,12 @@ base: &base
6363

6464

6565
financial_news:
66+
<<: *base
6667
url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
6768
splits:
6869
- train
6970
langs:
7071
- en
71-
metric: accuracy
72-
problem_type: classification
7372

7473
MLDoc-11000:
7574
<<: *base

sample_configs/dataloaders/paper_text_tabular_datasets.yaml

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,281 @@ cal_house:
9292
- Sold Price
9393
metric: rmse
9494
problem_type: regression
95+
base: &base
96+
url: s3://automl-mm-bench/{name}/{split}.csv
97+
test_split_name: test
98+
splits:
99+
- train
100+
- test
101+
feature_columns:
102+
- ImageID
103+
label_columns:
104+
- LabelName
105+
image_columns:
106+
text_columns:
107+
columns_to_drop:
108+
metric: acc
109+
problem_type: multiclass
110+
111+
112+
prod:
113+
<<: *base
114+
url: s3://automl-mm-bench/machine_hack_product_sentiment/{split}.csv
115+
test_split_name: dev
116+
feature_columns:
117+
- Product_Description
118+
- Product_Type
119+
label_columns:
120+
- Sentiment
121+
122+
airbnb:
123+
<<: *base
124+
url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
125+
feature_columns:
126+
null
127+
label_columns:
128+
- price_label
129+
ignore_columns:
130+
- id
131+
- listing_url
132+
- scrape_id
133+
- last_scraped
134+
- picture_url
135+
- host_id
136+
- host_url
137+
- host_name
138+
- host_thumbnail_url
139+
- host_picture_url
140+
- monthly_price
141+
- weekly_price
142+
- price
143+
- calendar_last_scraped
144+
145+
channel:
146+
<<: *base
147+
url: s3://automl-mm-bench/news_channel/{split}.csv
148+
feature_columns:
149+
null
150+
label_columns:
151+
- channel
152+
ignore_columns:
153+
null
154+
155+
wine:
156+
<<: *base
157+
url: s3://automl-mm-bench/wine_reviews/{split}.csv
158+
feature_columns:
159+
null
160+
label_columns:
161+
- variety
162+
ignore_columns:
163+
null
164+
165+
imdb:
166+
<<: *base
167+
url: s3://automl-mm-bench/imdb_genre_prediction/{split}.csv
168+
feature_columns:
169+
null
170+
label_columns:
171+
- Genre_is_Drama
172+
ignore_columns:
173+
null
174+
metric: roc_auc
175+
problem_type: binary
176+
177+
jigsaw:
178+
<<: *base
179+
url: s3://automl-mm-bench/jigsaw_unintended_bias100K/{split}.pq
180+
feature_columns:
181+
- comment_text
182+
- asian
183+
- atheist
184+
- bisexual
185+
- black
186+
- buddhist
187+
- christian
188+
- female
189+
- heterosexual
190+
- hindu
191+
- homosexual_gay_or_lesbian
192+
- intellectual_or_learning_disability
193+
- jewish
194+
- latino
195+
- male
196+
- muslim
197+
- other_disability
198+
- other_gender
199+
- other_race_or_ethnicity
200+
- other_religion
201+
- other_sexual_orientation
202+
- physical_disability
203+
- psychiatric_or_mental_illness
204+
- transgender
205+
- white
206+
- funny
207+
- wow
208+
- sad
209+
- likes
210+
- disagree
211+
label_columns:
212+
- target
213+
metric: roc_auc
214+
problem_type: binary
215+
216+
fake:
217+
<<: *base
218+
url: s3://automl-mm-bench/fake_job_postings2/{split}.csv
219+
feature_columns:
220+
null
221+
label_columns:
222+
- fraudulent
223+
ignore_columns:
224+
null
225+
metric: roc_auc
226+
problem_type: binary
227+
228+
kick:
229+
<<: *base
230+
url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
231+
feature_columns:
232+
null
233+
label_columns:
234+
- final_status
235+
ignore_columns:
236+
null
237+
metric: roc_auc
238+
problem_type: binary
239+
240+
ae:
241+
<<: *base
242+
url: s3://automl-mm-bench/ae_price_prediction/{split}.pq
243+
feature_columns:
244+
null
245+
label_columns:
246+
- price
247+
ignore_columns:
248+
- mrp
249+
- pdp_url
250+
metric: r2
251+
problem_type: regression
252+
253+
qaa:
254+
<<: *base
255+
url: s3://automl-mm-bench/google_quest_qa/{split}.pq
256+
test_split_name: dev
257+
feature_columns:
258+
- question_title
259+
- question_body
260+
- answer
261+
- category
262+
label_columns:
263+
- answer_type_reason_explanation
264+
metric: r2
265+
problem_type: regression
266+
267+
qaq:
268+
<<: *base
269+
url: s3://automl-mm-bench/google_quest_qa/{split}.pq
270+
test_split_name: dev
271+
feature_columns:
272+
- question_title
273+
- question_body
274+
- answer
275+
- category
276+
label_columns:
277+
- question_type_reason_explanation
278+
metric: r2
279+
problem_type: regression
280+
281+
cloth:
282+
<<: *base
283+
url: s3://automl-mm-bench/women_clothing_review/{split}.pq
284+
feature_columns:
285+
- Title
286+
- Review Text
287+
- Age
288+
- Division Name
289+
- Department Name
290+
- Class Name
291+
label_columns:
292+
- Rating
293+
metric: r2
294+
problem_type: regression
295+
296+
mercari:
297+
<<: *base
298+
url: s3://automl-mm-bench/mercari_price_suggestion100K/{split}.pq
299+
feature_columns:
300+
null
301+
label_columns:
302+
- log_price
303+
ignore_columns:
304+
- train_id
305+
- price
306+
metric: r2
307+
problem_type: regression
308+
309+
jc:
310+
<<: *base
311+
url: s3://automl-mm-bench/jc_penney_products/{split}.csv
312+
feature_columns:
313+
null
314+
label_columns:
315+
- sale_price
316+
ignore_columns:
317+
null
318+
metric: r2
319+
problem_type: regression
320+
321+
pop:
322+
<<: *base
323+
url: s3://automl-mm-bench/news_popularity2/{split}.csv
324+
feature_columns:
325+
null
326+
label_columns:
327+
- log_shares
328+
ignore_columns:
329+
null
330+
metric: r2
331+
problem_type: regression
332+
333+
book:
334+
<<: *base
335+
url: s3://automl-mm-bench/machine_hack_competitions/predict_the_price_of_books/{split}.csv
336+
feature_columns:
337+
- Title
338+
- Author
339+
- Edition
340+
- Reviews
341+
- Ratings
342+
- Synopsis
343+
- Genre
344+
- BookCategory
345+
label_columns:
346+
- Price
347+
ignore_columns:
348+
null
349+
metric: r2
350+
problem_type: regression
351+
352+
salary:
353+
<<: *base
354+
url: s3://automl-mm-bench/machine_hack_competitions/predict_the_data_scientists_salary_in_india_hackathon/{split}.csv
355+
feature_columns:
356+
null
357+
label_columns:
358+
- salary
359+
ignore_columns:
360+
null
361+
metric: acc
362+
problem_type: multiclass
363+
364+
house:
365+
<<: *base
366+
url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
367+
feature_columns:
368+
null
369+
label_columns:
370+
- Sold Price
371+
metric: r2
372+
problem_type: regression

0 commit comments

Comments
 (0)