Skip to content

Commit bbb2a0c

Browse files
authored
fixed num_workers (#229)

* fixed num_workers
* ch06 & ch07: added num_workers to create_dataloader_v1

1 parent 24523bd commit bbb2a0c

File tree

15 files changed

+20
-20
lines changed

15 files changed

+20
-20
lines changed

appendix-D/01_main-chapter-code/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
5050

5151
# Create dataloader
5252
dataloader = DataLoader(
53-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
53+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5454

5555
return dataloader
5656

ch02/01_main-chapter-code/ch02.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@
13461346
" batch_size=batch_size,\n",
13471347
" shuffle=shuffle,\n",
13481348
" drop_last=drop_last,\n",
1349-
" num_workers=0\n",
1349+
" num_workers=num_workers\n",
13501350
" )\n",
13511351
"\n",
13521352
" return dataloader"

ch02/01_main-chapter-code/dataloader.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@
8282
"\n",
8383
" # Create dataloader\n",
8484
" dataloader = DataLoader(\n",
85-
" dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)\n",
85+
" dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)\n",
8686
"\n",
8787
" return dataloader\n",
8888
"\n",

ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@
128128
" batch_size=batch_size,\n",
129129
" shuffle=shuffle,\n",
130130
" drop_last=drop_last,\n",
131-
" num_workers=0\n",
131+
" num_workers=num_workers\n",
132132
" )\n",
133133
"\n",
134134
" return dataloader"

ch04/01_main-chapter-code/gpt.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
class GPTDatasetV1(Dataset):
16-
def __init__(self, txt, tokenizer, max_length, stride, num_workers=0):
16+
def __init__(self, txt, tokenizer, max_length, stride):
1717
self.input_ids = []
1818
self.target_ids = []
1919

@@ -44,7 +44,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4444

4545
# Create dataloader
4646
dataloader = DataLoader(
47-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
47+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
4848

4949
return dataloader
5050

ch04/01_main-chapter-code/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4141

4242
# Create dataloader
4343
dataloader = DataLoader(
44-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
44+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
4545

4646
return dataloader
4747

ch04/02_performance-analysis/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4949

5050
# Create dataloader
5151
dataloader = DataLoader(
52-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
52+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5353

5454
return dataloader
5555

ch05/01_main-chapter-code/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4949

5050
# Create dataloader
5151
dataloader = DataLoader(
52-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
52+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5353

5454
return dataloader
5555

ch05/02_alternative_weight_loading/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4949

5050
# Create dataloader
5151
dataloader = DataLoader(
52-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
52+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5353

5454
return dataloader
5555

ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4444
tokenizer = tiktoken.get_encoding("gpt2")
4545
dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)
4646
dataloader = DataLoader(
47-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
47+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
4848

4949
return dataloader
5050

ch05/05_bonus_hparam_tuning/previous_chapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
4949

5050
# Create dataloader
5151
dataloader = DataLoader(
52-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
52+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5353

5454
return dataloader
5555

ch06/01_main-chapter-code/previous_chapters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def __getitem__(self, idx):
4141

4242

4343
def create_dataloader_v1(txt, batch_size=4, max_length=256,
44-
stride=128, shuffle=True, drop_last=True):
44+
stride=128, shuffle=True, drop_last=True, num_workers=0):
4545
# Initialize the tokenizer
4646
tokenizer = tiktoken.get_encoding("gpt2")
4747

@@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
5050

5151
# Create dataloader
5252
dataloader = DataLoader(
53-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
53+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5454

5555
return dataloader
5656

ch06/02_bonus_additional-experiments/previous_chapters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def __getitem__(self, idx):
4141

4242

4343
def create_dataloader_v1(txt, batch_size=4, max_length=256,
44-
stride=128, shuffle=True, drop_last=True):
44+
stride=128, shuffle=True, drop_last=True, num_workers=0):
4545
# Initialize the tokenizer
4646
tokenizer = tiktoken.get_encoding("gpt2")
4747

@@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
5050

5151
# Create dataloader
5252
dataloader = DataLoader(
53-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
53+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5454

5555
return dataloader
5656

ch06/03_bonus_imdb-classification/previous_chapters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def __getitem__(self, idx):
4242

4343

4444
def create_dataloader_v1(txt, batch_size=4, max_length=256,
45-
stride=128, shuffle=True, drop_last=True):
45+
stride=128, shuffle=True, drop_last=True, num_workers=0):
4646
# Initialize the tokenizer
4747
tokenizer = tiktoken.get_encoding("gpt2")
4848

@@ -51,7 +51,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
5151

5252
# Create dataloader
5353
dataloader = DataLoader(
54-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
54+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5555

5656
return dataloader
5757

ch07/01_main-chapter-code/previous_chapters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __getitem__(self, idx):
4545

4646

4747
def create_dataloader_v1(txt, batch_size=4, max_length=256,
48-
stride=128, shuffle=True, drop_last=True):
48+
stride=128, shuffle=True, drop_last=True, num_workers=0):
4949
# Initialize the tokenizer
5050
tokenizer = tiktoken.get_encoding("gpt2")
5151

@@ -54,7 +54,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
5454

5555
# Create dataloader
5656
dataloader = DataLoader(
57-
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
57+
dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
5858

5959
return dataloader
6060

0 commit comments

Comments (0)