@@ -270,6 +270,7 @@ def load_partition(
270
270
partition : Partition ,
271
271
items : Iterable [Dict [str , Any ]],
272
272
insert_mode : Optional [Methods ] = Methods .insert ,
273
+ partition_update_enabled : Optional [bool ] = True ,
273
274
) -> None :
274
275
"""Load items data for a single partition."""
275
276
conn = self .db .connect ()
@@ -441,12 +442,17 @@ def load_partition(
441
442
"Available modes are insert, ignore, upsert, and delsert."
442
443
f"You entered { insert_mode } ." ,
443
444
)
444
- cur .execute ("SELECT update_partition_stats_q(%s);" ,(partition .name ,))
445
+ if partition_update_enabled :
446
+ cur .execute ("SELECT update_partition_stats_q(%s);" ,(partition .name ,))
445
447
logger .debug (
446
448
f"Copying data for { partition } took { time .perf_counter () - t } seconds" ,
447
449
)
448
450
449
- def _partition_update (self , item : Dict [str , Any ]) -> str :
451
+ def _partition_update (
452
+ self ,
453
+ item : Dict [str , Any ],
454
+ update_enabled : Optional [bool ] = True ,
455
+ ) -> str :
450
456
"""Update the cached partition with the item information and return the name.
451
457
452
458
This method will mark the partition as dirty if the bounds of the partition
@@ -512,20 +518,24 @@ def _partition_update(self, item: Dict[str, Any]) -> str:
512
518
partition = self ._partition_cache [partition_name ]
513
519
514
520
if partition :
515
- # Only update the partition if the item is outside the current bounds
516
- if item ["datetime" ] < partition .datetime_range_min :
517
- partition .datetime_range_min = item ["datetime" ]
518
- partition .requires_update = True
519
- if item ["datetime" ] > partition .datetime_range_max :
520
- partition .datetime_range_max = item ["datetime" ]
521
- partition .requires_update = True
522
- if item ["end_datetime" ] < partition .end_datetime_range_min :
523
- partition .end_datetime_range_min = item ["end_datetime" ]
524
- partition .requires_update = True
525
- if item ["end_datetime" ] > partition .end_datetime_range_max :
526
- partition .end_datetime_range_max = item ["end_datetime" ]
527
- partition .requires_update = True
521
+ if update_enabled :
522
+ # Only update the partition if the item is outside the current bounds
523
+ if item ["datetime" ] < partition .datetime_range_min :
524
+ partition .datetime_range_min = item ["datetime" ]
525
+ partition .requires_update = True
526
+ if item ["datetime" ] > partition .datetime_range_max :
527
+ partition .datetime_range_max = item ["datetime" ]
528
+ partition .requires_update = True
529
+ if item ["end_datetime" ] < partition .end_datetime_range_min :
530
+ partition .end_datetime_range_min = item ["end_datetime" ]
531
+ partition .requires_update = True
532
+ if item ["end_datetime" ] > partition .end_datetime_range_max :
533
+ partition .end_datetime_range_max = item ["end_datetime" ]
534
+ partition .requires_update = True
528
535
else :
536
+ if not update_enabled :
537
+ raise Exception (f"Partition { partition_name } does not exist." )
538
+
529
539
# No partition exists yet; create a new one from item
530
540
partition = Partition (
531
541
name = partition_name ,
@@ -541,7 +551,11 @@ def _partition_update(self, item: Dict[str, Any]) -> str:
541
551
542
552
return partition_name
543
553
544
- def read_dehydrated (self , file : Union [Path , str ] = "stdin" ) -> Generator :
554
+ def read_dehydrated (
555
+ self ,
556
+ file : Union [Path , str ] = "stdin" ,
557
+ partition_update_enabled : Optional [bool ] = True ,
558
+ ) -> Generator :
545
559
if file is None :
546
560
file = "stdin"
547
561
if isinstance (file , str ):
@@ -572,15 +586,21 @@ def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator:
572
586
item [field ] = content_value
573
587
else :
574
588
item [field ] = tab_split [i ]
575
- item ["partition" ] = self ._partition_update (item )
589
+ item ["partition" ] = self ._partition_update (
590
+ item ,
591
+ partition_update_enabled ,
592
+ )
576
593
yield item
577
594
578
595
def read_hydrated (
579
- self , file : Union [Path , str , Iterator [Any ]] = "stdin" ,
596
+ self ,
597
+ file : Union [Path , str ,
598
+ Iterator [Any ]] = "stdin" ,
599
+ partition_update_enabled : Optional [bool ] = True ,
580
600
) -> Generator :
581
601
for line in read_json (file ):
582
602
item = self .format_item (line )
583
- item ["partition" ] = self ._partition_update (item )
603
+ item ["partition" ] = self ._partition_update (item , partition_update_enabled )
584
604
yield item
585
605
586
606
def load_items (
@@ -589,6 +609,7 @@ def load_items(
589
609
insert_mode : Optional [Methods ] = Methods .insert ,
590
610
dehydrated : Optional [bool ] = False ,
591
611
chunksize : Optional [int ] = 10000 ,
612
+ partition_update_enabled : Optional [bool ] = True ,
592
613
) -> None :
593
614
"""Load items json records."""
594
615
self .check_version ()
@@ -599,15 +620,17 @@ def load_items(
599
620
self ._partition_cache = {}
600
621
601
622
if dehydrated and isinstance (file , str ):
602
- items = self .read_dehydrated (file )
623
+ items = self .read_dehydrated (file , partition_update_enabled )
603
624
else :
604
- items = self .read_hydrated (file )
625
+ items = self .read_hydrated (file , partition_update_enabled )
605
626
606
627
for chunkin in chunked_iterable (items , chunksize ):
607
628
chunk = list (chunkin )
608
629
chunk .sort (key = lambda x : x ["partition" ])
609
630
for k , g in itertools .groupby (chunk , lambda x : x ["partition" ]):
610
- self .load_partition (self ._partition_cache [k ], g , insert_mode )
631
+ self .load_partition (
632
+ self ._partition_cache [k ], g , insert_mode , partition_update_enabled ,
633
+ )
611
634
612
635
logger .debug (f"Adding data to database took { time .perf_counter () - t } seconds." )
613
636
0 commit comments