@@ -25,7 +25,6 @@ import (
25
25
"net"
26
26
"os"
27
27
"path/filepath"
28
- "runtime"
29
28
"strconv"
30
29
"strings"
31
30
"sync/atomic"
@@ -472,7 +471,7 @@ func (worker *WorkerClient) StartWorker() (err error) {
472
471
logger .GetLogger ().Log (logger .Info , "Started " , workerPath , ", pid=" , pid )
473
472
}
474
473
worker .pid = pid
475
- worker .setState (wsInit , false )
474
+ worker .setState (wsInit )
476
475
return nil
477
476
}
478
477
@@ -542,7 +541,7 @@ func (worker *WorkerClient) attachToWorker() (err error) {
542
541
logger .GetLogger ().Log (logger .Info , "Got control message from worker (" , worker .ID , "," , worker .pid , "," , worker .racID , "," , worker .dbUname , ")" )
543
542
}
544
543
545
- worker .setState (wsAcpt , false )
544
+ worker .setState (wsAcpt )
546
545
547
546
pool , err := GetWorkerBrokerInstance ().GetWorkerPool (worker .Type , worker .instID , worker .shardID )
548
547
if err != nil {
@@ -634,11 +633,11 @@ type WorkerClientRecoverParam struct {
634
633
func (worker * WorkerClient ) Recover (p * WorkerPool , ticket string , recovParam WorkerClientRecoverParam , info * strandedCalInfo , param ... int ) {
635
634
if atomic .CompareAndSwapInt32 (& worker .isUnderRecovery , 0 , 1 ) {
636
635
if logger .GetLogger ().V (logger .Debug ) {
637
- logger .GetLogger ().Log (logger .Debug , "begin recover worker Id: " , worker . ID , " process Id : " , worker .pid )
636
+ logger .GetLogger ().Log (logger .Debug , "begin recover worker: " , worker .pid )
638
637
}
639
638
} else {
640
639
if logger .GetLogger ().V (logger .Debug ) {
641
- logger .GetLogger ().Log (logger .Debug , "worker already underrecovery: " , worker .ID , " process Id: " , worker . pid )
640
+ logger .GetLogger ().Log (logger .Debug , "worker already underrecovery: " , worker .pid )
642
641
}
643
642
//
644
643
// defer will not be called.
@@ -666,10 +665,7 @@ func (worker *WorkerClient) Recover(p *WorkerPool, ticket string, recovParam Wor
666
665
return
667
666
}
668
667
priorWorkerStatus := worker .Status
669
- if logger .GetLogger ().V (logger .Debug ) {
670
- logger .GetLogger ().Log (logger .Debug , fmt .Sprintf ("about to recover worker Id: %d, worker process Id: %d as part of reconvery process, setting worker state to Quece" , worker .ID , worker .pid ))
671
- }
672
- worker .setState (wsQuce , true )
668
+ worker .setState (wsQuce )
673
669
killparam := common .StrandedClientClose
674
670
if len (param ) > 0 {
675
671
killparam = param [0 ]
@@ -680,17 +676,9 @@ func (worker *WorkerClient) Recover(p *WorkerPool, ticket string, recovParam Wor
680
676
select {
681
677
case <- workerRecoverTimeout :
682
678
worker .thr .CanRun ()
683
- worker .setState (wsInit , true ) // Set the worker state to INIT when we decide to Terminate the worker
684
- GetStateLog ().PublishStateEvent (StateEvent {eType : WorkerStateEvt , shardID : worker .shardID , wType : worker .Type , instID : worker .instID , workerID : worker .ID , newWState : worker .Status })
679
+ worker .setState (wsInit ) // Set the worker state to INIT when we decide to Terminate the worker
685
680
worker .Terminate ()
686
681
worker .callogStranded ("RECYCLED" , info )
687
- if logger .GetLogger ().V (logger .Debug ) {
688
- logger .GetLogger ().Log (logger .Debug , fmt .Sprintf ("worker Id: %d and process: %d recovered as part of workerRecoverTimeout set status to INIT" , worker .ID , worker .pid ))
689
- }
690
- err := p .RestartWorker (worker )
691
- if err != nil {
692
- logger .GetLogger ().Log (logger .Alert , fmt .Sprintf ("worker: %d failed to restart worker process" , worker .ID ))
693
- }
694
682
return
695
683
case msg , ok := <- worker .channel ():
696
684
if ! ok {
@@ -727,10 +715,7 @@ func (worker *WorkerClient) Recover(p *WorkerPool, ticket string, recovParam Wor
727
715
}
728
716
worker .callogStranded ("RECOVERED" , info )
729
717
730
- worker .setState (wsFnsh , true )
731
- if logger .GetLogger ().V (logger .Debug ) {
732
- logger .GetLogger ().Log (logger .Debug , fmt .Sprintf ("worker Id: %d, worker process: %d recovered as part of message from channel set status to FINSH" , worker .ID , worker .pid ))
733
- }
718
+ worker .setState (wsFnsh )
734
719
p .ReturnWorker (worker , ticket )
735
720
//
736
721
// do not set state to ACPT since worker could already be picked up by another
@@ -910,13 +895,13 @@ func (worker *WorkerClient) doRead() {
910
895
logger .GetLogger ().Log (logger .Verbose , "workerclient (<<< pid =" , worker .pid , ",wrqId:" , worker .rqId , "): EOR code:" , eor , ", rqId: " , rqId , ", data:" , DebugString (payload ))
911
896
}
912
897
if eor == common .EORFree {
913
- worker .setState (wsFnsh , false )
898
+ worker .setState (wsFnsh )
914
899
/*worker.sqlStartTimeMs = 0
915
900
if logger.GetLogger().V(logger.Verbose) {
916
901
logger.GetLogger().Log(logger.Verbose, "workerclient sqltime=", worker.sqlStartTimeMs)
917
902
}*/
918
903
} else {
919
- worker .setState (wsWait , false )
904
+ worker .setState (wsWait )
920
905
}
921
906
if eor != common .EORMoreIncomingRequests {
922
907
worker .outCh <- & workerMsg {data : payload , eor : true , free : (eor == common .EORFree ), inTransaction : ((eor == common .EORInTransaction ) || (eor == common .EORInCursorInTransaction )), rqId : rqId }
@@ -940,7 +925,7 @@ func (worker *WorkerClient) doRead() {
940
925
return
941
926
default :
942
927
if ns .Cmd != common .RcStillExecuting {
943
- worker .setState (wsWait , false )
928
+ worker .setState (wsWait )
944
929
}
945
930
if logger .GetLogger ().V (logger .Verbose ) {
946
931
logger .GetLogger ().Log (logger .Verbose , "workerclient (<<< pid =" , worker .pid , "): data:" , DebugString (ns .Serialized ), len (ns .Serialized ))
@@ -956,7 +941,7 @@ func (worker *WorkerClient) doRead() {
956
941
957
942
// Write sends a message to the worker
958
943
func (worker * WorkerClient ) Write (ns * netstring.Netstring , nsCount uint16 ) error {
959
- worker .setState (wsBusy , false )
944
+ worker .setState (wsBusy )
960
945
961
946
worker .rqId += uint32 (nsCount )
962
947
@@ -980,24 +965,12 @@ func (worker *WorkerClient) Write(ns *netstring.Netstring, nsCount uint16) error
980
965
}
981
966
982
967
// setState updates the worker state
983
- func (worker * WorkerClient ) setState (status HeraWorkerStatus , callFromRecovery bool ) {
968
+ func (worker * WorkerClient ) setState (status HeraWorkerStatus ) {
984
969
if worker .Status == status {
985
970
return
986
971
}
987
- if worker .isUnderRecovery == 1 && ! callFromRecovery {
988
- if logger .GetLogger ().V (logger .Info ) {
989
- //If worker under recovery drinup of channel happens as part of DrainResponseChannel
990
- logger .GetLogger ().Log (logger .Info , "worker : " , worker .ID , " is under recovery. " +
991
- "workerclient pid=" , worker .pid , "not allowed changing status from" , worker .Status , "to" , status )
992
- }
993
- if logger .GetLogger ().V (logger .Debug ) {
994
- worker .printCallStack ()
995
- }
996
- return
997
- }
998
972
if logger .GetLogger ().V (logger .Debug ) {
999
- logger .GetLogger ().Log (logger .Debug , "worker Id=" , worker .ID , " worker pid=" , worker .pid , " changing status from" , worker .Status , "to" , status )
1000
- worker .printCallStack ()
973
+ logger .GetLogger ().Log (logger .Debug , "worker pid=" , worker .pid , " changing status from" , worker .Status , "to" , status )
1001
974
}
1002
975
1003
976
// TODO: sync atomic set
@@ -1029,27 +1002,3 @@ func (worker *WorkerClient) isProcessRunning() bool {
1029
1002
}
1030
1003
return true
1031
1004
}
1032
-
1033
- func (worker * WorkerClient ) printCallStack () {
1034
- // Define a large enough buffer to capture the stack.
1035
- const depth = 64
1036
- pcs := make ([]uintptr , depth )
1037
-
1038
- // Collect the stack trace.
1039
- n := runtime .Callers (2 , pcs ) // Skip the first 2 callers (runtime and printCallStack itself).
1040
- frames := runtime .CallersFrames (pcs [:n ])
1041
- indent := 0
1042
- // Iterate through the frames and print function names and line numbers.
1043
- var builder strings.Builder
1044
- builder .WriteString (fmt .Sprintf ("worker Id= %d Process Id= %d Call Stack:" , worker .ID , worker .pid ))
1045
- for {
1046
- frame , more := frames .Next ()
1047
- builder .WriteString (fmt .Sprintf ("%s - %s\n " , strings .Repeat (" " , indent ), frame .Function ))
1048
- builder .WriteString (fmt .Sprintf ("%s at %s:%d\n " , strings .Repeat (" " , indent ), frame .File , frame .Line ))
1049
- indent ++
1050
- if ! more {
1051
- break
1052
- }
1053
- }
1054
- logger .GetLogger ().Log (logger .Debug , builder .String ())
1055
- }
0 commit comments