@@ -45,14 +45,15 @@ func (r *JobFlowController) deployJobFlow(ctx context.Context, jobFlow jobflowv1
45
45
job .Name = jobName
46
46
job .Namespace = jobFlow .Namespace
47
47
job .Spec = flow .JobTemplate
48
+ job .Spec .Template .Spec .RestartPolicy = v1 .RestartPolicyNever
48
49
// 直接创建
49
50
if err = r .client .Create (ctx , job ); err != nil {
50
51
if errors .IsAlreadyExists (err ) {
51
52
continue
52
53
}
53
54
return err
54
55
}
55
- r .Recorder .Eventf (& jobFlow , v1 .EventTypeNormal , "Created" , fmt .Sprintf ("create a job named %v without dependencies" , job .Name ))
56
+ r .event .Eventf (& jobFlow , v1 .EventTypeNormal , "Created" , fmt .Sprintf ("create a job named %v without dependencies" , job .Name ))
56
57
} else {
57
58
// 如果有依赖的情况
58
59
// query dependency meets the requirements
@@ -64,6 +65,9 @@ func (r *JobFlowController) deployJobFlow(ctx context.Context, jobFlow jobflowv1
64
65
job .Name = jobName
65
66
job .Namespace = jobFlow .Namespace
66
67
job .Spec = flow .JobTemplate
68
+ job .Spec .Template .Spec .RestartPolicy = v1 .RestartPolicyNever
69
+ var cc int32
70
+ job .Spec .BackoffLimit = & cc
67
71
68
72
targetJobName := getJobName (jobFlow .Name , targetName )
69
73
namespacedName := types.NamespacedName {
@@ -85,6 +89,11 @@ func (r *JobFlowController) deployJobFlow(ctx context.Context, jobFlow jobflowv1
85
89
if dependenciesJob .Status .Succeeded != 1 {
86
90
flag = false
87
91
}
92
+
93
+ // 如果依赖的 job 出错,直接退出
94
+ if dependenciesJob .Status .Failed == 1 {
95
+ return errors .NewBadRequest (fmt .Sprintf ("dependencies Job %s execute error" , dependenciesJob .Name ))
96
+ }
88
97
}
89
98
// 如果已经完成,就进行下去
90
99
if flag {
@@ -94,7 +103,7 @@ func (r *JobFlowController) deployJobFlow(ctx context.Context, jobFlow jobflowv1
94
103
}
95
104
return err
96
105
}
97
- r .Recorder .Eventf (& jobFlow , v1 .EventTypeNormal , "Created" , fmt .Sprintf ("create job named %v for next step" , job .Name ))
106
+ r .event .Eventf (& jobFlow , v1 .EventTypeNormal , "Created" , fmt .Sprintf ("create job named %v for next step" , job .Name ))
98
107
}
99
108
}
100
109
continue
@@ -116,14 +125,17 @@ func (r *JobFlowController) updateJobFlowStatus(ctx context.Context, jobFlow *jo
116
125
allJobList := new (batchv1.JobList )
117
126
err := r .client .List (ctx , allJobList )
118
127
if err != nil {
119
- klog .Error (err , "" )
128
+ klog .Error ("list error: " , err )
120
129
return err
121
130
}
122
131
jobFlowStatus , err := getAllJobStatus (jobFlow , allJobList )
123
132
if err != nil {
124
133
return err
125
134
}
126
135
jobFlow .Status = * jobFlowStatus
136
+ if jobFlowStatus .State == jobflowv1alpha1 .Succeed || jobFlowStatus .State == jobflowv1alpha1 .Failed {
137
+ r .event .Eventf (jobFlow , v1 .EventTypeNormal , jobFlowStatus .State , fmt .Sprintf ("finshed JobFlow named %s" , jobFlow .Name ))
138
+ }
127
139
if err = r .client .Status ().Update (ctx , jobFlow ); err != nil {
128
140
if errors .IsNotFound (err ) {
129
141
return nil
@@ -133,17 +145,13 @@ func (r *JobFlowController) updateJobFlowStatus(ctx context.Context, jobFlow *jo
133
145
return nil
134
146
}
135
147
136
- const (
137
- JobFlow = "JobFlow"
138
- )
139
-
140
- // getAllJobStatus Get the information of all created jobs
148
+ // getAllJobStatus 记录 Job Status
141
149
func getAllJobStatus (jobFlow * jobflowv1alpha1.JobFlow , allJobList * batchv1.JobList ) (* jobflowv1alpha1.JobFlowStatus , error ) {
142
- // 过去掉只留 job flow 相关的
150
+ // 过去掉只留 jobflow 相关的 job
143
151
jobListRes := make ([]batchv1.Job , 0 )
144
152
for _ , job := range allJobList .Items {
145
153
for _ , reference := range job .OwnerReferences {
146
- if reference .Kind == JobFlow && reference .Name == jobFlow .Name {
154
+ if reference .Kind == jobflowv1alpha1 . JobFlowKind && reference .Name == jobFlow .Name {
147
155
jobListRes = append (jobListRes , job )
148
156
}
149
157
}
@@ -176,7 +184,7 @@ func getAllJobStatus(jobFlow *jobflowv1alpha1.JobFlow, allJobList *batchv1.JobLi
176
184
}
177
185
}
178
186
179
- // 确认 jobflow 狀態
187
+ // 确认 jobFlow 狀態
180
188
if jobFlow .DeletionTimestamp != nil {
181
189
jobFlowStatus .State = jobflowv1alpha1 .Terminating
182
190
} else {
@@ -198,9 +206,8 @@ func getAllJobStatus(jobFlow *jobflowv1alpha1.JobFlow, allJobList *batchv1.JobLi
198
206
199
207
func (r * JobFlowController ) OnUpdateJobHandler (event event.UpdateEvent , limitingInterface workqueue.RateLimitingInterface ) {
200
208
for _ , ref := range event .ObjectNew .GetOwnerReferences () {
201
- fmt .Println ("ccccccc" )
202
209
if ref .Kind == jobflowv1alpha1 .JobFlowKind && ref .APIVersion == jobflowv1alpha1 .JobFlowApiVersion {
203
- // 重新放入Reconcile调协方法
210
+ // 重新放入 Reconcile 调协方法
204
211
limitingInterface .Add (reconcile.Request {
205
212
NamespacedName : types.NamespacedName {
206
213
Name : ref .Name , Namespace : event .ObjectNew .GetNamespace (),
@@ -212,9 +219,8 @@ func (r *JobFlowController) OnUpdateJobHandler(event event.UpdateEvent, limiting
212
219
213
220
func (r * JobFlowController ) OnDeleteJobHandler (event event.DeleteEvent , limitingInterface workqueue.RateLimitingInterface ) {
214
221
for _ , ref := range event .Object .GetOwnerReferences () {
215
- fmt .Println ("ddddddddd" )
216
222
if ref .Kind == jobflowv1alpha1 .JobFlowKind && ref .APIVersion == jobflowv1alpha1 .JobFlowApiVersion {
217
- // 重新入列,这样删除pod后,就会进入调和loop,发现ownerReference还在,会立即创建出新的pod。
223
+ // 重新入列
218
224
klog .Info ("delete pod: " , event .Object .GetName (), event .Object .GetObjectKind ())
219
225
limitingInterface .Add (reconcile.Request {
220
226
NamespacedName : types.NamespacedName {Name : ref .Name ,
0 commit comments