Skip to content

Commit 3db58c8

Browse files
authored
Merge pull request rubrikinc#14 from rubrikinc/failedjobstshoot
Fixed client dumping on receiving no, or bad data back from Rubrik
2 parents f50d6cc + 994b131 commit 3db58c8

10 files changed

+102
-55
lines changed

quickstart.md

+17
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ docker build -t rubrikinc/prometheus-client -f Dockerfile .
3333

3434
The resulting docker image will be in the local repository on the server.
3535

36+
This Docker image is also available on Docker Hub at [this link](https://hub.docker.com/repository/docker/rubrikinc/rubrik-prometheus-client).
37+
3638
## Using the Prometheus Agent
3739

3840
Ensure that the following environment variables exist, and are defined: `rubrik_cdm_node_ip`, `rubrik_cdm_username`, `rubrik_cdm_password`.
@@ -53,3 +55,18 @@ docker run -d -t -e rubrik_cdm_node_ip=$rubrik_cdm_node_ip \
5355
```
5456

5557
This will map port 8080 inside the container to port 8080 on the Docker host. Metrics will then be browsable via `http://localhost:8080/metrics`.
58+
59+
### Using an alternative HTTP port
60+
61+
To use an alternative HTTP port (if the default of 8080 is already in use, or using it is not desirable), set the environment variable below to override the port:
62+
63+
```bash
64+
export RUBRIK_PROMETHEUS_PORT=9090
65+
```
66+
67+
When the application starts, the log will show the port being used:
68+
69+
```none
70+
2020/10/22 11:21:47 Cluster name: rubrik-1
71+
2020/10/22 11:21:47 Starting on HTTP port 9090
72+
```

src/golang/jobs/rubrik_failed_jobs.go

+26-18
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ import (
44
"log"
55
"strconv"
66
"strings"
7-
"github.com/rubrikinc/rubrik-sdk-for-go/rubrikcdm"
7+
88
"github.com/prometheus/client_golang/prometheus"
9+
"github.com/rubrikinc/rubrik-sdk-for-go/rubrikcdm"
910
)
1011

1112
var (
@@ -36,29 +37,34 @@ func init() {
3637

3738
// GetMssqlFailedJobs ...
3839
func GetMssqlFailedJobs(rubrik *rubrikcdm.Credentials, clusterName string) {
39-
clusterVersion,err := rubrik.ClusterVersion()
40+
clusterVersion, err := rubrik.ClusterVersion()
4041
if err != nil {
41-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
42+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
43+
return
4244
}
43-
clusterMajorVersion,err := strconv.ParseInt(strings.Split(clusterVersion,".")[0], 10, 64)
45+
clusterMajorVersion, err := strconv.ParseInt(strings.Split(clusterVersion, ".")[0], 10, 64)
4446
if err != nil {
45-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
47+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
48+
return
4649
}
47-
clusterMinorVersion,err := strconv.ParseInt(strings.Split(clusterVersion,".")[1], 10, 64)
50+
clusterMinorVersion, err := strconv.ParseInt(strings.Split(clusterVersion, ".")[1], 10, 64)
4851
if err != nil {
49-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
52+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
53+
return
5054
}
5155
if (clusterMajorVersion == 5 && clusterMinorVersion < 2) || clusterMajorVersion < 5 { // cluster version is older than 5.2
52-
eventData,err := rubrik.Get("internal","/event_series?status=Failure&event_type=Backup&object_type=Mssql")
56+
eventData, err := rubrik.Get("internal", "/event_series?status=Failure&event_type=Backup&object_type=Mssql", 60)
5357
if err != nil {
54-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
58+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
59+
return
5560
}
56-
if (eventData.(map[string]interface{})["data"] != nil) {
61+
if eventData != nil || eventData.(map[string]interface{})["data"] != nil {
5762
for _, v := range eventData.(map[string]interface{})["data"].([]interface{}) {
5863
thisEventSeriesID := v.(map[string]interface{})["eventSeriesId"]
59-
eventSeriesData,err := rubrik.Get("internal","/event_series/"+thisEventSeriesID.(string))
64+
eventSeriesData, err := rubrik.Get("internal", "/event_series/"+thisEventSeriesID.(string), 60)
6065
if err != nil {
61-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
66+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
67+
return
6268
}
6369
hasFailedEvent := false
6470
for _, w := range eventSeriesData.(map[string]interface{})["eventDetailList"].([]interface{}) {
@@ -110,16 +116,18 @@ func GetMssqlFailedJobs(rubrik *rubrikcdm.Credentials, clusterName string) {
110116
}
111117
}
112118
} else { // cluster version is 5.2 or newer
113-
eventData,err := rubrik.Get("v1","/event/latest?event_status=Failure&event_type=Backup&object_type=Mssql")
119+
eventData, err := rubrik.Get("v1", "/event/latest?event_status=Failure&event_type=Backup&object_type=Mssql", 60)
114120
if err != nil {
115-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
121+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
122+
return
116123
}
117-
if (eventData.(map[string]interface{})["data"] != nil) {
124+
if eventData != nil || eventData.(map[string]interface{})["data"] != nil {
118125
for _, v := range eventData.(map[string]interface{})["data"].([]interface{}) {
119126
thisEventSeriesID := v.(map[string]interface{})["latestEvent"].(map[string]interface{})["eventSeriesId"]
120-
eventSeriesData,err := rubrik.Get("v1","/event_series/"+thisEventSeriesID.(string))
127+
eventSeriesData, err := rubrik.Get("v1", "/event_series/"+thisEventSeriesID.(string), 60)
121128
if err != nil {
122-
log.Println("Error from jobs.GetMssqlFailedJobs: ",err)
129+
log.Printf("Error from jobs.GetMssqlFailedJobs: ", err)
130+
return
123131
}
124132
hasFailedEvent := false
125133
for _, w := range eventSeriesData.(map[string]interface{})["eventDetailList"].([]interface{}) {
@@ -169,4 +177,4 @@ func GetMssqlFailedJobs(rubrik *rubrikcdm.Credentials, clusterName string) {
169177
}
170178
}
171179
}
172-
}
180+
}

src/golang/livemount/rubrik_live_mount.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@ func init() {
3030

3131
// GetMssqlLiveMountAges ...
3232
func GetMssqlLiveMountAges(rubrik *rubrikcdm.Credentials, clusterName string) {
33-
mountData,err := rubrik.Get("v1","/mssql/db/mount") // get our mssql live mount summary
33+
mountData,err := rubrik.Get("v1","/mssql/db/mount", 60) // get our mssql live mount summary
3434
if err != nil {
35-
log.Println("Error from livemount.GetMssqlLiveMountAges: ",err)
35+
log.Printf("Error from livemount.GetMssqlLiveMountAges: ",err)
36+
return
3637
}
3738
for _, v := range mountData.(map[string]interface{})["data"].([]interface{}) {
3839
thisSourceDatabaseName := v.(map[string]interface{})["sourceDatabaseName"]

src/golang/main.go

+18-9
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,38 @@ Requirements:
1212
package main
1313

1414
import (
15-
"fmt"
1615
"log"
1716
"net/http"
1817
"time"
19-
"github.com/rubrikinc/rubrik-sdk-for-go/rubrikcdm"
18+
"os"
2019
"github.com/prometheus/client_golang/prometheus/promhttp"
21-
"github.com/rubrikinc/rubrik-client-for-prometheus/src/golang/stats"
22-
"github.com/rubrikinc/rubrik-client-for-prometheus/src/golang/livemount"
2320
"github.com/rubrikinc/rubrik-client-for-prometheus/src/golang/jobs"
21+
"github.com/rubrikinc/rubrik-client-for-prometheus/src/golang/livemount"
22+
"github.com/rubrikinc/rubrik-client-for-prometheus/src/golang/stats"
23+
"github.com/rubrikinc/rubrik-sdk-for-go/rubrikcdm"
2424
)
2525

2626
func main() {
27+
// set our Prometheus variables
28+
httpPortEnv, _ := os.LookupEnv("RUBRIK_PROMETHEUS_PORT")
29+
var httpPort string;
30+
if httpPortEnv == "" {
31+
httpPort = "8080"
32+
} else {
33+
httpPort = httpPortEnv
34+
}
2735
rubrik, err := rubrikcdm.ConnectEnv()
2836
if err != nil {
29-
log.Println("Error from main.go:")
37+
log.Printf("Error from main.go:")
3038
log.Fatal(err)
3139
}
32-
clusterDetails,err := rubrik.Get("v1","/cluster/me")
40+
clusterDetails, err := rubrik.Get("v1", "/cluster/me", 60)
3341
if err != nil {
34-
log.Println("Error from main.go:")
42+
log.Printf("Error from main.go:")
3543
log.Fatal(err)
3644
}
3745
clusterName := clusterDetails.(map[string]interface{})["name"]
38-
fmt.Println("Cluster name: "+clusterName.(string))
46+
log.Printf("Cluster name: " + clusterName.(string))
3947

4048
// get storage summary
4149
go func() {
@@ -105,5 +113,6 @@ func main() {
105113
// The Handler function provides a default handler to expose metrics
106114
// via an HTTP server. "/metrics" is the usual endpoint for that.
107115
http.Handle("/metrics", promhttp.Handler())
108-
log.Fatal(http.ListenAndServe(":8080" , nil))
116+
log.Printf("Starting on HTTP port "+httpPort)
117+
log.Fatal(http.ListenAndServe(":"+httpPort, nil))
109118
}

src/golang/stats/rubrik_compliance_stats.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,16 @@ func init() {
3636

3737
// GetSlaComplianceStats ...
3838
func GetSlaComplianceStats(rubrik *rubrikcdm.Credentials, clusterName string) {
39-
reportData,err := rubrik.Get("internal","/report?report_template=SlaComplianceSummary&report_type=Canned") // get our sla compliance summary report
39+
reportData,err := rubrik.Get("internal","/report?report_template=SlaComplianceSummary&report_type=Canned", 60) // get our sla compliance summary report
4040
if err != nil {
41-
log.Println("Error from stats.GetSlaComplianceStats: ",err)
41+
log.Printf("Error from stats.GetSlaComplianceStats: ",err)
4242
}
4343
reports := reportData.(map[string]interface{})["data"].([]interface{})
4444
reportID := reports[0].(map[string]interface{})["id"]
4545
chartData,err := rubrik.Get("internal","/report/"+reportID.(string)+"/chart?chart_id=chart0") // get our chart for the report
4646
if err != nil {
47-
log.Println("Error from stats.GetSlaComplianceStats: ",err)
47+
log.Printf("Error from stats.GetSlaComplianceStats: ",err)
48+
return
4849
}
4950
for _, v := range chartData.([]interface{}) {
5051
dataColumns := v.(map[string]interface{})["dataColumns"]

src/golang/stats/rubrik_job_stats.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,17 @@ func init() {
4646

4747
// Get24HJobStats ...
4848
func Get24HJobStats(rubrik *rubrikcdm.Credentials, clusterName string) {
49-
reportData,err := rubrik.Get("internal","/report?report_template=ProtectionTasksDetails&report_type=Canned") // get our protection tasks details report
49+
reportData,err := rubrik.Get("internal","/report?report_template=ProtectionTasksDetails&report_type=Canned", 60) // get our protection tasks details report
5050
if err != nil {
51-
log.Println("Error from stats.Get24HJobStats: ",err)
51+
log.Printf("Error from stats.Get24HJobStats: ",err)
52+
return
5253
}
5354
reports := reportData.(map[string]interface{})["data"].([]interface{})
5455
reportID := reports[0].(map[string]interface{})["id"]
55-
chartData,err := rubrik.Get("internal","/report/"+reportID.(string)+"/chart?chart_id=chart0") // get our chart for the report
56+
chartData,err := rubrik.Get("internal","/report/"+reportID.(string)+"/chart?chart_id=chart0", 60) // get our chart for the report
5657
if err != nil {
57-
log.Println("Error from stats.Get24HJobStats: ",err)
58+
log.Printf("Error from stats.Get24HJobStats: ",err)
59+
return
5860
}
5961
for _, v := range chartData.([]interface{}) {
6062
dataColumns := v.(map[string]interface{})["dataColumns"]

src/golang/stats/rubrik_mssql_stats.go

+7-5
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ func init() {
4343

4444
// GetMssqlCapacityStats ...
4545
func GetMssqlCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
46-
reportData,err := rubrik.Get("internal","/report?report_template=ObjectProtectionSummary&report_type=Canned") // get our object protection summary report
46+
reportData,err := rubrik.Get("internal","/report?report_template=ObjectProtectionSummary&report_type=Canned", 60) // get our object protection summary report
4747
if err != nil {
48-
log.Println("Error from stats.GetMssqlCapacityStats: ",err)
48+
log.Printf("Error from stats.GetMssqlCapacityStats: ",err)
49+
return
4950
}
5051
reports := reportData.(map[string]interface{})["data"].([]interface{})
5152
reportID := reports[0].(map[string]interface{})["id"]
@@ -57,9 +58,10 @@ func GetMssqlCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
5758
}
5859
for {
5960
hasMore := true
60-
tableData,err := rubrik.Post("internal","/report/"+reportID.(string)+"/table",body) // get our first page of data for the report
61+
tableData,err := rubrik.Post("internal","/report/"+reportID.(string)+"/table",body, 60) // get our first page of data for the report
6162
if err != nil {
62-
log.Println("Error from stats.GetMssqlCapacityStats: ",err)
63+
log.Printf("Error from stats.GetMssqlCapacityStats: ",err)
64+
return
6365
}
6466
dataGrid := tableData.(map[string]interface{})["dataGrid"].([]interface{})
6567
hasMore = tableData.(map[string]interface{})["hasMore"].(bool)
@@ -96,7 +98,7 @@ func GetMssqlCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
9698
thisLocation).Set(thisArchiveStorage)
9799
}
98100
if !hasMore {
99-
break
101+
return
100102
} else {
101103
body = map[string]interface{}{
102104
"limit": 1000,

src/golang/stats/rubrik_node_stats.go

+9-6
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,17 @@ func init() {
6060

6161
// GetNodeStats ...
6262
func GetNodeStats(rubrik *rubrikcdm.Credentials, clusterName string) {
63-
nodes,err := rubrik.Get("internal","/node")
63+
nodes,err := rubrik.Get("internal","/node", 60)
6464
if err != nil {
65-
log.Println("Error from stats.GetNodeStats: ",err)
65+
log.Printf("Error from stats.GetNodeStats: ",err)
66+
return
6667
}
6768
for _, v := range nodes.(map[string]interface{})["data"].([]interface{}) {
6869
thisNode := (v.(interface{}).(map[string]interface{})["id"])
69-
nodeDetail,err := rubrik.Get("internal","/node/"+thisNode.(string))
70+
nodeDetail,err := rubrik.Get("internal","/node/"+thisNode.(string), 60)
7071
if err != nil {
71-
log.Println("Error from stats.GetNodeStats: ",err)
72+
log.Printf("Error from stats.GetNodeStats: ",err)
73+
return
7274
}
7375
thisNodeStatus := nodeDetail.(map[string]interface{})["status"]
7476
switch thisNodeStatus {
@@ -78,9 +80,10 @@ func GetNodeStats(rubrik *rubrikcdm.Credentials, clusterName string) {
7880
rubrikNodeStatus.WithLabelValues(clusterName,thisNode.(string)).Set(0)
7981
}
8082

81-
nodeStats,err := rubrik.Get("internal","/node/"+thisNode.(string)+"/stats?range=-6min")
83+
nodeStats,err := rubrik.Get("internal","/node/"+thisNode.(string)+"/stats?range=-6min", 60)
8284
if err != nil {
83-
log.Println("Error from stats.GetNodeStats: ",err)
85+
log.Printf("Error from stats.GetNodeStats: ",err)
86+
return
8487
}
8588
// get cpu stat
8689
cpuData := nodeStats.(map[string]interface{})["cpuStat"].([]interface{})

src/golang/stats/rubrik_oracle_stats.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ func init() {
4343

4444
// GetOracleCapacityStats ...
4545
func GetOracleCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
46-
reportData,err := rubrik.Get("internal","/report?report_template=ObjectProtectionSummary&report_type=Canned") // get our object protection summary report
46+
reportData,err := rubrik.Get("internal","/report?report_template=ObjectProtectionSummary&report_type=Canned", 60) // get our object protection summary report
4747
if err != nil {
4848
log.Fatal(err)
49+
return
4950
}
5051
reports := reportData.(map[string]interface{})["data"].([]interface{})
5152
reportID := reports[0].(map[string]interface{})["id"]
@@ -57,9 +58,10 @@ func GetOracleCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
5758
}
5859
for {
5960
hasMore := true
60-
tableData,err := rubrik.Post("internal","/report/"+reportID.(string)+"/table",body) // get our first page of data for the report
61+
tableData,err := rubrik.Post("internal","/report/"+reportID.(string)+"/table",body, 60) // get our first page of data for the report
6162
if err != nil {
6263
log.Fatal(err)
64+
return
6365
}
6466
dataGrid := tableData.(map[string]interface{})["dataGrid"].([]interface{})
6567
hasMore = tableData.(map[string]interface{})["hasMore"].(bool)
@@ -70,7 +72,7 @@ func GetOracleCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
7072
thisLocalStorage, thisArchiveStorage := 0.0,0.0
7173
for i := 0; i < len(columns); i++ {
7274
switch columns[i] {
73-
case "ObjectId":
75+
case "ObjectId","ObjectLinkingId":
7476
thisObjectID = v.([]interface{})[i].(string)
7577
case "ObjectName":
7678
thisObjectName = v.([]interface{})[i].(string)
@@ -94,7 +96,7 @@ func GetOracleCapacityStats(rubrik *rubrikcdm.Credentials, clusterName string) {
9496
thisLocation).Set(thisArchiveStorage)
9597
}
9698
if !hasMore {
97-
break
99+
return
98100
} else {
99101
body = map[string]interface{}{
100102
"limit": 1000,

src/golang/stats/rubrik_storage_stats.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,10 @@ func init() {
8686

8787
// GetStorageSummaryStats ...
8888
func GetStorageSummaryStats(rubrik *rubrikcdm.Credentials, clusterName string) {
89-
storageStats,err := rubrik.Get("internal","/stats/system_storage")
89+
storageStats,err := rubrik.Get("internal","/stats/system_storage", 60)
9090
if err != nil {
91-
log.Println("Error from stats.GetStorageSummaryStats: ",err)
91+
log.Printf("Error from stats.GetStorageSummaryStats: ",err)
92+
return
9293
}
9394
// get total storage stat
9495
if total, ok := storageStats.(map[string]interface{})["total"].(float64); ok {
@@ -118,9 +119,10 @@ func GetStorageSummaryStats(rubrik *rubrikcdm.Credentials, clusterName string) {
118119

119120
// GetRunwayRemaining ...
120121
func GetRunwayRemaining(rubrik *rubrikcdm.Credentials, clusterName string) {
121-
runwayRemaining,err := rubrik.Get("internal","/stats/runway_remaining")
122+
runwayRemaining,err := rubrik.Get("internal","/stats/runway_remaining", 60)
122123
if err != nil {
123-
log.Println("Error from stats.GetRunwayRemaining: ",err)
124+
log.Printf("Error from stats.GetRunwayRemaining: ",err)
125+
return
124126
}
125127
// get runway remaining stat
126128
if runway, ok := runwayRemaining.(map[string]interface{})["days"].(float64); ok {

0 commit comments

Comments
 (0)