diff --git a/cluster/cluster.go b/cluster/cluster.go index 61a5c8847..fbcacb841 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -10,7 +10,6 @@ import ( "crypto/tls" "encoding/json" "errors" - "fmt" "hash/crc64" "io/ioutil" "os" @@ -853,7 +852,7 @@ func (cluster *Cluster) MonitorVariablesDiff() { cluster.LogPrintf(LvlErr, "Encoding variables diff %s", err) return } - cluster.SetState("WARN0084", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0084"], string(jtext)), ErrFrom: "MON", ServerUrl: cluster.GetMaster().URL}) + cluster.SetSugarState("WARN0084", "MON", cluster.GetMaster().URL, string(jtext)) } } diff --git a/cluster/cluster_bck.go b/cluster/cluster_bck.go index 93d8622f2..54cde8294 100644 --- a/cluster/cluster_bck.go +++ b/cluster/cluster_bck.go @@ -10,7 +10,6 @@ import ( "bytes" "encoding/json" "errors" - "fmt" "io" "os" "os/exec" @@ -20,7 +19,6 @@ import ( "github.com/signal18/replication-manager/config" v3 "github.com/signal18/replication-manager/repmanv3" - "github.com/signal18/replication-manager/utils/state" ) /* Replaced by v3.Backup @@ -49,7 +47,7 @@ func (cluster *Cluster) ResticPurgeRepo() error { stderr := io.MultiWriter(os.Stderr, &stderrBuf) resticcmd.Env = cluster.ResticGetEnv() if err := resticcmd.Start(); err != nil { - cluster.SetState("WARN0096", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0096"], resticcmd.Path, err, ""), ErrFrom: "BACKUP"}) + cluster.SetSugarState("WARN0096", "BACKUP", "", resticcmd.Path, err) return err } var wg sync.WaitGroup @@ -64,7 +62,7 @@ func (cluster *Cluster) ResticPurgeRepo() error { err := resticcmd.Wait() if err != nil { - cluster.sme.AddState("WARN0094", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0094"], err, string(stdoutBuf.Bytes()), string(stderrBuf.Bytes())), ErrFrom: "CHECK"}) + cluster.AddSugarState("WARN0094", "CHECK", "", err, stdoutBuf.Bytes(), stderrBuf.Bytes()) return err } if errStdout != nil || errStderr != nil { @@ -107,7 +105,7 @@ func (cluster *Cluster) ResticInitRepo() error { resticcmd.Env = cluster.ResticGetEnv() if err := resticcmd.Start(); err != nil { - cluster.SetState("WARN0095", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0095"], resticcmd.Path, err, ""), ErrFrom: "BACKUP"}) + cluster.SetSugarState("WARN0095", "BACKUP", "", resticcmd.Path, err) return err } var wg sync.WaitGroup @@ -122,7 +120,7 @@ func (cluster *Cluster) ResticInitRepo() error { err := resticcmd.Wait() if err != nil { - cluster.sme.AddState("WARN0095", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0095"], err, string(stdoutBuf.Bytes()), string(stderrBuf.Bytes())), ErrFrom: "CHECK"}) + cluster.AddSugarState("WARN0095", "CHECK", "", err, stdoutBuf.Bytes(), stderrBuf.Bytes()) } if errStdout != nil || errStderr != nil { return errors.New("failed to capture stdout or stderr\n") @@ -144,7 +142,7 @@ func (cluster *Cluster) ResticFetchRepo() error { resticcmd.Env = cluster.ResticGetEnv() if err := resticcmd.Start(); err != nil { - cluster.SetState("WARN0094", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0094"], resticcmd.Path, err, ""), ErrFrom: "BACKUP"}) + cluster.SetSugarState("WARN0094", "BACKUP", "", resticcmd.Path, err) return err } var wg sync.WaitGroup @@ -159,7 +157,7 @@ func (cluster *Cluster) ResticFetchRepo() error { err := resticcmd.Wait() if err != nil { - cluster.sme.AddState("WARN0093", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0093"], 
err, string(stdoutBuf.Bytes()), string(stderrBuf.Bytes())), ErrFrom: "CHECK"}) + cluster.AddSugarState("WARN0093", "CHECK", "", err, stdoutBuf.Bytes(), stderrBuf.Bytes()) cluster.ResticInitRepo() return err } diff --git a/cluster/cluster_chk.go b/cluster/cluster_chk.go index 71c60cea8..cd6bdabb1 100644 --- a/cluster/cluster_chk.go +++ b/cluster/cluster_chk.go @@ -10,7 +10,6 @@ import ( "bytes" "encoding/json" "errors" - "fmt" "io/ioutil" "net/http" "os/exec" @@ -27,7 +26,7 @@ import ( func (cluster *Cluster) CheckFailed() { // Don't trigger a failover if a switchover is happening if cluster.sme.IsInFailover() { - cluster.sme.AddState("ERR00001", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00001"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00001", "CHECK", "") return } if cluster.master == nil { @@ -115,7 +114,7 @@ func (cluster *Cluster) isAutomaticFailover() bool { if cluster.Conf.Interactive == false { return true } - cluster.sme.AddState("ERR00002", state.State{ErrType: "ERR00002", ErrDesc: fmt.Sprintf(clusterError["ERR00002"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00002", "CHECK", "") return false } @@ -131,7 +130,7 @@ func (cluster *Cluster) isMaxMasterFailedCountReached() bool { // no illimited failed count if cluster.master.FailCount >= cluster.Conf.MaxFail { - cluster.sme.AddState("WARN0023", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0023"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("WARN0023", "CHECK", "") return true } else { // cluster.sme.AddState("ERR00023", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf("Constraint is blocking state %s, interactive:%t, maxfail reached:%d", cluster.master.State, cluster.Conf.Interactive, cluster.Conf.MaxFail), ErrFrom: "CONF"}) @@ -146,7 +145,7 @@ func (cluster *Cluster) isMaxClusterFailoverCountNotReached() bool { return true } if cluster.FailoverCtr == cluster.Conf.FailLimit { - cluster.sme.AddState("ERR00027", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00027"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00027", "CHECK", "") return false } return true @@ -160,7 +159,7 @@ func (cluster *Cluster) isBetweenFailoverTimeValid() bool { } // cluster.LogPrintf("CHECK: Failover Time to short with previous failover") if rem > 0 { - cluster.sme.AddState("ERR00029", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00029"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00029", "CHECK", "") return false } return true @@ -189,7 +188,7 @@ func (cluster *Cluster) isOneSlaveHeartbeatIncreasing() bool { cluster.LogPrintf(LvlDbg, "SLAVE_RECEIVED_HEARTBEATS %d", status2["SLAVE_RECEIVED_HEARTBEATS"]) } if status2["SLAVE_RECEIVED_HEARTBEATS"] > saveheartbeats { - cluster.sme.AddState("ERR00028", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00028"], s.URL), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00028", "CHECK", s.URL, s.URL) return true } } @@ -249,7 +248,7 @@ func (cluster *Cluster) isMaxscaleSupectRunning() bool { time.Sleep(time.Duration(cluster.Conf.CheckFalsePositiveMaxscaleTimeout) * time.Second) if strings.Contains(cluster.master.MxsServerStatus, "Running") { - cluster.sme.AddState("ERR00030", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00030"], cluster.master.MxsServerStatus), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00030", "CHECK", "", cluster.master.MxsServerStatus) return true } return false @@ -259,7 +258,7 @@ func (cluster *Cluster) isFoundCandidateMaster() bool { key 
:= cluster.electFailoverCandidate(cluster.slaves, false) if key == -1 { - cluster.sme.AddState("ERR00032", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00032"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00032", "CHECK", "") return false } return true @@ -286,7 +285,7 @@ func (cluster *Cluster) isActiveArbitration() bool { resp, err := client.Do(req) if err != nil { cluster.LogPrintf(LvlErr, "%s", err.Error()) - cluster.sme.AddState("ERR00022", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00022"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00022", "CHECK", "") return false } defer resp.Body.Close() @@ -300,14 +299,14 @@ func (cluster *Cluster) isActiveArbitration() bool { err = json.Unmarshal(body, &r) if err != nil { cluster.LogPrintf(LvlErr, "Arbitrator sent invalid JSON") - cluster.sme.AddState("ERR00022", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00022"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00022", "CHECK", "") return false } if r.Arbitration == "winner" { cluster.LogPrintf(LvlInfo, "Arbitrator says: winner") return true } - cluster.sme.AddState("ERR00022", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00022"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00022", "CHECK", "") return false } @@ -325,7 +324,7 @@ func (cluster *Cluster) isExternalOk() bool { return false } if req.StatusCode == 200 { - cluster.sme.AddState("ERR00031", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00031"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00031", "CHECK", "") return true } return false @@ -336,7 +335,7 @@ func (cluster *Cluster) isArbitratorAlive() bool { return true } if cluster.IsFailedArbitrator { - cluster.sme.AddState("ERR00055", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00055"], cluster.Conf.ArbitrationSasHosts), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00055", "CHECK", "", cluster.Conf.ArbitrationSasHosts) return false } return true @@ -351,7 +350,7 @@ func (cluster *Cluster) isNotFirstSlave() bool { // - first replication-manager start on no topology // - all cluster down if cluster.master == nil { - cluster.sme.AddState("ERR00026", state.State{ErrType: LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00026"]), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00026", "CHECK", "") return false } @@ -611,7 +610,7 @@ func (cluster *Cluster) CheckTableChecksum(schema string, table string) { if slaveSeq >= masterSeq { break } else { - cluster.SetState("WARN0086", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0086"], s.URL), ErrFrom: "MON", ServerUrl: s.URL}) + cluster.SetSugarState("WARN0086", "MON", s.URL, s.URL) } time.Sleep(1 * time.Second) } @@ -655,8 +654,7 @@ func (cluster *Cluster) CheckSameServerID() { continue } if s.ServerID == sothers.ServerID { - cluster.SetState("WARN0087", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0087"], s.URL, sothers.URL), ErrFrom: "MON", ServerUrl: s.URL}) - + cluster.SetSugarState("WARN0087", "MON", s.URL, s.URL, sothers.URL) } } } @@ -675,7 +673,7 @@ func (cluster *Cluster) IsSameWsrepUUID() bool { continue } if s.Status["WSREP_CLUSTER_STATE_UUID"] != sothers.Status["WSREP_CLUSTER_STATE_UUID"] { - cluster.SetState("ERR00083", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00083"], s.URL, s.Status["WSREP_CLUSTER_STATE_UUID"], sothers.URL, sothers.Status["WSREP_CLUSTER_STATE_UUID"]), ErrFrom: "MON", ServerUrl: s.URL}) + 
cluster.SetSugarState("ERR00083", "MON", s.URL, s.URL, s.Status["WSREP_CLUSTER_STATE_UUID"], sothers.URL, sothers.Status["WSREP_CLUSTER_STATE_UUID"]) return false } } @@ -693,7 +691,7 @@ func (cluster *Cluster) IsNotHavingMySQLErrantTransaction() bool { } hasErrantTrx, _, _ := dbhelper.HaveErrantTransactions(s.Conn, cluster.master.Variables["GTID_EXECUTED"], s.Variables["GTID_EXECUTED"]) if hasErrantTrx { - cluster.SetState("WARN0091", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0091"], s.URL), ErrFrom: "MON", ServerUrl: s.URL}) + cluster.SetSugarState("WARN0091", "MON", s.URL, s.URL) return false } } diff --git a/cluster/cluster_fail.go b/cluster/cluster_fail.go index 2397ed56e..d036a0a64 100644 --- a/cluster/cluster_fail.go +++ b/cluster/cluster_fail.go @@ -660,7 +660,7 @@ func (cluster *Cluster) electSwitchoverCandidate(l []*ServerMonitor, forcingLog /* If server is in the ignore list, do not elect it in switchover */ if sl.IsIgnored() { - cluster.sme.AddState("ERR00037", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00037"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00037", "CHECK", sl.URL, sl.URL) continue } if sl.IsFull { @@ -668,27 +668,27 @@ func (cluster *Cluster) electSwitchoverCandidate(l []*ServerMonitor, forcingLog } //Need comment// if sl.IsRelay { - cluster.sme.AddState("ERR00036", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00036"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00036", "CHECK", sl.URL, sl.URL) continue } if !sl.HasBinlog() && !sl.IsIgnored() { - cluster.SetState("ERR00013", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00013"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00013", "CHECK", sl.URL, sl.URL) continue } if cluster.Conf.MultiMaster == true && sl.State == stateMaster { - cluster.sme.AddState("ERR00035", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00035"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00035", "CHECK", sl.URL, sl.URL) continue } // The tests below should run only in case of a switchover as they require the master to be up. 
if cluster.isSlaveElectableForSwitchover(sl, forcingLog) == false { - cluster.sme.AddState("ERR00034", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00034"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00034", "CHECK", sl.URL, sl.URL) continue } /* binlog + ping */ if cluster.isSlaveElectable(sl, forcingLog) == false { - cluster.sme.AddState("ERR00039", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00039"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00039", "CHECK", sl.URL, sl.URL) continue } @@ -700,14 +700,14 @@ func (cluster *Cluster) electSwitchoverCandidate(l []*ServerMonitor, forcingLog return i } if sl.HaveNoMasterOnStart == true && cluster.Conf.FailRestartUnsafe == false { - cluster.sme.AddState("ERR00084", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00084"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00084", "CHECK", sl.URL, sl.URL) continue } ss, errss := sl.GetSlaveStatus(sl.ReplicationSourceName) // not a slave if errss != nil && cluster.Conf.FailRestartUnsafe == false { //Skip slave in election %s have no master log file, slave might have failed - cluster.sme.AddState("ERR00033", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00033"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00033", "CHECK", sl.URL, sl.URL) continue } // Fake position if none as new slave @@ -795,23 +795,23 @@ func (cluster *Cluster) electFailoverCandidate(l []*ServerMonitor, forcingLog bo //Need comment// if sl.IsRelay { - cluster.sme.AddState("ERR00036", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00036"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00036", "CHECK", sl.URL, sl.URL) continue } if sl.IsFull { continue } if cluster.Conf.MultiMaster == true && sl.State == stateMaster { - cluster.sme.AddState("ERR00035", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00035"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00035", "CHECK", sl.URL, sl.URL) trackposList[i].Ignoredmultimaster = true continue } if sl.HaveNoMasterOnStart == true && cluster.Conf.FailRestartUnsafe == false { - cluster.sme.AddState("ERR00084", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00084"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00084", "CHECK", sl.URL, sl.URL) continue } if !sl.HasBinlog() && !sl.IsIgnored() { - cluster.SetState("ERR00013", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00013"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00013", "CHECK", sl.URL, sl.URL) continue } if cluster.GetTopology() == topoMultiMasterWsrep && cluster.vmaster != nil { @@ -830,7 +830,7 @@ func (cluster *Cluster) electFailoverCandidate(l []*ServerMonitor, forcingLog bo ss, errss := sl.GetSlaveStatus(sl.ReplicationSourceName) // not a slave if errss != nil && cluster.Conf.FailRestartUnsafe == false { - cluster.sme.AddState("ERR00033", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00033"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00033", "CHECK", sl.URL, sl.URL) trackposList[i].Ignoredreplication = true continue } @@ -967,14 +967,14 @@ func (cluster *Cluster) isSlaveElectable(sl *ServerMonitor, forcingLog bool) boo } /* binlog + ping */ if 
dbhelper.CheckSlavePrerequisites(sl.Conn, sl.Host, sl.DBVersion) == false { - cluster.sme.AddState("ERR00040", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00040"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00040", "CHECK", sl.URL, sl.URL) if cluster.Conf.LogLevel > 1 || forcingLog { cluster.LogPrintf(LvlWarn, "Slave %s does not ping or has no binlogs. Skipping", sl.URL) } return false } if sl.IsMaintenance { - cluster.sme.AddState("ERR00047", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00047"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00047", "CHECK", sl.URL, sl.URL) if cluster.Conf.LogLevel > 1 || forcingLog { cluster.LogPrintf(LvlWarn, "Slave %s is in maintenance. Skipping", sl.URL) } @@ -982,6 +982,7 @@ func (cluster *Cluster) isSlaveElectable(sl *ServerMonitor, forcingLog bool) boo } if ss.SecondsBehindMaster.Int64 > cluster.Conf.FailMaxDelay && cluster.Conf.FailMaxDelay != -1 && cluster.Conf.RplChecks == true { + // TODO: this message is very different then others, special case needs to be checked cluster.sme.AddState("ERR00041", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00041"]+" Sql: "+sl.GetProcessListReplicationLongQuery(), sl.URL, cluster.Conf.FailMaxDelay, ss.SecondsBehindMaster.Int64), ErrFrom: "CHECK", ServerUrl: sl.URL}) if cluster.Conf.LogLevel > 1 || forcingLog { cluster.LogPrintf(LvlWarn, "Unsafe failover condition. Slave %s has more than failover-max-delay %d seconds with replication delay %d. Skipping", sl.URL, cluster.Conf.FailMaxDelay, ss.SecondsBehindMaster.Int64) @@ -990,14 +991,14 @@ func (cluster *Cluster) isSlaveElectable(sl *ServerMonitor, forcingLog bool) boo return false } if ss.SlaveSQLRunning.String == "No" && cluster.Conf.RplChecks { - cluster.sme.AddState("ERR00042", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00042"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00042", "CHECK", sl.URL, sl.URL) if cluster.Conf.LogLevel > 1 || forcingLog { cluster.LogPrintf(LvlWarn, "Unsafe failover condition. Slave %s SQL Thread is stopped. Skipping", sl.URL) } return false } if sl.HaveSemiSync && sl.SemiSyncSlaveStatus == false && cluster.Conf.FailSync && cluster.Conf.RplChecks { - cluster.sme.AddState("ERR00043", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00043"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL}) + cluster.AddSugarState("ERR00043", "CHECK", sl.URL, sl.URL) if cluster.Conf.LogLevel > 1 || forcingLog { cluster.LogPrintf(LvlWarn, "Semi-sync slave %s is out of sync. Skipping", sl.URL) } @@ -1269,7 +1270,7 @@ func (cluster *Cluster) electVirtualCandidate(oldMaster *ServerMonitor, forcingL for i, sl := range cluster.Servers { /* If server is in the ignore list, do not elect it */ if sl.IsIgnored() { - cluster.sme.AddState("ERR00037", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00037"], sl.URL), ErrFrom: "CHECK"}) + cluster.AddSugarState("ERR00037", "CHECK", sl.URL, sl.URL) if cluster.Conf.LogLevel > 1 || forcingLog { cluster.LogPrintf(LvlDbg, "%s is in the ignore list. 
Skipping", sl.URL) } diff --git a/cluster/cluster_set.go b/cluster/cluster_set.go index a457ba89a..da0fb6f60 100644 --- a/cluster/cluster_set.go +++ b/cluster/cluster_set.go @@ -663,6 +663,65 @@ func (cluster *Cluster) SetClusterList(clusters map[string]*Cluster) { cluster.clusterList = clusters } +// SetSugarState calls SetState without needing to create the state.State{} struct yourself. +// Key sets the ErrKey +// From sets the ErrFrom +// URL is optional, will be set to the state if present +// Based on WARN/ERR it will set the ErrType correctly. +// desc is optional, if there are parameters missing for the error message they'll be blanked +func (cluster *Cluster) SetSugarState(key, from, url string, desc ...interface{}) { + s := cluster.createState(key, from, url, desc...) + + cluster.SetState(key, s) +} + +// AddSugarState calls sme.AddState without needing to create the state.State{} struct yourself. +// Key sets the ErrKey +// From sets the ErrFrom +// URL is optional, will be set to the state if present +// Based on WARN/ERR it will set the ErrType correctly. +// desc is optional, if there are parameters missing for the error message they'll be blanked +func (cluster *Cluster) AddSugarState(key, from, url string, desc ...interface{}) { + s := cluster.createState(key, from, url, desc...) + + cluster.sme.AddState(key, s) +} + +// createState creates a state.State +// Key sets the ErrKey +// From sets the ErrFrom +// URL is optional, will be set to the state if present +// Based on WARN/ERR it will set the ErrType correctly. +// desc is optional, if there are parameters missing for the error message they'll be blanked +func (cluster *Cluster) createState(key, from, url string, desc ...interface{}) state.State { + s := state.State{ + ErrKey: key, + ErrFrom: from, + } + if strings.Contains(key, "WARN") { + s.ErrType = LvlWarn + } + if strings.Contains(key, "ERR") { + s.ErrType = LvlErr + } + + if url != "" { + s.ServerUrl = url + } + + count := strings.Count(clusterError[key], "%s") + if count != 0 { + for len(desc) != count { + desc = append(desc, "") + } + s.ErrDesc = fmt.Sprintf(clusterError[key], desc...) + } else { + s.ErrDesc = clusterError[key] + } + + return s +} + func (cluster *Cluster) SetState(key string, s state.State) { if !strings.Contains(cluster.Conf.MonitorIgnoreError, key) { cluster.sme.AddState(key, s) diff --git a/cluster/cluster_set_test.go b/cluster/cluster_set_test.go new file mode 100644 index 000000000..d4566ae09 --- /dev/null +++ b/cluster/cluster_set_test.go @@ -0,0 +1,130 @@ +// replication-manager - Replication Manager Monitoring and CLI for MariaDB and MySQL +// Copyright 2017 Signal 18 SARL +// Authors: Guillaume Lefranc +// Stephane Varoqui +// This source code is licensed under the GNU General Public License, version 3. 
+ +package cluster + +import ( + "testing" + + "github.com/signal18/replication-manager/utils/state" +) + +func TestCluster_SetSugarState(t *testing.T) { + + sm := &state.StateMachine{} + sm.Init() + + type fields struct { + sme *state.StateMachine + expectedErrDesc string + expectedType string + } + type args struct { + key string + from string + url string + desc []interface{} + } + tests := []struct { + name string + fields fields + args args + }{ + { + name: "Add Error ERR00001", + fields: fields{ + sme: sm, + expectedErrDesc: "Monitor freeze while running critical section", + expectedType: "ERROR", + }, + args: args{ + key: "ERR00001", + from: "TEST", + url: "", + }, + }, + { + name: "Add Error ERR00080", + fields: fields{ + sme: sm, + expectedErrDesc: "Connection use old TLS keys on foobar.com", + expectedType: "ERROR", + }, + args: args{ + key: "ERR00080", + from: "TEST", + url: "", + desc: []interface{}{ + "foobar.com", + }, + }, + }, + { + name: "Add Warning WARN0048", + fields: fields{ + sme: sm, + expectedErrDesc: "No semisync settings on slave foobar.com", + expectedType: "WARN", + }, + args: args{ + key: "WARN0048", + from: "TEST", + url: "", + desc: []interface{}{ + "foobar.com", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cluster := &Cluster{ + sme: tt.fields.sme, + } + cluster.SetSugarState(tt.args.key, tt.args.from, tt.args.url, tt.args.desc...) + + // this one won't neccesarily report it + if !tt.fields.sme.IsInState(tt.args.key) { + t.Fatal("State not set") + } + + // manually check + if !tt.fields.sme.CurState.Search(tt.args.key) { + t.Fatal("State not set") + } + + found := false + var comp state.State + // now check the actual error message + for _, state := range *tt.fields.sme.CurState { + if state.ErrKey == tt.args.key { + comp = state + found = true + } + } + + if !found { + t.Fatal("State not found") + } + + if comp.ErrType != tt.fields.expectedType { + t.Fatalf("State Type is wrong. \nGot: %s\nWant: %s", comp.ErrType, tt.fields.expectedType) + } + + if comp.ErrFrom != tt.args.from { + t.Fatalf("State FROM is wrong. \nGot: %s\nWant: %s", comp.ErrFrom, tt.args.from) + } + + if comp.ServerUrl != tt.args.url { + t.Fatalf("State URL is wrong. \nGot: %s\nWant: %s", comp.ServerUrl, tt.args.url) + } + + if comp.ErrDesc != tt.fields.expectedErrDesc { + t.Fatalf("State ErrDesc is wrong. 
\nGot: %s\nWant: %s", comp.ErrDesc, tt.fields.expectedErrDesc) + } + }) + } +} diff --git a/cluster/cluster_split.go b/cluster/cluster_split.go index 8cf03bfd5..509719f18 100644 --- a/cluster/cluster_split.go +++ b/cluster/cluster_split.go @@ -32,7 +32,7 @@ func (cluster *Cluster) Heartbeat(wg *sync.WaitGroup) { if cluster.IsSplitBrain { err := cluster.SetArbitratorReport() if err != nil { - cluster.SetState("WARN0081", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0081"], err), ErrFrom: "ARB"}) + cluster.SetSugarState("WARN0081", "ARB", "", err) } if cluster.IsSplitBrainBck != cluster.IsSplitBrain { time.Sleep(5 * time.Second) @@ -42,7 +42,7 @@ func (cluster *Cluster) Heartbeat(wg *sync.WaitGroup) { i++ err = cluster.ArbitratorElection() if err != nil { - cluster.SetState("WARN0082", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0082"], err), ErrFrom: "ARB"}) + cluster.SetSugarState("WARN0082", "ARB", "", err) } else { break //break the loop on success retry 3 times } diff --git a/cluster/cluster_topo.go b/cluster/cluster_topo.go index 91cb943de..2c807e72d 100644 --- a/cluster/cluster_topo.go +++ b/cluster/cluster_topo.go @@ -11,10 +11,7 @@ package cluster import ( "errors" - "fmt" "sync" - - "github.com/signal18/replication-manager/utils/state" ) type topologyError struct { @@ -184,7 +181,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { } if cluster.IsActive() && cluster.master != nil && cluster.GetTopology() == topoMasterSlave && cluster.Servers[k].URL != cluster.master.URL { //Extra master in master slave topology rejoin it after split brain - cluster.SetState("ERR00063", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00063"]), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00063", "TOPO", "") // cluster.Servers[k].RejoinMaster() /* remove for rolling restart , wrongly rejoin server as master before just after swithover while the server is just stopping } else { cluster.master = cluster.Servers[k] @@ -202,7 +199,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { // If no cluster.slaves are detected, generate an error if len(cluster.slaves) == 0 && cluster.GetTopology() != topoMultiMasterWsrep { - cluster.SetState("ERR00010", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00010"]), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00010", "TOPO", "") } // Check that all slave servers have the same master and conformity. 
@@ -213,7 +210,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { sl.CheckSlaveSameMasterGrants() if sl.HasCycling() { if cluster.Conf.MultiMaster == false && len(cluster.Servers) == 2 { - cluster.SetState("ERR00011", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00011"]), ErrFrom: "TOPO", ServerUrl: sl.URL}) + cluster.SetSugarState("ERR00011", "TOPO", sl.URL) cluster.Conf.MultiMaster = true } if cluster.Conf.MultiMasterRing == false && len(cluster.Servers) > 2 { @@ -226,7 +223,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { //broken replication ring } else if cluster.Conf.MultiMasterRing == true { //setting a virtual master if none - cluster.SetState("ERR00048", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00048"]), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00048", "TOPO", sl.URL) cluster.master = cluster.GetFailedServer() } @@ -249,7 +246,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { } } if srw > 1 { - cluster.SetState("WARN0003", state.State{ErrType: "WARNING", ErrDesc: "RW server count > 1 in multi-master mode. set read_only=1 in cnf is a must have, choosing prefered master", ErrFrom: "TOPO"}) + cluster.SetSugarState("WARN0003", "TOPO", "") } srw = 0 for _, s := range cluster.Servers { @@ -258,12 +255,12 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { } } if srw > 1 { - cluster.SetState("WARN0004", state.State{ErrType: "WARNING", ErrDesc: "RO server count > 1 in multi-master mode. switching to preferred master.", ErrFrom: "TOPO"}) + cluster.SetSugarState("WARN0004", "TOPO", "RO server count > 1 in multi-master mode. switching to preferred master.") server := cluster.getOnePreferedMaster() if server != nil { server.SetReadWrite() } else { - cluster.SetState("WARN0006", state.State{ErrType: "WARNING", ErrDesc: "Multi-master need a preferred master.", ErrFrom: "TOPO"}) + cluster.SetSugarState("WARN0006", "TOPO", "") } } } @@ -313,7 +310,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { if cluster.master == nil { // could not detect master if cluster.GetMaster() == nil { - cluster.SetState("ERR00012", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00012"]), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00012", "TOPO", "") } } else { cluster.master.HaveHealthyReplica = false @@ -332,7 +329,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { replMaster, _ := cluster.GetMasterFromReplication(sl) if replMaster != nil && replMaster.Id != cluster.master.Id { - cluster.SetState("ERR00064", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00064"], sl.URL, cluster.master.URL, replMaster.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + cluster.SetSugarState("ERR00064", "TOPO", sl.URL, sl.URL, cluster.master.URL, replMaster.URL) if cluster.Conf.ReplicationNoRelay && cluster.Status == ConstMonitorActif { cluster.RejoinFixRelay(sl, cluster.master) @@ -350,12 +347,8 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { } // State also check in failover_check false positive if cluster.master.IsFailed() && cluster.slaves.HasAllSlavesRunning() { - cluster.SetState("ERR00016", state.State{ - ErrType: "ERROR", - ErrDesc: clusterError["ERR00016"], - ErrFrom: "NET", - ServerUrl: cluster.master.URL, - }) + // TODO: check if From: NET is correct here + cluster.SetSugarState("ERR00016", "NET", cluster.master.URL) } } @@ -369,13 +362,13 @@ func (cluster 
*Cluster) TopologyDiscover(wcg *sync.WaitGroup) error { } if cluster.Conf.Arbitration { if cluster.IsSplitBrain { - cluster.SetState("WARN0079", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0079"]), ErrFrom: "ARB"}) + cluster.SetSugarState("WARN0079", "ARB", "") } if cluster.IsLostMajority { - cluster.SetState("WARN0080", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0080"]), ErrFrom: "ARB"}) + cluster.SetSugarState("WARN0080", "ARB", "") } if cluster.IsFailedArbitrator { - cluster.SetState("WARN0090", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0090"], cluster.Conf.ArbitratorAddress), ErrFrom: "ARB"}) + cluster.SetSugarState("WARN0090", "ARB", "", cluster.Conf.ArbitratorAddress) } } if cluster.sme.CanMonitor() { @@ -392,7 +385,7 @@ func (cluster *Cluster) AllServersFailed() bool { } } //"ERR00077": "All databases state down", - cluster.SetState("ERR00077", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00077"]), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00077", "TOPO", "") return true } @@ -417,7 +410,7 @@ func (cluster *Cluster) TopologyClusterDown() bool { } } - cluster.SetState("ERR00021", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00021"]), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00021", "TOPO", "") cluster.IsClusterDown = true return true } diff --git a/cluster/error.go b/cluster/error.go index 805e9457c..ad8eb7228 100644 --- a/cluster/error.go +++ b/cluster/error.go @@ -149,5 +149,9 @@ var clusterError = map[string]string{ "WARN0098": "ProxySQL could not load global variables from runtime (%s)", "WARN0099": "MariaDB version as replication issue https://jira.mariadb.org/browse/MDEV-20821", "WARN0100": "No space left on device pn %s", + "WARN0003": "RW server count > 1 in multi-master mode. set read_only=1 in cnf is a must have, choosing prefered master", + "WARN0004": "RO server count > 1 in multi-master mode. switching to preferred master.", + "WARN0006": "Multi-master need a preferred master.", + "WARN0007": "At least one server is not ACID-compliant. 
Please make sure that sync_binlog and innodb_flush_log_at_trx_commit are set to 1", "WARN0101": "Cluster does not have backup", } diff --git a/cluster/prov_opensvc.go b/cluster/prov_opensvc.go index 59c46a588..4561ebcf5 100644 --- a/cluster/prov_opensvc.go +++ b/cluster/prov_opensvc.go @@ -15,7 +15,6 @@ import ( "github.com/signal18/replication-manager/opensvc" "github.com/signal18/replication-manager/utils/misc" - "github.com/signal18/replication-manager/utils/state" ) var dockerMinusRm bool @@ -81,7 +80,7 @@ func (cluster *Cluster) OpenSVCGetNodes() ([]Agent, error) { svc := cluster.OpenSVCConnect() hosts, err := svc.GetNodes() if err != nil { - cluster.SetState("ERR00082", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00082"], err), ErrFrom: "OPENSVC"}) + cluster.SetSugarState("ERR00082", "OPENSVC", "", err) return nil, err } if hosts == nil { @@ -148,10 +147,10 @@ func (cluster *Cluster) OpenSVCWaitDequeue(svc opensvc.Collector, idaction int) time.Sleep(2 * time.Second) status := svc.GetActionStatus(strconv.Itoa(idaction)) if status == "Q" { - cluster.sme.AddState("WARN0045", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0045"]), ErrFrom: "TOPO"}) + cluster.AddSugarState("WARN0045", "TOPO", "") } if status == "W" { - cluster.sme.AddState("WARN0046", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0046"]), ErrFrom: "TOPO"}) + cluster.AddSugarState("WARN0046", "TOPO", "") } if status == "T" { return nil diff --git a/cluster/prov_opensvc_db.go b/cluster/prov_opensvc_db.go index 8b12fd12d..2ad9806de 100644 --- a/cluster/prov_opensvc_db.go +++ b/cluster/prov_opensvc_db.go @@ -16,7 +16,6 @@ import ( "strings" "github.com/signal18/replication-manager/opensvc" - "github.com/signal18/replication-manager/utils/state" ) func (cluster *Cluster) GetDatabaseServiceConfig(s *ServerMonitor) string { @@ -209,7 +208,7 @@ func (cluster *Cluster) OpenSVCFoundDatabaseAgent(server *ServerMonitor) (opensv svc := cluster.OpenSVCConnect() agents, err := svc.GetNodes() if err != nil { - cluster.SetState("ERR00082", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00082"], err), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00082", "TOPO", "", err) } if agents == nil { return agent, errors.New("Error getting OpenSVC node list") diff --git a/cluster/prov_opensvc_prx.go b/cluster/prov_opensvc_prx.go index 126f300e7..ed988b2b3 100644 --- a/cluster/prov_opensvc_prx.go +++ b/cluster/prov_opensvc_prx.go @@ -16,7 +16,6 @@ import ( "github.com/signal18/replication-manager/opensvc" "github.com/signal18/replication-manager/utils/misc" - "github.com/signal18/replication-manager/utils/state" ) func (cluster *Cluster) OpenSVCUnprovisionProxyService(prx DatabaseProxy) { @@ -399,7 +398,7 @@ func (cluster *Cluster) FoundProxyAgent(proxy DatabaseProxy) (opensvc.Host, erro svc := cluster.OpenSVCConnect() agents, err := svc.GetNodes() if err != nil { - cluster.SetState("ERR00082", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00082"], err), ErrFrom: "TOPO"}) + cluster.SetSugarState("ERR00082", "TOPO", "", err) } var clusteragents []opensvc.Host var agent opensvc.Host diff --git a/cluster/prx.go b/cluster/prx.go index 4df6ce5b4..834fccb29 100644 --- a/cluster/prx.go +++ b/cluster/prx.go @@ -23,7 +23,6 @@ import ( "github.com/signal18/replication-manager/router/proxysql" "github.com/signal18/replication-manager/utils/dbhelper" "github.com/signal18/replication-manager/utils/misc" - 
"github.com/signal18/replication-manager/utils/state" "github.com/spf13/pflag" ) @@ -241,7 +240,7 @@ func (cluster *Cluster) InjectProxiesTraffic() { } db, err := pr.GetClusterConnection() if err != nil { - cluster.sme.AddState("ERR00050", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00050"], err), ErrFrom: "TOPO"}) + cluster.AddSugarState("ERR00050", "TOPO", "", err) } else { if pr.GetType() == config.ConstProxyMyProxy { definer = "DEFINER = root@localhost" @@ -251,7 +250,7 @@ func (cluster *Cluster) InjectProxiesTraffic() { _, err := db.Exec("CREATE OR REPLACE " + definer + " VIEW replication_manager_schema.pseudo_gtid_v as select '" + misc.GetUUID() + "' from dual") if err != nil { - cluster.sme.AddState("ERR00050", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00050"], err), ErrFrom: "TOPO"}) + cluster.AddSugarState("ERR00050", "TOPO", "", err) db.Exec("CREATE DATABASE IF NOT EXISTS replication_manager_schema") } diff --git a/cluster/prx_haproxy.go b/cluster/prx_haproxy.go index 308f07a09..ace689875 100644 --- a/cluster/prx_haproxy.go +++ b/cluster/prx_haproxy.go @@ -274,7 +274,7 @@ func (proxy *HaproxyProxy) Refresh() error { result, err := haRuntime.ApiCmd("show stat") if err != nil { - cluster.sme.AddState("ERR00052", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00052"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00052", "MON", "", err) return err } if cluster.Conf.HaproxyDebug { @@ -296,7 +296,7 @@ func (proxy *HaproxyProxy) Refresh() error { return err } if len(line) < 73 { - cluster.sme.AddState("WARN0078", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0078"], err), ErrFrom: "MON"}) + cluster.SetSugarState("WARN0078", "MON", "", err) return errors.New(clusterError["WARN0078"]) } if strings.Contains(strings.ToLower(line[0]), "write") { diff --git a/cluster/prx_mariadbshardproxy.go b/cluster/prx_mariadbshardproxy.go index 22651b6da..78d86b542 100644 --- a/cluster/prx_mariadbshardproxy.go +++ b/cluster/prx_mariadbshardproxy.go @@ -22,7 +22,6 @@ import ( "github.com/signal18/replication-manager/config" "github.com/signal18/replication-manager/utils/dbhelper" "github.com/signal18/replication-manager/utils/misc" - "github.com/signal18/replication-manager/utils/state" "github.com/spf13/pflag" ) @@ -145,7 +144,7 @@ func (cluster *Cluster) CheckMdbShardServersSchema(proxy *MariadbShardProxy) { } schemas, _, err := cluster.master.GetSchemas() if err != nil { - cluster.sme.AddState("WARN0089", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(cluster.GetErrorList()["WARN0089"], cluster.master.URL), ErrFrom: "PROXY", ServerUrl: cluster.master.URL}) + cluster.AddSugarState("WARN0089", "PROXY", cluster.master.URL, cluster.master.URL) return } foundReplicationManagerSchema := false diff --git a/cluster/prx_maxscale.go b/cluster/prx_maxscale.go index 54ce41abf..e6ff546ae 100644 --- a/cluster/prx_maxscale.go +++ b/cluster/prx_maxscale.go @@ -9,13 +9,11 @@ package cluster import ( - "fmt" "strconv" "github.com/signal18/replication-manager/config" "github.com/signal18/replication-manager/router/maxscale" "github.com/signal18/replication-manager/utils/crypto" - "github.com/signal18/replication-manager/utils/state" "github.com/spf13/pflag" ) @@ -90,7 +88,7 @@ func (proxy *MaxscaleProxy) Refresh() error { if cluster.Conf.MxsOn { err := m.Connect() if err != nil { - cluster.sme.AddState("ERR00018", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00018"], err), ErrFrom: "CONF"}) 
+ cluster.AddSugarState("ERR00018", "CONF", "", err) cluster.sme.CopyOldStateFromUnknowServer(proxy.Name) return err } @@ -108,7 +106,7 @@ func (proxy *MaxscaleProxy) Refresh() error { if cluster.Conf.MxsGetInfoMethod == "maxinfo" { _, err := m.GetMaxInfoServers("http://" + proxy.Host + ":" + strconv.Itoa(cluster.Conf.MxsMaxinfoPort) + "/servers") if err != nil { - cluster.sme.AddState("ERR00020", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00020"], server.URL), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00020", "MON", proxy.Name, server.URL) } srvport, _ := strconv.Atoi(server.Port) mxsConnections := 0 @@ -120,7 +118,7 @@ func (proxy *MaxscaleProxy) Refresh() error { } else { _, err := m.ListServers() if err != nil { - server.ClusterGroup.sme.AddState("ERR00019", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00019"], server.URL), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00019", "MON", proxy.Name, server.URL) } else { if proxy.Tunnel { @@ -199,7 +197,7 @@ func (proxy *MaxscaleProxy) Init() { cluster.LogPrintf(LvlErr, "MaxScale client could not shutdown monitor:%s", err) } } else { - cluster.sme.AddState("ERR00017", state.State{ErrType: "ERROR", ErrDesc: clusterError["ERR00017"], ErrFrom: "TOPO", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00017", "TOPO", proxy.Name) } err = m.SetServer(cluster.GetMaster().MxsServerName, "master") @@ -269,7 +267,7 @@ func (pr *MaxscaleProxy) SetMaintenance(server *ServerMonitor) { m := maxscale.MaxScale{Host: pr.Host, Port: pr.Port, User: pr.User, Pass: pr.Pass} err := m.Connect() if err != nil { - cluster.sme.AddState("ERR00018", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00018"], err), ErrFrom: "CONF"}) + cluster.AddSugarState("ERR00018", "CONF", "", err) } if server.IsMaintenance { err = m.SetServer(server.MxsServerName, "maintenance") diff --git a/cluster/prx_proxysql.go b/cluster/prx_proxysql.go index 36eac8a49..c3c391dfd 100644 --- a/cluster/prx_proxysql.go +++ b/cluster/prx_proxysql.go @@ -111,7 +111,7 @@ func (proxy *ProxySQLProxy) AddShardProxy(shardproxy *MariadbShardProxy) { } psql, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00051", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00051"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00051", "MON", "", err) return } defer psql.Connection.Close() @@ -125,7 +125,7 @@ func (proxy *ProxySQLProxy) AddQueryRulesProxysql(rules []proxysql.QueryRule) er } psql, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00051", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00051"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00051", "MON", "", err) return err } defer psql.Connection.Close() @@ -141,7 +141,7 @@ func (proxy *ProxySQLProxy) Init() { psql, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00051", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00051"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00051", "MON", "", err) return } defer psql.Connection.Close() @@ -218,7 +218,7 @@ func (proxy *ProxySQLProxy) Failover() { cluster := proxy.ClusterGroup psql, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00051", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00051"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00051", "MON", "", err) return } @@ -262,7 +262,7 @@ func (proxy *ProxySQLProxy) Refresh() 
error { psql, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00051", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00051"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00051", "MON", "", err) cluster.sme.CopyOldStateFromUnknowServer(proxy.Name) return err } @@ -339,7 +339,7 @@ func (proxy *ProxySQLProxy) Refresh() error { cluster.LogPrintf(LvlInfo, "Monitor ProxySQL setting offline standalone server %s", s.URL) err = psql.SetOffline(misc.Unbracket(s.Host), s.Port) if err != nil { - cluster.sme.AddState("ERR00070", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00070"], err, s.URL), ErrFrom: "PRX", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00070", "PRX", proxy.Name, s.URL, err) } updated = true @@ -385,7 +385,7 @@ func (proxy *ProxySQLProxy) Refresh() error { cluster.LogPrintf(LvlInfo, "Monitor ProxySQL setting reader standalone server %s", s.URL) } if err != nil { - cluster.sme.AddState("ERR00072", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00072"], err, s.URL), ErrFrom: "PRX", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00072", "PRX", proxy.Name, s.URL, err) } updated = true } @@ -394,7 +394,7 @@ func (proxy *ProxySQLProxy) Refresh() error { if s.IsMaster() && cluster.Conf.ProxysqlCopyGrants { myprxusermap, _, err := dbhelper.GetProxySQLUsers(psql.Connection) if err != nil { - cluster.sme.AddState("ERR00053", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00053"], err), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00053", "PRX", proxy.Name, err) } uniUsers := make(map[string]dbhelper.Grant) dupUsers := make(map[string]string) @@ -403,7 +403,7 @@ func (proxy *ProxySQLProxy) Refresh() error { user, ok := uniUsers[u.User+":"+u.Password] if ok { dupUsers[user.User] = user.User - cluster.sme.AddState("ERR00057", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00057"], user.User), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00057", "MON", proxy.Name, user.User) } else { if u.Password != "" && u.Password != "invalid" { if u.User != cluster.dbUser { @@ -422,7 +422,7 @@ func (proxy *ProxySQLProxy) Refresh() error { err := psql.AddUser(user.User, user.Password) if err != nil { - cluster.sme.AddState("ERR00054", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00054"], err), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("ERR00054", "MON", proxy.Name, err) } } @@ -437,11 +437,11 @@ func (proxy *ProxySQLProxy) Refresh() error { } proxy.QueryRules, err = psql.GetQueryRulesRuntime() if err != nil { - cluster.sme.AddState("WARN0092", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0092"], err), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("WARN0092", "MON", proxy.Name, err) } proxy.Variables, err = psql.GetVariables() if err != nil { - cluster.sme.AddState("WARN0098", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0098"], err), ErrFrom: "MON", ServerUrl: proxy.Name}) + cluster.AddSugarState("WARN0098", "MON", proxy.Name, err) } if proxy.ClusterGroup.Conf.ProxysqlBootstrapVariables { if proxy.Variables["MYSQL-MULTIPLEXING"] == "TRUE" && !proxy.ClusterGroup.Conf.ProxysqlMultiplexing { @@ -483,7 +483,7 @@ func (proxy *ProxySQLProxy) SetMaintenance(s *ServerMonitor) { psql, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00051", state.State{ErrType: "ERROR", ErrDesc: 
fmt.Sprintf(clusterError["ERR00051"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00051", "MON", "", err) return } defer psql.Connection.Close() diff --git a/cluster/prx_sphinx.go b/cluster/prx_sphinx.go index 1283be150..a864ee3c9 100644 --- a/cluster/prx_sphinx.go +++ b/cluster/prx_sphinx.go @@ -9,13 +9,11 @@ package cluster import ( - "fmt" "runtime" "strconv" "github.com/signal18/replication-manager/config" "github.com/signal18/replication-manager/router/sphinx" - "github.com/signal18/replication-manager/utils/state" "github.com/spf13/pflag" ) @@ -83,7 +81,7 @@ func (proxy *SphinxProxy) Init() { sphinx, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00058", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00058"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00058", "MON", "", err) return } defer sphinx.Connection.Close() @@ -102,7 +100,7 @@ func (proxy *SphinxProxy) Refresh() error { sphinx, err := proxy.Connect() if err != nil { - cluster.sme.AddState("ERR00058", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00058"], err), ErrFrom: "MON"}) + cluster.AddSugarState("ERR00058", "MON", "", err) return err } defer sphinx.Connection.Close() diff --git a/cluster/srv_chk.go b/cluster/srv_chk.go index ef1fe2f76..0d6017748 100644 --- a/cluster/srv_chk.go +++ b/cluster/srv_chk.go @@ -22,14 +22,14 @@ func (server *ServerMonitor) CheckMaxConnections() { maxCx, _ := strconv.ParseInt(server.Variables["MAX_CONNECTIONS"], 10, 64) curCx, _ := strconv.ParseInt(server.Status["THREADS_CONNECTED"], 10, 64) if curCx > maxCx*80/100 { - server.ClusterGroup.sme.AddState("ERR00076", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00076"], server.URL), ErrFrom: "MON", ServerUrl: server.URL}) + server.ClusterGroup.SetSugarState("ERR00076", "MON", server.URL, server.URL) } } func (server *ServerMonitor) CheckVersion() { if server.DBVersion.IsMariaDB() && ((server.DBVersion.Major == 10 && server.DBVersion.Minor == 4 && server.DBVersion.Release < 12) || (server.DBVersion.Major == 10 && server.DBVersion.Minor == 5 && server.DBVersion.Release < 1)) { - server.ClusterGroup.sme.AddState("WARN0099", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0099"], server.URL), ErrFrom: "MON", ServerUrl: server.URL}) + server.ClusterGroup.SetSugarState("WARN0099", "MON", server.URL, server.URL) } } @@ -38,7 +38,7 @@ func (server *ServerMonitor) CheckVersion() { func (server *ServerMonitor) CheckDisks() { for _, d := range server.Disks { if d.Used/d.Total*100 > int32(server.ClusterGroup.Conf.MonitorDiskUsagePct) { - server.ClusterGroup.sme.AddState("ERR00079", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00079"], server.URL), ErrFrom: "MON", ServerUrl: server.URL}) + server.ClusterGroup.SetSugarState("ERR00079", "MON", server.URL, server.URL) } } } @@ -152,7 +152,7 @@ func (server *ServerMonitor) CheckSlaveSettings() { server.ClusterGroup.LogPrintf("DEBUG", "Enforce semisync on slave %s", sl.URL) dbhelper.InstallSemiSync(sl.Conn) } else if sl.IsIgnored() == false && sl.HaveSemiSync == false { - server.ClusterGroup.sme.AddState("WARN0048", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0048"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0048", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceBinlogRow && sl.HaveBinlogRow == false { @@ -160,7 +160,7 @@ func (server *ServerMonitor) CheckSlaveSettings() { 
dbhelper.SetBinlogFormat(sl.Conn, "ROW") server.ClusterGroup.LogPrintf("INFO", "Enforce binlog format ROW on slave %s", sl.URL) } else if sl.IsIgnored() == false && sl.HaveBinlogRow == false && server.ClusterGroup.Conf.AutorejoinFlashback == true { - server.ClusterGroup.sme.AddState("WARN0049", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0049"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0049", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceSlaveReadOnly && sl.ReadOnly == "OFF" && !server.ClusterGroup.IsInIgnoredReadonly(server) { // In non-multimaster mode, enforce read-only flag if the option is set @@ -171,58 +171,58 @@ func (server *ServerMonitor) CheckSlaveSettings() { dbhelper.SetSlaveHeartbeat(sl.Conn, "1", server.ClusterGroup.Conf.MasterConn, server.DBVersion) server.ClusterGroup.LogPrintf("INFO", "Enforce heartbeat to 1s on slave %s", sl.URL) } else if sl.IsIgnored() == false && sl.GetReplicationHearbeatPeriod() > 1 { - server.ClusterGroup.sme.AddState("WARN0050", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0050"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0050", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceSlaveGtid && sl.GetReplicationUsingGtid() == "No" { dbhelper.SetSlaveGTIDMode(sl.Conn, "slave_pos", server.ClusterGroup.Conf.MasterConn, server.DBVersion) server.ClusterGroup.LogPrintf("INFO", "Enforce GTID replication on slave %s", sl.URL) } else if sl.IsIgnored() == false && sl.GetReplicationUsingGtid() == "No" { - server.ClusterGroup.sme.AddState("WARN0051", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0051"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0051", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceSlaveGtidStrict && sl.IsReplicationUsingGtidStrict() == false { dbhelper.SetSlaveGTIDModeStrict(sl.Conn, server.DBVersion) server.ClusterGroup.LogPrintf("INFO", "Enforce GTID strict mode on slave %s", sl.URL) } else if sl.IsIgnored() == false && sl.IsReplicationUsingGtidStrict() == false { - server.ClusterGroup.sme.AddState("WARN0058", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0058"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0058", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceSyncInnoDB && sl.HaveInnodbTrxCommit == false { dbhelper.SetSyncInnodb(sl.Conn) server.ClusterGroup.LogPrintf("INFO", "Enforce InnoDB durability on slave %s", sl.URL) } else if sl.IsIgnored() == false && sl.HaveInnodbTrxCommit == false { - server.ClusterGroup.sme.AddState("WARN0052", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0052"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0052", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceBinlogChecksum && sl.HaveChecksum == false { dbhelper.SetBinlogChecksum(sl.Conn) server.ClusterGroup.LogPrintf("INFO", "Enforce checksum on slave %s", sl.URL) } else if sl.IsIgnored() == false && sl.HaveChecksum == false { - server.ClusterGroup.sme.AddState("WARN0053", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0053"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL}) + server.ClusterGroup.SetSugarState("WARN0053", "TOPO", sl.URL, sl.URL) } if server.ClusterGroup.Conf.ForceBinlogSlowqueries && sl.HaveBinlogSlowqueries == false { 
 		dbhelper.SetBinlogSlowqueries(sl.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce log slow queries of replication on slave %s", sl.URL)
 	} else if sl.IsIgnored() == false && sl.HaveBinlogSlowqueries == false {
-		server.ClusterGroup.sme.AddState("WARN0054", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0054"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL})
+		server.ClusterGroup.SetSugarState("WARN0054", "TOPO", sl.URL, sl.URL)
 	}
 	if server.ClusterGroup.Conf.ForceBinlogAnnotate && sl.HaveBinlogAnnotate == false && server.IsMariaDB() {
 		dbhelper.SetBinlogAnnotate(sl.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce annotate on slave %s", sl.URL)
 	} else if sl.IsIgnored() == false && sl.HaveBinlogAnnotate == false && server.IsMariaDB() {
-		server.ClusterGroup.sme.AddState("WARN0055", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0055"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL})
+		server.ClusterGroup.SetSugarState("WARN0055", "TOPO", sl.URL, sl.URL)
 	}
 	if server.ClusterGroup.Conf.ForceBinlogCompress && sl.HaveBinlogCompress == false && sl.DBVersion.IsMariaDB() && sl.DBVersion.Major >= 10 && sl.DBVersion.Minor >= 2 {
 		dbhelper.SetBinlogCompress(sl.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce binlog compression on slave %s", sl.URL)
 	} else if sl.IsIgnored() == false && sl.HaveBinlogCompress == false && sl.DBVersion.IsMariaDB() && sl.DBVersion.Major >= 10 && sl.DBVersion.Minor >= 2 {
-		server.ClusterGroup.sme.AddState("WARN0056", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0056"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL})
+		server.ClusterGroup.SetSugarState("WARN0056", "TOPO", sl.URL, sl.URL)
 	}
 	if sl.IsIgnored() == false && sl.HaveBinlogSlaveUpdates == false {
-		server.ClusterGroup.sme.AddState("WARN0057", state.State{ErrType: LvlWarn, ErrDesc: fmt.Sprintf(clusterError["WARN0057"], sl.URL), ErrFrom: "TOPO", ServerUrl: sl.URL})
+		server.ClusterGroup.SetSugarState("WARN0057", "TOPO", sl.URL, sl.URL)
 	}
 	if server.IsAcid() == false && server.ClusterGroup.IsDiscovered() {
-		server.ClusterGroup.SetState("WARN0007", state.State{ErrType: LvlWarn, ErrDesc: "At least one server is not ACID-compliant. Please make sure that sync_binlog and innodb_flush_log_at_trx_commit are set to 1", ErrFrom: "CONF", ServerUrl: sl.URL})
+		server.ClusterGroup.SetSugarState("WARN0007", "CONF", sl.URL)
 	}
 }
@@ -233,52 +233,53 @@ func (server *ServerMonitor) CheckMasterSettings() {
 		server.ClusterGroup.LogPrintf("INFO", "Enforce semisync on Master %s", server.URL)
 		dbhelper.InstallSemiSync(server.Conn)
 	} else if server.HaveSemiSync == false {
-		server.ClusterGroup.sme.AddState("WARN0060", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0060"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0060", "TOPO", server.URL, server.URL)
 	}
 	if server.ClusterGroup.Conf.ForceBinlogRow && server.HaveBinlogRow == false {
 		dbhelper.SetBinlogFormat(server.Conn, "ROW")
 		server.ClusterGroup.LogPrintf("INFO", "Enforce binlog format ROW on Master %s", server.URL)
 	} else if server.HaveBinlogRow == false && server.ClusterGroup.Conf.AutorejoinFlashback == true {
-		server.ClusterGroup.sme.AddState("WARN0061", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0061"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0061", "TOPO", server.URL, server.URL)
 	}
 	if server.ClusterGroup.Conf.ForceSyncBinlog && server.HaveBinlogSync == false {
 		dbhelper.SetSyncBinlog(server.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce sync binlog on Master %s", server.URL)
 	} else if server.HaveBinlogSync == false {
-		server.ClusterGroup.sme.AddState("WARN0062", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0062"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0062", "TOPO", server.URL, server.URL)
 	}
 	if server.ClusterGroup.Conf.ForceSyncInnoDB && server.HaveBinlogSync == false {
 		dbhelper.SetSyncInnodb(server.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce innodb durability on Master %s", server.URL)
 	} else if server.HaveBinlogSync == false {
-		server.ClusterGroup.sme.AddState("WARN0064", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0064"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0064", "TOPO", server.URL, server.URL)
 	}
 	if server.ClusterGroup.Conf.ForceBinlogAnnotate && server.HaveBinlogAnnotate == false && server.IsMariaDB() {
 		dbhelper.SetBinlogAnnotate(server.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce binlog annotate on master %s", server.URL)
 	} else if server.HaveBinlogAnnotate == false && server.IsMariaDB() {
-		server.ClusterGroup.sme.AddState("WARN0067", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0067"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0067", "TOPO", server.URL, server.URL)
 	}
 	if server.ClusterGroup.Conf.ForceBinlogChecksum && server.HaveChecksum == false {
 		dbhelper.SetBinlogChecksum(server.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce ckecksum annotate on master %s", server.URL)
 	} else if server.HaveChecksum == false {
-		server.ClusterGroup.sme.AddState("WARN0065", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0065"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0065", "TOPO", server.URL, server.URL)
 	}
 	if server.ClusterGroup.Conf.ForceBinlogCompress && server.HaveBinlogCompress == false && server.IsMariaDB() && server.DBVersion.Major >= 10 && server.DBVersion.Minor >= 2 {
 		dbhelper.SetBinlogCompress(server.Conn)
 		server.ClusterGroup.LogPrintf("INFO", "Enforce binlog compression on master %s", server.URL)
 	} else if server.HaveBinlogCompress == false && server.DBVersion.IsMariaDB() && server.DBVersion.Major >= 10 && server.DBVersion.Minor >= 2 {
-		server.ClusterGroup.sme.AddState("WARN0068", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0068"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0068", "TOPO", server.URL, server.URL)
 	}
 	if server.HaveBinlogSlaveUpdates == false {
-		server.ClusterGroup.sme.AddState("WARN0069", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0069"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0069", "TOPO", server.URL, server.URL)
 	}
+
 	if server.HaveGtidStrictMode == false && server.DBVersion.Flavor == "MariaDB" {
-		server.ClusterGroup.sme.AddState("WARN0070", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0070"], server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0070", "TOPO", server.URL, server.URL)
 	}
 	if server.IsAcid() == false && server.ClusterGroup.IsDiscovered() {
-		server.ClusterGroup.SetState("WARN0007", state.State{ErrType: "WARNING", ErrDesc: "At least one server is not ACID-compliant. Please make sure that sync_binlog and innodb_flush_log_at_trx_commit are set to 1", ErrFrom: "CONF", ServerUrl: server.URL})
+		server.ClusterGroup.SetSugarState("WARN0007", "CONF", server.URL, server.URL)
 	}
 }
@@ -289,7 +290,7 @@ func (server *ServerMonitor) CheckSlaveSameMasterGrants() bool {
 	}
 	for _, user := range server.ClusterGroup.GetMaster().Users {
 		if _, ok := server.Users["'"+user.User+"'@'"+user.Host+"'"]; !ok {
-			server.ClusterGroup.sme.AddState("ERR00056", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00056"], fmt.Sprintf("'%s'@'%s'", user.User, user.Host), server.URL), ErrFrom: "TOPO", ServerUrl: server.URL})
+			server.ClusterGroup.SetSugarState("ERR00056", "TOPO", server.URL, fmt.Sprintf("'%s'@'%s'", user.User, user.Host), server.URL)
 			return false
 		}
 	}
diff --git a/cluster/srv_job.go b/cluster/srv_job.go
index 3475d3874..c902e76bb 100644
--- a/cluster/srv_job.go
+++ b/cluster/srv_job.go
@@ -34,7 +34,6 @@ import (
 	"github.com/signal18/replication-manager/utils/misc"
 	river "github.com/signal18/replication-manager/utils/river"
 	"github.com/signal18/replication-manager/utils/s18log"
-	"github.com/signal18/replication-manager/utils/state"
 )
 
 func (server *ServerMonitor) JobRun() {
@@ -493,41 +492,31 @@ func (server *ServerMonitor) JobsCheckRunning() error {
 		rows.Scan(&task.task, &task.ct)
 		if task.ct > 0 {
 			if task.ct > 10 {
-				server.ClusterGroup.sme.AddState("ERR00060", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["ERR00060"], server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
+				server.ClusterGroup.SetSugarState("ERR00060", "JOB", server.URL, server.URL)
 				purge := "DELETE from replication_manager_schema.jobs WHERE task='" + task.task + "' AND done=0 AND result IS NULL order by start asc limit " + strconv.Itoa(task.ct-1)
 				err := server.ExecQueryNoBinLog(purge)
 				if err != nil {
 					server.ClusterGroup.LogPrintf(LvlErr, "Scheduler error purging replication_manager_schema.jobs %s", err)
 				}
 			} else {
-				if task.task == "optimized" {
-					server.ClusterGroup.sme.AddState("WARN0072", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0072"], server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "restart" {
-					server.ClusterGroup.sme.AddState("WARN0096", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0096"], server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "stop" {
-					server.ClusterGroup.sme.AddState("WARN0097", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0097"], server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "xtrabackup" {
-					server.ClusterGroup.sme.AddState("WARN0073", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0073"], server.ClusterGroup.Conf.BackupPhysicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "mariabackup" {
-					server.ClusterGroup.sme.AddState("WARN0073", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0073"], server.ClusterGroup.Conf.BackupPhysicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "reseedxtrabackup" {
-					server.ClusterGroup.sme.AddState("WARN0074", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0074"], server.ClusterGroup.Conf.BackupPhysicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "reseedmariabackup" {
-					server.ClusterGroup.sme.AddState("WARN0074", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0074"], server.ClusterGroup.Conf.BackupPhysicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "reseedmysqldump" {
-					server.ClusterGroup.sme.AddState("WARN0075", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0075"], server.ClusterGroup.Conf.BackupLogicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "reseedmydumper" {
-					server.ClusterGroup.sme.AddState("WARN0075", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0075"], server.ClusterGroup.Conf.BackupLogicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "flashbackxtrabackup" {
-					server.ClusterGroup.sme.AddState("WARN0076", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0076"], server.ClusterGroup.Conf.BackupPhysicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "flashbackmariabackup" {
-					server.ClusterGroup.sme.AddState("WARN0076", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0076"], server.ClusterGroup.Conf.BackupPhysicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "flashbackmydumper" {
-					server.ClusterGroup.sme.AddState("WARN0077", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0077"], server.ClusterGroup.Conf.BackupLogicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
-				} else if task.task == "flashbackmysqldump" {
-					server.ClusterGroup.sme.AddState("WARN0077", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(server.ClusterGroup.GetErrorList()["WARN0077"], server.ClusterGroup.Conf.BackupLogicalType, server.URL), ErrFrom: "JOB", ServerUrl: server.URL})
+				switch task.task {
+				case "optimized":
+					server.ClusterGroup.SetSugarState("WARN0072", "JOB", server.URL, server.URL)
+				case "restart":
+					server.ClusterGroup.SetSugarState("WARN0096", "JOB", server.URL, server.URL)
+				case "stop":
+					server.ClusterGroup.SetSugarState("WARN0097", "JOB", server.URL, server.URL)
+				case "xtrabackup", "mariabackup":
+					server.ClusterGroup.SetSugarState("WARN0073", "JOB", server.URL, server.ClusterGroup.Conf.BackupPhysicalType, server.URL)
+				case "reseedxtrabackup", "reseedmariabackup":
+					server.ClusterGroup.SetSugarState("WARN0074", "JOB", server.URL, server.ClusterGroup.Conf.BackupPhysicalType, server.URL)
+				case "reseedmysqldump", "reseedmydumper":
+					server.ClusterGroup.SetSugarState("WARN0075", "JOB", server.URL, server.ClusterGroup.Conf.BackupLogicalType, server.URL)
+				case "flashbackxtrabackup", "flashbackmariabackup":
+					server.ClusterGroup.SetSugarState("WARN0076", "JOB", server.URL, server.ClusterGroup.Conf.BackupPhysicalType, server.URL)
+				case "flashbackmydumper", "flashbackmysqldump":
+					server.ClusterGroup.SetSugarState("WARN0077", "JOB", server.URL, server.ClusterGroup.Conf.BackupLogicalType, server.URL)
 				}
-
 			}
 		}
diff --git a/cluster/srv_rejoin.go b/cluster/srv_rejoin.go
index f543b7ad9..e6036bf65 100644
--- a/cluster/srv_rejoin.go
+++ b/cluster/srv_rejoin.go
@@ -507,7 +507,7 @@ func (server *ServerMonitor) rejoinSlave(ss dbhelper.SlaveStatus) error {
 			} else {
 				//Adding state waiting for old master to rejoin in positional mode
 				// this state prevent crash info to be removed
-				server.ClusterGroup.sme.AddState("ERR00049", state.State{ErrType: "ERRRO", ErrDesc: fmt.Sprintf(clusterError["ERR00049"]), ErrFrom: "TOPO"})
+				server.ClusterGroup.SetSugarState("ERR00049", "TOPO", "")
 			}
 		}
 	}
@@ -641,10 +641,10 @@ func (cluster *Cluster) RejoinFixRelay(slave *ServerMonitor, relay *ServerMonitor) error {
 	if cluster.GetTopology() == topoMultiMasterRing || cluster.GetTopology() == topoMultiMasterWsrep {
 		return nil
 	}
-	cluster.sme.AddState("ERR00045", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00045"]), ErrFrom: "TOPO"})
+	cluster.SetSugarState("ERR00045", "TOPO", "")
 	if slave.GetReplicationDelay() > cluster.Conf.FailMaxDelay {
-		cluster.sme.AddState("ERR00046", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00046"]), ErrFrom: "TOPO"})
+		cluster.SetSugarState("ERR00046", "TOPO", "")
 		return nil
 	} else {
 		ss, err := slave.GetSlaveStatus(slave.ReplicationSourceName)
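
The SetSugarState / AddSugarState helpers that this patch calls are defined outside the hunks shown here. As a rough, self-contained sketch of the call-site pattern only (the type names, the clusterError message text and the default severity below are assumptions, not the project's actual implementation), the wrapper folds the clusterError lookup and fmt.Sprintf expansion that every old call site repeated into a single variadic call:

package main

import "fmt"

// State mirrors the struct literal the old call sites built by hand; the
// field names come from the diff, everything else here is illustrative.
type State struct {
	ErrType   string
	ErrDesc   string
	ErrFrom   string
	ServerUrl string
}

// clusterError maps an error key to a printf-style template, as the replaced
// fmt.Sprintf(clusterError[...]) calls suggest. The message text is made up.
var clusterError = map[string]string{
	"WARN0054": "No log of slow queries in replication on slave %s",
}

type Cluster struct{}

// SetState stands in for the original method the sugar wrapper delegates to.
func (c *Cluster) SetState(key string, s State) {
	fmt.Printf("%s [%s] %s (%s)\n", s.ErrType, key, s.ErrDesc, s.ServerUrl)
}

// SetSugarState is one possible reading of the new helper: look up the
// template for key, expand it with args, and fill the State struct, so call
// sites shrink to SetSugarState(key, from, serverUrl, args...).
func (c *Cluster) SetSugarState(key string, from string, serverUrl string, args ...interface{}) {
	c.SetState(key, State{
		ErrType:   "WARNING", // assumed default; the old sites used WARNING, LvlWarn or ERROR depending on the key
		ErrDesc:   fmt.Sprintf(clusterError[key], args...),
		ErrFrom:   from,
		ServerUrl: serverUrl,
	})
}

func main() {
	c := &Cluster{}
	// Same call shape as the sites introduced by the patch above.
	c.SetSugarState("WARN0054", "TOPO", "db1:3306", "db1:3306")
}

The real helpers in the cluster package presumably also choose the right ErrType per error code and cover the AddState variant; the diff itself only shows the call-site side of the refactor.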