Skip to content

Commit 3ee9af0

Browse files
Minor corrections in SLURM fetcher and TSDB updater (#280)
* fix: Minor corrections in fetcher and updater
* When pulling historical SLURM data, include the RUNNING state as well so that jobs are updated incrementally.
* Use the `lookback_delta` query parameter in TSDB queries to limit the artificial lookback of metrics, which can overestimate energy consumption.
* refactor: Use text/template to avoid escaping chars like `<`
---------
Signed-off-by: Mahendra Paipuri <[email protected]>
1 parent 411fabc commit 3ee9af0

File tree

4 files changed

+18
-19
lines changed

4 files changed

+18
-19
lines changed

pkg/api/resource/slurm/cli.go

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -487,21 +487,6 @@ func parseSacctMgrCmdOutput(sacctMgrOutput string, currentTime string) ([]models
487487

488488
// runSacctCmd executes the sacct command and returns its output.
489489
func (s *slurmScheduler) runSacctCmd(ctx context.Context, start, end time.Time) ([]byte, error) {
490-
// If we are fetching historical data, do not use RUNNING state as it can report
491-
// the same job twice: once while it was still in running state and once when it is in completed
492-
// state.
493-
// endTimeParsed, _ := time.Parse(base.DatetimeLayout, endTime)
494-
var states []string
495-
// When fetching current jobs, endTime should be very close to current time. Here we
496-
// assume that if current time is more than 5 sec later than end time, we are fetching
497-
// historical data
498-
if time.Now().In(end.Location()).Sub(end) > 5*time.Second {
499-
// Strip RUNNING state from slice
500-
states = slurmStates[:len(slurmStates)-1]
501-
} else {
502-
states = slurmStates
503-
}
504-
505490
// sacct path
506491
sacctPath := filepath.Join(s.cluster.CLI.Path, "sacct")
507492

@@ -515,7 +500,7 @@ func (s *slurmScheduler) runSacctCmd(ctx context.Context, start, end time.Time)
515500
args := []string{
516501
"-D", "-X", "--noheader", "--allusers", "--parsable2",
517502
"--format", strings.Join(sacctFields, ","),
518-
"--state", strings.Join(states, ","),
503+
"--state", strings.Join(slurmStates, ","),
519504
"--starttime", start.Format(base.DatetimeLayout),
520505
"--endtime", end.Format(base.DatetimeLayout),
521506
}

pkg/api/resource/slurm/manager.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,6 @@ func (s *slurmScheduler) FetchUsersProjects(
138138

139139
// Get jobs from slurm sacct command.
140140
func (s *slurmScheduler) fetchFromSacct(ctx context.Context, start time.Time, end time.Time) ([]models.Unit, error) {
141-
// startTime := start.Format(base.DatetimeLayout)
142-
// endTime := end.Format(base.DatetimeLayout)
143141
// Execute sacct command between start and end times
144142
sacctOutput, err := s.runSacctCmd(ctx, start, end)
145143
if err != nil {

pkg/api/updater/tsdb/tsdb.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ import (
55
"context"
66
"errors"
77
"fmt"
8-
"html/template"
98
"log/slog"
109
"maps"
1110
"math"
1211
"strings"
1312
"sync"
13+
"text/template"
1414
"time"
1515

1616
"github.com/mahendrapaipuri/ceems/pkg/api/helper"

pkg/tsdb/tsdb.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,14 @@ func (t *TSDB) Query(ctx context.Context, query string, queryTime time.Time) (Me
322322
"time": []string{queryTime.UTC().Format(time.RFC3339Nano)},
323323
}
324324

325+
// Get current scrape interval to use as lookback_delta
326+
// This query parameter is undocumented on Prometheus. If we use
327+
// default value of 5m, we tend to have metrics 5m **after** compute
328+
// unit has finished, which leads to overestimation of energy
329+
if scrapeInterval := t.Settings(ctx).ScrapeInterval; scrapeInterval > 0 {
330+
values.Add("lookback_delta", scrapeInterval.String())
331+
}
332+
325333
// Create a new POST request
326334
req, err := http.NewRequestWithContext(
327335
ctx,
@@ -449,6 +457,14 @@ func (t *TSDB) RangeQuery(
449457
"step": []string{step},
450458
}
451459

460+
// Get current scrape interval to use as lookback_delta
461+
// This query parameter is undocumented on Prometheus. If we use
462+
// default value of 5m, we tend to have metrics 5m **after** compute
463+
// unit has finished, which leads to overestimation of energy
464+
if scrapeInterval := t.Settings(ctx).ScrapeInterval; scrapeInterval > 0 {
465+
values.Add("lookback_delta", scrapeInterval.String())
466+
}
467+
452468
// Create a new POST request
453469
req, err := http.NewRequestWithContext(
454470
ctx,

0 commit comments

Comments
 (0)