From 958e7a9efb18aeabd4ca224beb3f0a203b96bfdc Mon Sep 17 00:00:00 2001 From: Andrew Ayer Date: Tue, 6 May 2025 10:26:11 -0400 Subject: [PATCH] Avoid relying on STH timestamp during monitoring Instead use the time at which the STH was observed (which for FilesystemState is assumed to be the mtime of the STH file). This is easier to reason about: we don't have to worry about logs lying about the time; we don't have to take into account the delay between STH fetch and healthcheck; we won't raise spurious health checks about logs with MMDs longer than the healthcheck interval. --- monitor/fsstate.go | 2 +- monitor/healthcheck.go | 22 ++++++++++++---------- monitor/monitor.go | 34 +++++++++++++++++----------------- monitor/state.go | 5 ++++- monitor/statedir.go | 2 ++ monitor/sthdir.go | 38 ++++++++++++++++++++++++++++++-------- 6 files changed, 66 insertions(+), 37 deletions(-) diff --git a/monitor/fsstate.go b/monitor/fsstate.go index c1131a6..d7d9d04 100644 --- a/monitor/fsstate.go +++ b/monitor/fsstate.go @@ -83,7 +83,7 @@ func (s *FilesystemState) StoreSTH(ctx context.Context, logID LogID, sth *cttype return storeSTHInDir(sthsDirPath, sth) } -func (s *FilesystemState) LoadSTHs(ctx context.Context, logID LogID) ([]*cttypes.SignedTreeHead, error) { +func (s *FilesystemState) LoadSTHs(ctx context.Context, logID LogID) ([]*StoredSTH, error) { sthsDirPath := filepath.Join(s.logStateDir(logID), "unverified_sths") return loadSTHsFromDir(sthsDirPath) } diff --git a/monitor/healthcheck.go b/monitor/healthcheck.go index 17c939e..15812b6 100644 --- a/monitor/healthcheck.go +++ b/monitor/healthcheck.go @@ -31,7 +31,7 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err return nil } - if state.VerifiedSTH != nil && time.Since(state.VerifiedSTH.TimestampTime()) < config.HealthCheckInterval { + if time.Since(state.LastSuccess) < config.HealthCheckInterval { return nil } @@ -42,8 +42,9 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err if len(sths) == 0 { info := &StaleSTHInfo{ - Log: ctlog, - LatestSTH: state.VerifiedSTH, + Log: ctlog, + LastSuccess: state.LastSuccess, + LatestSTH: state.VerifiedSTH, } if err := config.State.NotifyHealthCheckFailure(ctx, ctlog, info); err != nil { return fmt.Errorf("error notifying about stale STH: %w", err) @@ -68,13 +69,14 @@ type HealthCheckFailure interface { } type StaleSTHInfo struct { - Log *loglist.Log - LatestSTH *cttypes.SignedTreeHead // may be nil + Log *loglist.Log + LastSuccess time.Time + LatestSTH *cttypes.SignedTreeHead // may be nil } type BacklogInfo struct { Log *loglist.Log - LatestSTH *cttypes.SignedTreeHead + LatestSTH *StoredSTH Position uint64 } @@ -90,7 +92,7 @@ func (e *BacklogInfo) Backlog() uint64 { } func (e *StaleSTHInfo) Summary() string { - return fmt.Sprintf("%s is out-of-date", e.Log.GetMonitoringURL()) + return fmt.Sprintf("Unable to contact %s since %s", e.Log.GetMonitoringURL(), e.LastSuccess) } func (e *BacklogInfo) Summary() string { return fmt.Sprintf("Backlog of size %d from %s", e.Backlog(), e.Log.GetMonitoringURL()) @@ -101,12 +103,12 @@ func (e *StaleLogListInfo) Summary() string { func (e *StaleSTHInfo) Text() string { text := new(strings.Builder) - fmt.Fprintf(text, "certspotter has been unable to get up-to-date information about %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL()) + fmt.Fprintf(text, "certspotter has been unable to contact %s since %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL(), e.LastSuccess) fmt.Fprintf(text, "\n") fmt.Fprintf(text, "For details, see certspotter's stderr output.\n") fmt.Fprintf(text, "\n") if e.LatestSTH != nil { - fmt.Fprintf(text, "Latest known log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime()) + fmt.Fprintf(text, "Latest known log size = %d\n", e.LatestSTH.TreeSize) } else { fmt.Fprintf(text, "Latest known log size = none\n") } @@ -118,7 +120,7 @@ func (e *BacklogInfo) Text() string { fmt.Fprintf(text, "\n") fmt.Fprintf(text, "For more details, see certspotter's stderr output.\n") fmt.Fprintf(text, "\n") - fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime()) + fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.StoredAt) fmt.Fprintf(text, "Current position = %d\n", e.Position) fmt.Fprintf(text, " Backlog = %d\n", e.Backlog()) return text.String() diff --git a/monitor/monitor.go b/monitor/monitor.go index 08ed4bf..98df2a8 100644 --- a/monitor/monitor.go +++ b/monitor/monitor.go @@ -104,9 +104,6 @@ func getAndVerifySTH(ctx context.Context, ctlog *loglist.Log, client ctclient.Lo if err := ctcrypto.PublicKey(ctlog.Key).Verify(ctcrypto.SignatureInputForSTH(sth), sth.Signature); err != nil { return nil, "", fmt.Errorf("STH has invalid signature: %w", err) } - if now := time.Now(); sth.TimestampTime().After(now) { - return nil, "", fmt.Errorf("STH timestamp %s is after current time %s (either log is misbehaving or your system clock is incorrect)", sth.TimestampTime(), now) - } return sth, url, nil } @@ -222,12 +219,14 @@ func monitorLogContinously(ctx context.Context, config *Config, ctlog *loglist.L DownloadPosition: tree, VerifiedPosition: tree, VerifiedSTH: sth, + LastSuccess: time.Now(), } } else { state = &LogState{ DownloadPosition: merkletree.EmptyCollapsedTree(), VerifiedPosition: merkletree.EmptyCollapsedTree(), VerifiedSTH: nil, + LastSuccess: time.Now(), } } if config.Verbose { @@ -303,8 +302,8 @@ func getSTHWorker(ctx context.Context, config *Config, ctlog *loglist.Log, clien type batch struct { number uint64 begin, end uint64 - sths []*cttypes.SignedTreeHead // STHs with sizes in range [begin,end], sorted by TreeSize - entries []ctclient.Entry // in range [begin,end) + sths []*StoredSTH // STHs with sizes in range [begin,end], sorted by TreeSize + entries []ctclient.Entry // in range [begin,end) } func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.Log, position uint64, batches chan<- *batch) error { @@ -317,7 +316,7 @@ func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.L } for len(sths) > 0 && sths[0].TreeSize < position { // TODO-4: audit sths[0] against log's verified STH - if err := config.State.RemoveSTH(ctx, ctlog.LogID, sths[0]); err != nil { + if err := config.State.RemoveSTH(ctx, ctlog.LogID, &sths[0].SignedTreeHead); err != nil { return fmt.Errorf("error removing STH: %w", err) } sths = sths[1:] @@ -335,30 +334,30 @@ func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.L return ctx.Err() } -// return the earliest STH timestamp within the right-most tile -func tileEarliestTimestamp(sths []*cttypes.SignedTreeHead) time.Time { +// return the time at which the right-most tile indicated by sths was discovered +func tileDiscoveryTime(sths []*StoredSTH) time.Time { largestSTH, sths := sths[len(sths)-1], sths[:len(sths)-1] tileNumber := largestSTH.TreeSize / ctclient.StaticTileWidth - earliest := largestSTH.TimestampTime() + storedAt := largestSTH.StoredAt for _, sth := range slices.Backward(sths) { if sth.TreeSize/ctclient.StaticTileWidth != tileNumber { break } - if timestamp := sth.TimestampTime(); timestamp.Before(earliest) { - earliest = timestamp + if sth.StoredAt.Before(storedAt) { + storedAt = sth.StoredAt } } - return earliest + return storedAt } -func generateBatches(ctx context.Context, ctlog *loglist.Log, position uint64, number uint64, sths []*cttypes.SignedTreeHead, batches chan<- *batch) (uint64, uint64, error) { +func generateBatches(ctx context.Context, ctlog *loglist.Log, position uint64, number uint64, sths []*StoredSTH, batches chan<- *batch) (uint64, uint64, error) { downloadJobSize := downloadJobSize(ctlog) if len(sths) == 0 { return position, number, nil } largestSTH := sths[len(sths)-1] treeSize := largestSTH.TreeSize - if ctlog.IsStaticCTAPI() && time.Since(tileEarliestTimestamp(sths)) < 5*time.Minute { + if ctlog.IsStaticCTAPI() && time.Since(tileDiscoveryTime(sths)) < 5*time.Minute { // Round down to the tile boundary to avoid downloading a partial tile that was recently discovered // In a future invocation of this function, either enough time will have passed that this code path will be skipped, or the log will have grown and treeSize will be rounded to a larger tile boundary treeSize -= treeSize % ctclient.StaticTileWidth @@ -471,17 +470,18 @@ func saveStateWorker(ctx context.Context, config *Config, ctlog *loglist.Log, st batch.sths = batch.sths[1:] if sth.RootHash != rootHash { return &verifyEntriesError{ - sth: sth, + sth: &sth.SignedTreeHead, entriesRootHash: rootHash, } } state.advanceVerifiedPosition() - state.VerifiedSTH = sth + state.LastSuccess = sth.StoredAt + state.VerifiedSTH = &sth.SignedTreeHead if err := config.State.StoreLogState(ctx, ctlog.LogID, state); err != nil { return fmt.Errorf("error storing log state: %w", err) } // don't remove the STH until state has been durably stored - if err := config.State.RemoveSTH(ctx, ctlog.LogID, sth); err != nil { + if err := config.State.RemoveSTH(ctx, ctlog.LogID, &sth.SignedTreeHead); err != nil { return fmt.Errorf("error removing verified STH: %w", err) } } diff --git a/monitor/state.go b/monitor/state.go index b08014a..6c5d062 100644 --- a/monitor/state.go +++ b/monitor/state.go @@ -11,6 +11,8 @@ package monitor import ( "context" + "time" + "software.sslmate.com/src/certspotter/cttypes" "software.sslmate.com/src/certspotter/loglist" "software.sslmate.com/src/certspotter/merkletree" @@ -20,6 +22,7 @@ type LogState struct { DownloadPosition *merkletree.CollapsedTree `json:"download_position"` VerifiedPosition *merkletree.CollapsedTree `json:"verified_position"` VerifiedSTH *cttypes.SignedTreeHead `json:"verified_sth"` + LastSuccess time.Time `json:"last_success"` } func (state *LogState) rewindDownloadPosition() { @@ -55,7 +58,7 @@ type StateProvider interface { // Load all STHs for this log previously stored with StoreSTH. // The returned slice must be sorted by tree size. - LoadSTHs(context.Context, LogID) ([]*cttypes.SignedTreeHead, error) + LoadSTHs(context.Context, LogID) ([]*StoredSTH, error) // Remove an STH so it is no longer returned by LoadSTHs. RemoveSTH(context.Context, LogID, *cttypes.SignedTreeHead) error diff --git a/monitor/statedir.go b/monitor/statedir.go index 5618a8c..bffd044 100644 --- a/monitor/statedir.go +++ b/monitor/statedir.go @@ -20,6 +20,7 @@ import ( "software.sslmate.com/src/certspotter/merkletree" "strconv" "strings" + "time" ) func readVersion(stateDir string) (int, error) { @@ -79,6 +80,7 @@ func migrateLogStateDirV1(dir string) error { DownloadPosition: &tree, VerifiedPosition: &tree, VerifiedSTH: &sth, + LastSuccess: time.Now(), } if err := writeJSONFile(filepath.Join(dir, "state.json"), stateFile, 0666); err != nil { return err diff --git a/monitor/sthdir.go b/monitor/sthdir.go index 78ccc5a..b6407ad 100644 --- a/monitor/sthdir.go +++ b/monitor/sthdir.go @@ -17,6 +17,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "io/fs" "os" "path/filepath" @@ -24,16 +25,22 @@ import ( "software.sslmate.com/src/certspotter/cttypes" "strconv" "strings" + "time" ) -func loadSTHsFromDir(dirPath string) ([]*cttypes.SignedTreeHead, error) { +type StoredSTH struct { + cttypes.SignedTreeHead + StoredAt time.Time `json:"stored_at"` // time at which the STH was first stored +} + +func loadSTHsFromDir(dirPath string) ([]*StoredSTH, error) { entries, err := os.ReadDir(dirPath) if errors.Is(err, fs.ErrNotExist) { - return []*cttypes.SignedTreeHead{}, nil + return []*StoredSTH{}, nil } else if err != nil { return nil, err } - sths := make([]*cttypes.SignedTreeHead, 0, len(entries)) + sths := make([]*StoredSTH, 0, len(entries)) for _, entry := range entries { filename := entry.Name() if strings.HasPrefix(filename, ".") || !strings.HasSuffix(filename, ".json") { @@ -45,17 +52,30 @@ func loadSTHsFromDir(dirPath string) ([]*cttypes.SignedTreeHead, error) { } sths = append(sths, sth) } - slices.SortFunc(sths, func(a, b *cttypes.SignedTreeHead) int { return cmp.Compare(a.TreeSize, b.TreeSize) }) + slices.SortFunc(sths, func(a, b *StoredSTH) int { return cmp.Compare(a.TreeSize, b.TreeSize) }) return sths, nil } -func readSTHFile(filePath string) (*cttypes.SignedTreeHead, error) { - fileBytes, err := os.ReadFile(filePath) +func readSTHFile(filePath string) (*StoredSTH, error) { + file, err := os.Open(filePath) if err != nil { return nil, err } - sth := new(cttypes.SignedTreeHead) - if err := json.Unmarshal(fileBytes, sth); err != nil { + defer file.Close() + + info, err := file.Stat() + if err != nil { + return nil, err + } + sth := &StoredSTH{ + StoredAt: info.ModTime(), + } + + fileBytes, err := io.ReadAll(file) + if err != nil { + return nil, fmt.Errorf("error reading %s: %w", filePath, err) + } + if err := json.Unmarshal(fileBytes, &sth.SignedTreeHead); err != nil { return nil, fmt.Errorf("error parsing %s: %w", filePath, err) } return sth, nil @@ -64,6 +84,8 @@ func readSTHFile(filePath string) (*cttypes.SignedTreeHead, error) { func storeSTHInDir(dirPath string, sth *cttypes.SignedTreeHead) error { filePath := filepath.Join(dirPath, sthFilename(sth)) if fileExists(filePath) { + // If the file already exists, we don't want its mtime to change + // because StoredSTH.StoredAt needs to be the time the STH was *first* stored. return nil } return writeJSONFile(filePath, sth, 0666)