Avoid relying on STH timestamp during monitoring

Instead use the time at which the STH was observed (which for
FilesystemState is assumed to be the mtime of the STH file).  This is
easier to reason about: we don't have to worry about logs lying about
the time; we don't have to take into account the delay between STH fetch
and healthcheck; we won't raise spurious health checks about logs with
MMDs longer than the healthcheck interval.
This commit is contained in:
Andrew Ayer 2025-05-06 10:26:11 -04:00
parent 00fd77f6ed
commit 958e7a9efb
6 changed files with 66 additions and 37 deletions

View File

@ -83,7 +83,7 @@ func (s *FilesystemState) StoreSTH(ctx context.Context, logID LogID, sth *cttype
return storeSTHInDir(sthsDirPath, sth) return storeSTHInDir(sthsDirPath, sth)
} }
func (s *FilesystemState) LoadSTHs(ctx context.Context, logID LogID) ([]*cttypes.SignedTreeHead, error) { func (s *FilesystemState) LoadSTHs(ctx context.Context, logID LogID) ([]*StoredSTH, error) {
sthsDirPath := filepath.Join(s.logStateDir(logID), "unverified_sths") sthsDirPath := filepath.Join(s.logStateDir(logID), "unverified_sths")
return loadSTHsFromDir(sthsDirPath) return loadSTHsFromDir(sthsDirPath)
} }

View File

@ -31,7 +31,7 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err
return nil return nil
} }
if state.VerifiedSTH != nil && time.Since(state.VerifiedSTH.TimestampTime()) < config.HealthCheckInterval { if time.Since(state.LastSuccess) < config.HealthCheckInterval {
return nil return nil
} }
@ -42,8 +42,9 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err
if len(sths) == 0 { if len(sths) == 0 {
info := &StaleSTHInfo{ info := &StaleSTHInfo{
Log: ctlog, Log: ctlog,
LatestSTH: state.VerifiedSTH, LastSuccess: state.LastSuccess,
LatestSTH: state.VerifiedSTH,
} }
if err := config.State.NotifyHealthCheckFailure(ctx, ctlog, info); err != nil { if err := config.State.NotifyHealthCheckFailure(ctx, ctlog, info); err != nil {
return fmt.Errorf("error notifying about stale STH: %w", err) return fmt.Errorf("error notifying about stale STH: %w", err)
@ -68,13 +69,14 @@ type HealthCheckFailure interface {
} }
type StaleSTHInfo struct { type StaleSTHInfo struct {
Log *loglist.Log Log *loglist.Log
LatestSTH *cttypes.SignedTreeHead // may be nil LastSuccess time.Time
LatestSTH *cttypes.SignedTreeHead // may be nil
} }
type BacklogInfo struct { type BacklogInfo struct {
Log *loglist.Log Log *loglist.Log
LatestSTH *cttypes.SignedTreeHead LatestSTH *StoredSTH
Position uint64 Position uint64
} }
@ -90,7 +92,7 @@ func (e *BacklogInfo) Backlog() uint64 {
} }
func (e *StaleSTHInfo) Summary() string { func (e *StaleSTHInfo) Summary() string {
return fmt.Sprintf("%s is out-of-date", e.Log.GetMonitoringURL()) return fmt.Sprintf("Unable to contact %s since %s", e.Log.GetMonitoringURL(), e.LastSuccess)
} }
func (e *BacklogInfo) Summary() string { func (e *BacklogInfo) Summary() string {
return fmt.Sprintf("Backlog of size %d from %s", e.Backlog(), e.Log.GetMonitoringURL()) return fmt.Sprintf("Backlog of size %d from %s", e.Backlog(), e.Log.GetMonitoringURL())
@ -101,12 +103,12 @@ func (e *StaleLogListInfo) Summary() string {
func (e *StaleSTHInfo) Text() string { func (e *StaleSTHInfo) Text() string {
text := new(strings.Builder) text := new(strings.Builder)
fmt.Fprintf(text, "certspotter has been unable to get up-to-date information about %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL()) fmt.Fprintf(text, "certspotter has been unable to contact %s since %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL(), e.LastSuccess)
fmt.Fprintf(text, "\n") fmt.Fprintf(text, "\n")
fmt.Fprintf(text, "For details, see certspotter's stderr output.\n") fmt.Fprintf(text, "For details, see certspotter's stderr output.\n")
fmt.Fprintf(text, "\n") fmt.Fprintf(text, "\n")
if e.LatestSTH != nil { if e.LatestSTH != nil {
fmt.Fprintf(text, "Latest known log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime()) fmt.Fprintf(text, "Latest known log size = %d\n", e.LatestSTH.TreeSize)
} else { } else {
fmt.Fprintf(text, "Latest known log size = none\n") fmt.Fprintf(text, "Latest known log size = none\n")
} }
@ -118,7 +120,7 @@ func (e *BacklogInfo) Text() string {
fmt.Fprintf(text, "\n") fmt.Fprintf(text, "\n")
fmt.Fprintf(text, "For more details, see certspotter's stderr output.\n") fmt.Fprintf(text, "For more details, see certspotter's stderr output.\n")
fmt.Fprintf(text, "\n") fmt.Fprintf(text, "\n")
fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime()) fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.StoredAt)
fmt.Fprintf(text, "Current position = %d\n", e.Position) fmt.Fprintf(text, "Current position = %d\n", e.Position)
fmt.Fprintf(text, " Backlog = %d\n", e.Backlog()) fmt.Fprintf(text, " Backlog = %d\n", e.Backlog())
return text.String() return text.String()

View File

@ -104,9 +104,6 @@ func getAndVerifySTH(ctx context.Context, ctlog *loglist.Log, client ctclient.Lo
if err := ctcrypto.PublicKey(ctlog.Key).Verify(ctcrypto.SignatureInputForSTH(sth), sth.Signature); err != nil { if err := ctcrypto.PublicKey(ctlog.Key).Verify(ctcrypto.SignatureInputForSTH(sth), sth.Signature); err != nil {
return nil, "", fmt.Errorf("STH has invalid signature: %w", err) return nil, "", fmt.Errorf("STH has invalid signature: %w", err)
} }
if now := time.Now(); sth.TimestampTime().After(now) {
return nil, "", fmt.Errorf("STH timestamp %s is after current time %s (either log is misbehaving or your system clock is incorrect)", sth.TimestampTime(), now)
}
return sth, url, nil return sth, url, nil
} }
@ -222,12 +219,14 @@ func monitorLogContinously(ctx context.Context, config *Config, ctlog *loglist.L
DownloadPosition: tree, DownloadPosition: tree,
VerifiedPosition: tree, VerifiedPosition: tree,
VerifiedSTH: sth, VerifiedSTH: sth,
LastSuccess: time.Now(),
} }
} else { } else {
state = &LogState{ state = &LogState{
DownloadPosition: merkletree.EmptyCollapsedTree(), DownloadPosition: merkletree.EmptyCollapsedTree(),
VerifiedPosition: merkletree.EmptyCollapsedTree(), VerifiedPosition: merkletree.EmptyCollapsedTree(),
VerifiedSTH: nil, VerifiedSTH: nil,
LastSuccess: time.Now(),
} }
} }
if config.Verbose { if config.Verbose {
@ -303,8 +302,8 @@ func getSTHWorker(ctx context.Context, config *Config, ctlog *loglist.Log, clien
type batch struct { type batch struct {
number uint64 number uint64
begin, end uint64 begin, end uint64
sths []*cttypes.SignedTreeHead // STHs with sizes in range [begin,end], sorted by TreeSize sths []*StoredSTH // STHs with sizes in range [begin,end], sorted by TreeSize
entries []ctclient.Entry // in range [begin,end) entries []ctclient.Entry // in range [begin,end)
} }
func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.Log, position uint64, batches chan<- *batch) error { func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.Log, position uint64, batches chan<- *batch) error {
@ -317,7 +316,7 @@ func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.L
} }
for len(sths) > 0 && sths[0].TreeSize < position { for len(sths) > 0 && sths[0].TreeSize < position {
// TODO-4: audit sths[0] against log's verified STH // TODO-4: audit sths[0] against log's verified STH
if err := config.State.RemoveSTH(ctx, ctlog.LogID, sths[0]); err != nil { if err := config.State.RemoveSTH(ctx, ctlog.LogID, &sths[0].SignedTreeHead); err != nil {
return fmt.Errorf("error removing STH: %w", err) return fmt.Errorf("error removing STH: %w", err)
} }
sths = sths[1:] sths = sths[1:]
@ -335,30 +334,30 @@ func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.L
return ctx.Err() return ctx.Err()
} }
// return the earliest STH timestamp within the right-most tile // return the time at which the right-most tile indicated by sths was discovered
func tileEarliestTimestamp(sths []*cttypes.SignedTreeHead) time.Time { func tileDiscoveryTime(sths []*StoredSTH) time.Time {
largestSTH, sths := sths[len(sths)-1], sths[:len(sths)-1] largestSTH, sths := sths[len(sths)-1], sths[:len(sths)-1]
tileNumber := largestSTH.TreeSize / ctclient.StaticTileWidth tileNumber := largestSTH.TreeSize / ctclient.StaticTileWidth
earliest := largestSTH.TimestampTime() storedAt := largestSTH.StoredAt
for _, sth := range slices.Backward(sths) { for _, sth := range slices.Backward(sths) {
if sth.TreeSize/ctclient.StaticTileWidth != tileNumber { if sth.TreeSize/ctclient.StaticTileWidth != tileNumber {
break break
} }
if timestamp := sth.TimestampTime(); timestamp.Before(earliest) { if sth.StoredAt.Before(storedAt) {
earliest = timestamp storedAt = sth.StoredAt
} }
} }
return earliest return storedAt
} }
func generateBatches(ctx context.Context, ctlog *loglist.Log, position uint64, number uint64, sths []*cttypes.SignedTreeHead, batches chan<- *batch) (uint64, uint64, error) { func generateBatches(ctx context.Context, ctlog *loglist.Log, position uint64, number uint64, sths []*StoredSTH, batches chan<- *batch) (uint64, uint64, error) {
downloadJobSize := downloadJobSize(ctlog) downloadJobSize := downloadJobSize(ctlog)
if len(sths) == 0 { if len(sths) == 0 {
return position, number, nil return position, number, nil
} }
largestSTH := sths[len(sths)-1] largestSTH := sths[len(sths)-1]
treeSize := largestSTH.TreeSize treeSize := largestSTH.TreeSize
if ctlog.IsStaticCTAPI() && time.Since(tileEarliestTimestamp(sths)) < 5*time.Minute { if ctlog.IsStaticCTAPI() && time.Since(tileDiscoveryTime(sths)) < 5*time.Minute {
// Round down to the tile boundary to avoid downloading a partial tile that was recently discovered // Round down to the tile boundary to avoid downloading a partial tile that was recently discovered
// In a future invocation of this function, either enough time will have passed that this code path will be skipped, or the log will have grown and treeSize will be rounded to a larger tile boundary // In a future invocation of this function, either enough time will have passed that this code path will be skipped, or the log will have grown and treeSize will be rounded to a larger tile boundary
treeSize -= treeSize % ctclient.StaticTileWidth treeSize -= treeSize % ctclient.StaticTileWidth
@ -471,17 +470,18 @@ func saveStateWorker(ctx context.Context, config *Config, ctlog *loglist.Log, st
batch.sths = batch.sths[1:] batch.sths = batch.sths[1:]
if sth.RootHash != rootHash { if sth.RootHash != rootHash {
return &verifyEntriesError{ return &verifyEntriesError{
sth: sth, sth: &sth.SignedTreeHead,
entriesRootHash: rootHash, entriesRootHash: rootHash,
} }
} }
state.advanceVerifiedPosition() state.advanceVerifiedPosition()
state.VerifiedSTH = sth state.LastSuccess = sth.StoredAt
state.VerifiedSTH = &sth.SignedTreeHead
if err := config.State.StoreLogState(ctx, ctlog.LogID, state); err != nil { if err := config.State.StoreLogState(ctx, ctlog.LogID, state); err != nil {
return fmt.Errorf("error storing log state: %w", err) return fmt.Errorf("error storing log state: %w", err)
} }
// don't remove the STH until state has been durably stored // don't remove the STH until state has been durably stored
if err := config.State.RemoveSTH(ctx, ctlog.LogID, sth); err != nil { if err := config.State.RemoveSTH(ctx, ctlog.LogID, &sth.SignedTreeHead); err != nil {
return fmt.Errorf("error removing verified STH: %w", err) return fmt.Errorf("error removing verified STH: %w", err)
} }
} }

View File

@ -11,6 +11,8 @@ package monitor
import ( import (
"context" "context"
"time"
"software.sslmate.com/src/certspotter/cttypes" "software.sslmate.com/src/certspotter/cttypes"
"software.sslmate.com/src/certspotter/loglist" "software.sslmate.com/src/certspotter/loglist"
"software.sslmate.com/src/certspotter/merkletree" "software.sslmate.com/src/certspotter/merkletree"
@ -20,6 +22,7 @@ type LogState struct {
DownloadPosition *merkletree.CollapsedTree `json:"download_position"` DownloadPosition *merkletree.CollapsedTree `json:"download_position"`
VerifiedPosition *merkletree.CollapsedTree `json:"verified_position"` VerifiedPosition *merkletree.CollapsedTree `json:"verified_position"`
VerifiedSTH *cttypes.SignedTreeHead `json:"verified_sth"` VerifiedSTH *cttypes.SignedTreeHead `json:"verified_sth"`
LastSuccess time.Time `json:"last_success"`
} }
func (state *LogState) rewindDownloadPosition() { func (state *LogState) rewindDownloadPosition() {
@ -55,7 +58,7 @@ type StateProvider interface {
// Load all STHs for this log previously stored with StoreSTH. // Load all STHs for this log previously stored with StoreSTH.
// The returned slice must be sorted by tree size. // The returned slice must be sorted by tree size.
LoadSTHs(context.Context, LogID) ([]*cttypes.SignedTreeHead, error) LoadSTHs(context.Context, LogID) ([]*StoredSTH, error)
// Remove an STH so it is no longer returned by LoadSTHs. // Remove an STH so it is no longer returned by LoadSTHs.
RemoveSTH(context.Context, LogID, *cttypes.SignedTreeHead) error RemoveSTH(context.Context, LogID, *cttypes.SignedTreeHead) error

View File

@ -20,6 +20,7 @@ import (
"software.sslmate.com/src/certspotter/merkletree" "software.sslmate.com/src/certspotter/merkletree"
"strconv" "strconv"
"strings" "strings"
"time"
) )
func readVersion(stateDir string) (int, error) { func readVersion(stateDir string) (int, error) {
@ -79,6 +80,7 @@ func migrateLogStateDirV1(dir string) error {
DownloadPosition: &tree, DownloadPosition: &tree,
VerifiedPosition: &tree, VerifiedPosition: &tree,
VerifiedSTH: &sth, VerifiedSTH: &sth,
LastSuccess: time.Now(),
} }
if err := writeJSONFile(filepath.Join(dir, "state.json"), stateFile, 0666); err != nil { if err := writeJSONFile(filepath.Join(dir, "state.json"), stateFile, 0666); err != nil {
return err return err

View File

@ -17,6 +17,7 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"io"
"io/fs" "io/fs"
"os" "os"
"path/filepath" "path/filepath"
@ -24,16 +25,22 @@ import (
"software.sslmate.com/src/certspotter/cttypes" "software.sslmate.com/src/certspotter/cttypes"
"strconv" "strconv"
"strings" "strings"
"time"
) )
func loadSTHsFromDir(dirPath string) ([]*cttypes.SignedTreeHead, error) { type StoredSTH struct {
cttypes.SignedTreeHead
StoredAt time.Time `json:"stored_at"` // time at which the STH was first stored
}
func loadSTHsFromDir(dirPath string) ([]*StoredSTH, error) {
entries, err := os.ReadDir(dirPath) entries, err := os.ReadDir(dirPath)
if errors.Is(err, fs.ErrNotExist) { if errors.Is(err, fs.ErrNotExist) {
return []*cttypes.SignedTreeHead{}, nil return []*StoredSTH{}, nil
} else if err != nil { } else if err != nil {
return nil, err return nil, err
} }
sths := make([]*cttypes.SignedTreeHead, 0, len(entries)) sths := make([]*StoredSTH, 0, len(entries))
for _, entry := range entries { for _, entry := range entries {
filename := entry.Name() filename := entry.Name()
if strings.HasPrefix(filename, ".") || !strings.HasSuffix(filename, ".json") { if strings.HasPrefix(filename, ".") || !strings.HasSuffix(filename, ".json") {
@ -45,17 +52,30 @@ func loadSTHsFromDir(dirPath string) ([]*cttypes.SignedTreeHead, error) {
} }
sths = append(sths, sth) sths = append(sths, sth)
} }
slices.SortFunc(sths, func(a, b *cttypes.SignedTreeHead) int { return cmp.Compare(a.TreeSize, b.TreeSize) }) slices.SortFunc(sths, func(a, b *StoredSTH) int { return cmp.Compare(a.TreeSize, b.TreeSize) })
return sths, nil return sths, nil
} }
func readSTHFile(filePath string) (*cttypes.SignedTreeHead, error) { func readSTHFile(filePath string) (*StoredSTH, error) {
fileBytes, err := os.ReadFile(filePath) file, err := os.Open(filePath)
if err != nil { if err != nil {
return nil, err return nil, err
} }
sth := new(cttypes.SignedTreeHead) defer file.Close()
if err := json.Unmarshal(fileBytes, sth); err != nil {
info, err := file.Stat()
if err != nil {
return nil, err
}
sth := &StoredSTH{
StoredAt: info.ModTime(),
}
fileBytes, err := io.ReadAll(file)
if err != nil {
return nil, fmt.Errorf("error reading %s: %w", filePath, err)
}
if err := json.Unmarshal(fileBytes, &sth.SignedTreeHead); err != nil {
return nil, fmt.Errorf("error parsing %s: %w", filePath, err) return nil, fmt.Errorf("error parsing %s: %w", filePath, err)
} }
return sth, nil return sth, nil
@ -64,6 +84,8 @@ func readSTHFile(filePath string) (*cttypes.SignedTreeHead, error) {
func storeSTHInDir(dirPath string, sth *cttypes.SignedTreeHead) error { func storeSTHInDir(dirPath string, sth *cttypes.SignedTreeHead) error {
filePath := filepath.Join(dirPath, sthFilename(sth)) filePath := filepath.Join(dirPath, sthFilename(sth))
if fileExists(filePath) { if fileExists(filePath) {
// If the file already exists, we don't want its mtime to change
// because StoredSTH.StoredAt needs to be the time the STH was *first* stored.
return nil return nil
} }
return writeJSONFile(filePath, sth, 0666) return writeJSONFile(filePath, sth, 0666)