Avoid relying on STH timestamp during monitoring

Instead use the time at which the STH was observed (which for
FilesystemState is assumed to be the mtime of the STH file).  This is
easier to reason about: we don't have to worry about logs lying about
the time; we don't have to take into account the delay between STH fetch
and healthcheck; we won't raise spurious health checks about logs with
MMDs longer than the healthcheck interval.
This commit is contained in:
Andrew Ayer 2025-05-06 10:26:11 -04:00
parent 00fd77f6ed
commit 958e7a9efb
6 changed files with 66 additions and 37 deletions

View File

@ -83,7 +83,7 @@ func (s *FilesystemState) StoreSTH(ctx context.Context, logID LogID, sth *cttype
return storeSTHInDir(sthsDirPath, sth)
}
func (s *FilesystemState) LoadSTHs(ctx context.Context, logID LogID) ([]*cttypes.SignedTreeHead, error) {
func (s *FilesystemState) LoadSTHs(ctx context.Context, logID LogID) ([]*StoredSTH, error) {
sthsDirPath := filepath.Join(s.logStateDir(logID), "unverified_sths")
return loadSTHsFromDir(sthsDirPath)
}

View File

@ -31,7 +31,7 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err
return nil
}
if state.VerifiedSTH != nil && time.Since(state.VerifiedSTH.TimestampTime()) < config.HealthCheckInterval {
if time.Since(state.LastSuccess) < config.HealthCheckInterval {
return nil
}
@ -42,8 +42,9 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err
if len(sths) == 0 {
info := &StaleSTHInfo{
Log: ctlog,
LatestSTH: state.VerifiedSTH,
Log: ctlog,
LastSuccess: state.LastSuccess,
LatestSTH: state.VerifiedSTH,
}
if err := config.State.NotifyHealthCheckFailure(ctx, ctlog, info); err != nil {
return fmt.Errorf("error notifying about stale STH: %w", err)
@ -68,13 +69,14 @@ type HealthCheckFailure interface {
}
type StaleSTHInfo struct {
Log *loglist.Log
LatestSTH *cttypes.SignedTreeHead // may be nil
Log *loglist.Log
LastSuccess time.Time
LatestSTH *cttypes.SignedTreeHead // may be nil
}
type BacklogInfo struct {
Log *loglist.Log
LatestSTH *cttypes.SignedTreeHead
LatestSTH *StoredSTH
Position uint64
}
@ -90,7 +92,7 @@ func (e *BacklogInfo) Backlog() uint64 {
}
func (e *StaleSTHInfo) Summary() string {
return fmt.Sprintf("%s is out-of-date", e.Log.GetMonitoringURL())
return fmt.Sprintf("Unable to contact %s since %s", e.Log.GetMonitoringURL(), e.LastSuccess)
}
func (e *BacklogInfo) Summary() string {
return fmt.Sprintf("Backlog of size %d from %s", e.Backlog(), e.Log.GetMonitoringURL())
@ -101,12 +103,12 @@ func (e *StaleLogListInfo) Summary() string {
func (e *StaleSTHInfo) Text() string {
text := new(strings.Builder)
fmt.Fprintf(text, "certspotter has been unable to get up-to-date information about %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL())
fmt.Fprintf(text, "certspotter has been unable to contact %s since %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL(), e.LastSuccess)
fmt.Fprintf(text, "\n")
fmt.Fprintf(text, "For details, see certspotter's stderr output.\n")
fmt.Fprintf(text, "\n")
if e.LatestSTH != nil {
fmt.Fprintf(text, "Latest known log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime())
fmt.Fprintf(text, "Latest known log size = %d\n", e.LatestSTH.TreeSize)
} else {
fmt.Fprintf(text, "Latest known log size = none\n")
}
@ -118,7 +120,7 @@ func (e *BacklogInfo) Text() string {
fmt.Fprintf(text, "\n")
fmt.Fprintf(text, "For more details, see certspotter's stderr output.\n")
fmt.Fprintf(text, "\n")
fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime())
fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.StoredAt)
fmt.Fprintf(text, "Current position = %d\n", e.Position)
fmt.Fprintf(text, " Backlog = %d\n", e.Backlog())
return text.String()

View File

@ -104,9 +104,6 @@ func getAndVerifySTH(ctx context.Context, ctlog *loglist.Log, client ctclient.Lo
if err := ctcrypto.PublicKey(ctlog.Key).Verify(ctcrypto.SignatureInputForSTH(sth), sth.Signature); err != nil {
return nil, "", fmt.Errorf("STH has invalid signature: %w", err)
}
if now := time.Now(); sth.TimestampTime().After(now) {
return nil, "", fmt.Errorf("STH timestamp %s is after current time %s (either log is misbehaving or your system clock is incorrect)", sth.TimestampTime(), now)
}
return sth, url, nil
}
@ -222,12 +219,14 @@ func monitorLogContinously(ctx context.Context, config *Config, ctlog *loglist.L
DownloadPosition: tree,
VerifiedPosition: tree,
VerifiedSTH: sth,
LastSuccess: time.Now(),
}
} else {
state = &LogState{
DownloadPosition: merkletree.EmptyCollapsedTree(),
VerifiedPosition: merkletree.EmptyCollapsedTree(),
VerifiedSTH: nil,
LastSuccess: time.Now(),
}
}
if config.Verbose {
@ -303,8 +302,8 @@ func getSTHWorker(ctx context.Context, config *Config, ctlog *loglist.Log, clien
type batch struct {
number uint64
begin, end uint64
sths []*cttypes.SignedTreeHead // STHs with sizes in range [begin,end], sorted by TreeSize
entries []ctclient.Entry // in range [begin,end)
sths []*StoredSTH // STHs with sizes in range [begin,end], sorted by TreeSize
entries []ctclient.Entry // in range [begin,end)
}
func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.Log, position uint64, batches chan<- *batch) error {
@ -317,7 +316,7 @@ func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.L
}
for len(sths) > 0 && sths[0].TreeSize < position {
// TODO-4: audit sths[0] against log's verified STH
if err := config.State.RemoveSTH(ctx, ctlog.LogID, sths[0]); err != nil {
if err := config.State.RemoveSTH(ctx, ctlog.LogID, &sths[0].SignedTreeHead); err != nil {
return fmt.Errorf("error removing STH: %w", err)
}
sths = sths[1:]
@ -335,30 +334,30 @@ func generateBatchesWorker(ctx context.Context, config *Config, ctlog *loglist.L
return ctx.Err()
}
// return the earliest STH timestamp within the right-most tile
func tileEarliestTimestamp(sths []*cttypes.SignedTreeHead) time.Time {
// return the time at which the right-most tile indicated by sths was discovered
func tileDiscoveryTime(sths []*StoredSTH) time.Time {
largestSTH, sths := sths[len(sths)-1], sths[:len(sths)-1]
tileNumber := largestSTH.TreeSize / ctclient.StaticTileWidth
earliest := largestSTH.TimestampTime()
storedAt := largestSTH.StoredAt
for _, sth := range slices.Backward(sths) {
if sth.TreeSize/ctclient.StaticTileWidth != tileNumber {
break
}
if timestamp := sth.TimestampTime(); timestamp.Before(earliest) {
earliest = timestamp
if sth.StoredAt.Before(storedAt) {
storedAt = sth.StoredAt
}
}
return earliest
return storedAt
}
func generateBatches(ctx context.Context, ctlog *loglist.Log, position uint64, number uint64, sths []*cttypes.SignedTreeHead, batches chan<- *batch) (uint64, uint64, error) {
func generateBatches(ctx context.Context, ctlog *loglist.Log, position uint64, number uint64, sths []*StoredSTH, batches chan<- *batch) (uint64, uint64, error) {
downloadJobSize := downloadJobSize(ctlog)
if len(sths) == 0 {
return position, number, nil
}
largestSTH := sths[len(sths)-1]
treeSize := largestSTH.TreeSize
if ctlog.IsStaticCTAPI() && time.Since(tileEarliestTimestamp(sths)) < 5*time.Minute {
if ctlog.IsStaticCTAPI() && time.Since(tileDiscoveryTime(sths)) < 5*time.Minute {
// Round down to the tile boundary to avoid downloading a partial tile that was recently discovered
// In a future invocation of this function, either enough time will have passed that this code path will be skipped, or the log will have grown and treeSize will be rounded to a larger tile boundary
treeSize -= treeSize % ctclient.StaticTileWidth
@ -471,17 +470,18 @@ func saveStateWorker(ctx context.Context, config *Config, ctlog *loglist.Log, st
batch.sths = batch.sths[1:]
if sth.RootHash != rootHash {
return &verifyEntriesError{
sth: sth,
sth: &sth.SignedTreeHead,
entriesRootHash: rootHash,
}
}
state.advanceVerifiedPosition()
state.VerifiedSTH = sth
state.LastSuccess = sth.StoredAt
state.VerifiedSTH = &sth.SignedTreeHead
if err := config.State.StoreLogState(ctx, ctlog.LogID, state); err != nil {
return fmt.Errorf("error storing log state: %w", err)
}
// don't remove the STH until state has been durably stored
if err := config.State.RemoveSTH(ctx, ctlog.LogID, sth); err != nil {
if err := config.State.RemoveSTH(ctx, ctlog.LogID, &sth.SignedTreeHead); err != nil {
return fmt.Errorf("error removing verified STH: %w", err)
}
}

View File

@ -11,6 +11,8 @@ package monitor
import (
"context"
"time"
"software.sslmate.com/src/certspotter/cttypes"
"software.sslmate.com/src/certspotter/loglist"
"software.sslmate.com/src/certspotter/merkletree"
@ -20,6 +22,7 @@ type LogState struct {
DownloadPosition *merkletree.CollapsedTree `json:"download_position"`
VerifiedPosition *merkletree.CollapsedTree `json:"verified_position"`
VerifiedSTH *cttypes.SignedTreeHead `json:"verified_sth"`
LastSuccess time.Time `json:"last_success"`
}
func (state *LogState) rewindDownloadPosition() {
@ -55,7 +58,7 @@ type StateProvider interface {
// Load all STHs for this log previously stored with StoreSTH.
// The returned slice must be sorted by tree size.
LoadSTHs(context.Context, LogID) ([]*cttypes.SignedTreeHead, error)
LoadSTHs(context.Context, LogID) ([]*StoredSTH, error)
// Remove an STH so it is no longer returned by LoadSTHs.
RemoveSTH(context.Context, LogID, *cttypes.SignedTreeHead) error

View File

@ -20,6 +20,7 @@ import (
"software.sslmate.com/src/certspotter/merkletree"
"strconv"
"strings"
"time"
)
func readVersion(stateDir string) (int, error) {
@ -79,6 +80,7 @@ func migrateLogStateDirV1(dir string) error {
DownloadPosition: &tree,
VerifiedPosition: &tree,
VerifiedSTH: &sth,
LastSuccess: time.Now(),
}
if err := writeJSONFile(filepath.Join(dir, "state.json"), stateFile, 0666); err != nil {
return err

View File

@ -17,6 +17,7 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
@ -24,16 +25,22 @@ import (
"software.sslmate.com/src/certspotter/cttypes"
"strconv"
"strings"
"time"
)
func loadSTHsFromDir(dirPath string) ([]*cttypes.SignedTreeHead, error) {
type StoredSTH struct {
cttypes.SignedTreeHead
StoredAt time.Time `json:"stored_at"` // time at which the STH was first stored
}
func loadSTHsFromDir(dirPath string) ([]*StoredSTH, error) {
entries, err := os.ReadDir(dirPath)
if errors.Is(err, fs.ErrNotExist) {
return []*cttypes.SignedTreeHead{}, nil
return []*StoredSTH{}, nil
} else if err != nil {
return nil, err
}
sths := make([]*cttypes.SignedTreeHead, 0, len(entries))
sths := make([]*StoredSTH, 0, len(entries))
for _, entry := range entries {
filename := entry.Name()
if strings.HasPrefix(filename, ".") || !strings.HasSuffix(filename, ".json") {
@ -45,17 +52,30 @@ func loadSTHsFromDir(dirPath string) ([]*cttypes.SignedTreeHead, error) {
}
sths = append(sths, sth)
}
slices.SortFunc(sths, func(a, b *cttypes.SignedTreeHead) int { return cmp.Compare(a.TreeSize, b.TreeSize) })
slices.SortFunc(sths, func(a, b *StoredSTH) int { return cmp.Compare(a.TreeSize, b.TreeSize) })
return sths, nil
}
func readSTHFile(filePath string) (*cttypes.SignedTreeHead, error) {
fileBytes, err := os.ReadFile(filePath)
func readSTHFile(filePath string) (*StoredSTH, error) {
file, err := os.Open(filePath)
if err != nil {
return nil, err
}
sth := new(cttypes.SignedTreeHead)
if err := json.Unmarshal(fileBytes, sth); err != nil {
defer file.Close()
info, err := file.Stat()
if err != nil {
return nil, err
}
sth := &StoredSTH{
StoredAt: info.ModTime(),
}
fileBytes, err := io.ReadAll(file)
if err != nil {
return nil, fmt.Errorf("error reading %s: %w", filePath, err)
}
if err := json.Unmarshal(fileBytes, &sth.SignedTreeHead); err != nil {
return nil, fmt.Errorf("error parsing %s: %w", filePath, err)
}
return sth, nil
@ -64,6 +84,8 @@ func readSTHFile(filePath string) (*cttypes.SignedTreeHead, error) {
func storeSTHInDir(dirPath string, sth *cttypes.SignedTreeHead) error {
filePath := filepath.Join(dirPath, sthFilename(sth))
if fileExists(filePath) {
// If the file already exists, we don't want its mtime to change
// because StoredSTH.StoredAt needs to be the time the STH was *first* stored.
return nil
}
return writeJSONFile(filePath, sth, 0666)