mirror of
				https://github.com/SSLMate/certspotter.git
				synced 2025-07-03 10:47:17 +02:00 
			
		
		
		
	Implement monitor health check
This commit is contained in:
		
							parent
							
								
									fe4ef6b05d
								
							
						
					
					
						commit
						e27e355b75
					
				@ -35,6 +35,7 @@ func reloadLogListInterval() time.Duration {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type task struct {
 | 
			
		||||
	log  *loglist.Log
 | 
			
		||||
	stop context.CancelFunc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -44,9 +45,27 @@ type daemon struct {
 | 
			
		||||
	tasks        map[LogID]task
 | 
			
		||||
	logsLoadedAt time.Time
 | 
			
		||||
	logListToken *loglist.ModificationToken
 | 
			
		||||
	logListError string
 | 
			
		||||
	logListErrorAt time.Time
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (daemon *daemon) healthCheck(ctx context.Context) error { // TODO-2
 | 
			
		||||
func (daemon *daemon) healthCheck(ctx context.Context) error {
 | 
			
		||||
	if time.Since(daemon.logsLoadedAt) >= healthCheckInterval {
 | 
			
		||||
		if err := notify(ctx, daemon.config, &staleLogListEvent{
 | 
			
		||||
			Source:        daemon.config.LogListSource,
 | 
			
		||||
			LastSuccess:   daemon.logsLoadedAt,
 | 
			
		||||
			LastError:     daemon.logListError,
 | 
			
		||||
			LastErrorTime: daemon.logListErrorAt,
 | 
			
		||||
		}); err != nil {
 | 
			
		||||
			return fmt.Errorf("error notifying about stale log list: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, task := range daemon.tasks {
 | 
			
		||||
		if err := healthCheckLog(ctx, daemon.config, task.log); err != nil {
 | 
			
		||||
			return fmt.Errorf("error checking health of log %q: %w", task.log.URL, err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -64,7 +83,7 @@ func (daemon *daemon) startTask(ctx context.Context, ctlog *loglist.Log) task {
 | 
			
		||||
			return fmt.Errorf("error while monitoring %s: %w", ctlog.URL, err)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
	return task{stop: cancel}
 | 
			
		||||
	return task{log: ctlog, stop: cancel}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (daemon *daemon) loadLogList(ctx context.Context) error {
 | 
			
		||||
@ -124,6 +143,8 @@ func (daemon *daemon) run(ctx context.Context) error {
 | 
			
		||||
		case <-ctx.Done():
 | 
			
		||||
		case <-reloadLogListTicker.C:
 | 
			
		||||
			if err := daemon.loadLogList(ctx); err != nil {
 | 
			
		||||
				daemon.logListError = err.Error()
 | 
			
		||||
				daemon.logListErrorAt = time.Now()
 | 
			
		||||
				recordError(fmt.Errorf("error reloading log list (will try again later): %w", err))
 | 
			
		||||
			}
 | 
			
		||||
			reloadLogListTicker.Reset(reloadLogListInterval())
 | 
			
		||||
 | 
			
		||||
@ -10,7 +10,11 @@
 | 
			
		||||
package monitor
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"errors"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io/fs"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
@ -18,6 +22,49 @@ import (
 | 
			
		||||
	"software.sslmate.com/src/certspotter/loglist"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) error {
 | 
			
		||||
	var (
 | 
			
		||||
		stateDirPath  = filepath.Join(config.StateDir, "logs", ctlog.LogID.Base64URLString())
 | 
			
		||||
		stateFilePath = filepath.Join(stateDirPath, "state.json")
 | 
			
		||||
		sthsDirPath   = filepath.Join(stateDirPath, "unverified_sths")
 | 
			
		||||
	)
 | 
			
		||||
	state, err := loadStateFile(stateFilePath)
 | 
			
		||||
	if errors.Is(err, fs.ErrNotExist) {
 | 
			
		||||
		return nil
 | 
			
		||||
	} else if err != nil {
 | 
			
		||||
		return fmt.Errorf("error loading state file: %w", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if time.Since(state.LastSuccess) < healthCheckInterval {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sths, err := loadSTHsFromDir(sthsDirPath)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return fmt.Errorf("error loading STHs directory: %w", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(sths) == 0 {
 | 
			
		||||
		if err := notify(ctx, config, &staleSTHEvent{
 | 
			
		||||
			Log:         ctlog,
 | 
			
		||||
			LastSuccess: state.LastSuccess,
 | 
			
		||||
			LatestSTH:   state.VerifiedSTH,
 | 
			
		||||
		}); err != nil {
 | 
			
		||||
			return fmt.Errorf("error notifying about stale STH: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		if err := notify(ctx, config, &backlogEvent{
 | 
			
		||||
			Log:       ctlog,
 | 
			
		||||
			LatestSTH: sths[len(sths)-1],
 | 
			
		||||
			Position:  state.DownloadPosition.Size(),
 | 
			
		||||
		}); err != nil {
 | 
			
		||||
			return fmt.Errorf("error notifying about backlog: %w", err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type staleSTHEvent struct {
 | 
			
		||||
	Log         *loglist.Log
 | 
			
		||||
	LastSuccess time.Time
 | 
			
		||||
@ -26,7 +73,6 @@ type staleSTHEvent struct {
 | 
			
		||||
type backlogEvent struct {
 | 
			
		||||
	Log       *loglist.Log
 | 
			
		||||
	LatestSTH *ct.SignedTreeHead
 | 
			
		||||
	Backlog   uint64
 | 
			
		||||
	Position  uint64
 | 
			
		||||
}
 | 
			
		||||
type staleLogListEvent struct {
 | 
			
		||||
@ -36,6 +82,10 @@ type staleLogListEvent struct {
 | 
			
		||||
	LastErrorTime time.Time
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (e *backlogEvent) Backlog() uint64 {
 | 
			
		||||
	return e.LatestSTH.TreeSize - e.Position
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (e *staleSTHEvent) Environ() []string {
 | 
			
		||||
	return []string{
 | 
			
		||||
		"EVENT=error",
 | 
			
		||||
@ -45,7 +95,7 @@ func (e *staleSTHEvent) Environ() []string {
 | 
			
		||||
func (e *backlogEvent) Environ() []string {
 | 
			
		||||
	return []string{
 | 
			
		||||
		"EVENT=error",
 | 
			
		||||
		"SUMMARY=" + fmt.Sprintf("backlog of size %d from %s", e.Backlog, e.Log.URL),
 | 
			
		||||
		"SUMMARY=" + fmt.Sprintf("backlog of size %d from %s", e.Backlog(), e.Log.URL),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
func (e *staleLogListEvent) Environ() []string {
 | 
			
		||||
@ -59,7 +109,7 @@ func (e *staleSTHEvent) EmailSubject() string {
 | 
			
		||||
	return fmt.Sprintf("[certspotter] Unable to contact %s since %s", e.Log.URL, e.LastSuccess)
 | 
			
		||||
}
 | 
			
		||||
func (e *backlogEvent) EmailSubject() string {
 | 
			
		||||
	return fmt.Sprintf("[certspotter] Backlog of size %d from %s", e.Backlog, e.Log.URL)
 | 
			
		||||
	return fmt.Sprintf("[certspotter] Backlog of size %d from %s", e.Backlog(), e.Log.URL)
 | 
			
		||||
}
 | 
			
		||||
func (e *staleLogListEvent) EmailSubject() string {
 | 
			
		||||
	return fmt.Sprintf("[certspotter] Unable to retrieve log list since %s", e.LastSuccess)
 | 
			
		||||
@ -72,7 +122,7 @@ func (e *staleSTHEvent) Text() string {
 | 
			
		||||
	fmt.Fprintf(text, "For details, see certspotter's stderr output.\n")
 | 
			
		||||
	fmt.Fprintf(text, "\n")
 | 
			
		||||
	if e.LatestSTH != nil {
 | 
			
		||||
		fmt.Fprintf(text, "Latest known log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.Timestamp)
 | 
			
		||||
		fmt.Fprintf(text, "Latest known log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime())
 | 
			
		||||
	} else {
 | 
			
		||||
		fmt.Fprintf(text, "Latest known log size = none\n")
 | 
			
		||||
	}
 | 
			
		||||
@ -84,9 +134,9 @@ func (e *backlogEvent) Text() string {
 | 
			
		||||
	fmt.Fprintf(text, "\n")
 | 
			
		||||
	fmt.Fprintf(text, "For more details, see certspotter's stderr output.\n")
 | 
			
		||||
	fmt.Fprintf(text, "\n")
 | 
			
		||||
	fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.Timestamp)
 | 
			
		||||
	fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.TimestampTime())
 | 
			
		||||
	fmt.Fprintf(text, "Current position = %d\n", e.Position)
 | 
			
		||||
	fmt.Fprintf(text, "         Backlog = %d\n", e.Backlog)
 | 
			
		||||
	fmt.Fprintf(text, "         Backlog = %d\n", e.Backlog())
 | 
			
		||||
	return text.String()
 | 
			
		||||
}
 | 
			
		||||
func (e *staleLogListEvent) Text() string {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user