Save failed healthchecks, and put path in $TEXT_FILENAME

This allows scripts to access the saved health check reports.
This commit is contained in:
Andrew Ayer 2023-02-19 08:45:01 -05:00
parent bd2bab5fcb
commit 152f4341d6
4 changed files with 51 additions and 12 deletions

View File

@ -16,6 +16,7 @@ import (
"golang.org/x/sync/errgroup"
"log"
insecurerand "math/rand"
"path/filepath"
"software.sslmate.com/src/certspotter/loglist"
"time"
)
@ -50,12 +51,18 @@ type daemon struct {
func (daemon *daemon) healthCheck(ctx context.Context) error {
if time.Since(daemon.logsLoadedAt) >= daemon.config.HealthCheckInterval {
if err := notify(ctx, daemon.config, &staleLogListEvent{
textPath := filepath.Join(daemon.config.StateDir, "healthchecks", healthCheckFilename())
event := &staleLogListEvent{
Source: daemon.config.LogListSource,
LastSuccess: daemon.logsLoadedAt,
LastError: daemon.logListError,
LastErrorTime: daemon.logListErrorAt,
}); err != nil {
TextPath: textPath,
}
if err := event.save(); err != nil {
return fmt.Errorf("error saving stale log list event: %w", err)
}
if err := notify(ctx, daemon.config, event); err != nil {
return fmt.Errorf("error notifying about stale log list: %w", err)
}
}

View File

@ -22,11 +22,16 @@ import (
"software.sslmate.com/src/certspotter/loglist"
)
// healthCheckFilename returns the file name to use for a health check report
// created right now: the current UTC time in RFC 3339 form followed by ".txt".
func healthCheckFilename() string {
	now := time.Now().UTC()
	stamp := now.Format(time.RFC3339)
	return stamp + ".txt"
}
func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) error {
var (
stateDirPath = filepath.Join(config.StateDir, "logs", ctlog.LogID.Base64URLString())
stateFilePath = filepath.Join(stateDirPath, "state.json")
sthsDirPath = filepath.Join(stateDirPath, "unverified_sths")
textPath = filepath.Join(stateDirPath, "healthchecks", healthCheckFilename())
)
state, err := loadStateFile(stateFilePath)
if errors.Is(err, fs.ErrNotExist) {
@ -45,19 +50,29 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err
}
if len(sths) == 0 {
if err := notify(ctx, config, &staleSTHEvent{
event := &staleSTHEvent{
Log: ctlog,
LastSuccess: state.LastSuccess,
LatestSTH: state.VerifiedSTH,
}); err != nil {
TextPath: textPath,
}
if err := event.save(); err != nil {
return fmt.Errorf("error saving stale STH event: %w", err)
}
if err := notify(ctx, config, event); err != nil {
return fmt.Errorf("error notifying about stale STH: %w", err)
}
} else {
if err := notify(ctx, config, &backlogEvent{
event := &backlogEvent{
Log: ctlog,
LatestSTH: sths[len(sths)-1],
Position: state.DownloadPosition.Size(),
}); err != nil {
TextPath: textPath,
}
if err := event.save(); err != nil {
return fmt.Errorf("error saving backlog event: %w", err)
}
if err := notify(ctx, config, event); err != nil {
return fmt.Errorf("error notifying about backlog: %w", err)
}
}
@ -69,17 +84,20 @@ type staleSTHEvent struct {
Log *loglist.Log
LastSuccess time.Time
LatestSTH *ct.SignedTreeHead // may be nil
TextPath string
}
// backlogEvent is the health check event that healthCheckLog builds, saves and
// passes to notify when its sths slice is non-empty.
type backlogEvent struct {
// Log is the log the health check was run against.
Log *loglist.Log
// LatestSTH is the last element of the sths slice at the time of the check.
LatestSTH *ct.SignedTreeHead
// Position is the size of the log state's download position.
Position uint64
// TextPath is the file that save writes the event text to; Environ exposes
// it as TEXT_FILENAME.
TextPath string
}
// staleLogListEvent is the health check event that the daemon builds, saves and
// passes to notify when the time since the log list was last loaded has reached
// the configured health check interval.
type staleLogListEvent struct {
// Source is the configured log list source.
Source string
// LastSuccess is when the log list was last loaded.
LastSuccess time.Time
// LastError is the daemon's recorded log list error text.
// NOTE(review): presumably empty when no error has occurred — confirm.
LastError string
// LastErrorTime is when LastError was recorded.
LastErrorTime time.Time
// TextPath is the file that save writes the event text to; Environ exposes
// it as TEXT_FILENAME.
TextPath string
}
func (e *backlogEvent) Backlog() uint64 {
@ -89,18 +107,21 @@ func (e *backlogEvent) Backlog() uint64 {
// Environ returns the event as KEY=value strings: a fixed EVENT of "error",
// TEXT_FILENAME holding the path of the saved event text, and a one-line
// SUMMARY naming the log URL and the time of the last success.
func (e *staleSTHEvent) Environ() []string {
	summary := fmt.Sprintf("unable to contact %s since %s", e.Log.URL, e.LastSuccess)
	env := make([]string, 0, 3)
	env = append(env, "EVENT=error")
	env = append(env, "TEXT_FILENAME="+e.TextPath)
	env = append(env, "SUMMARY="+summary)
	return env
}
// Environ returns the event as KEY=value strings: a fixed EVENT of "error",
// TEXT_FILENAME holding the path of the saved event text, and a one-line
// SUMMARY giving the backlog size and the log URL.
func (e *backlogEvent) Environ() []string {
	summary := fmt.Sprintf("backlog of size %d from %s", e.Backlog(), e.Log.URL)
	env := make([]string, 0, 3)
	env = append(env, "EVENT=error")
	env = append(env, "TEXT_FILENAME="+e.TextPath)
	env = append(env, "SUMMARY="+summary)
	return env
}
// Environ returns the event as KEY=value strings: a fixed EVENT of "error",
// TEXT_FILENAME holding the path of the saved event text, and a one-line
// SUMMARY giving the time of the last successful log list load and the last
// error text.
func (e *staleLogListEvent) Environ() []string {
	// Formatting a time.Time with %s uses Time.String, which appends a
	// debugging-only "m=+..." suffix when the value carries a monotonic clock
	// reading (as values obtained from time.Now do). Round(0) strips that
	// reading and leaves the wall-clock time and its formatting unchanged.
	lastSuccess := e.LastSuccess.Round(0)
	return []string{
		"EVENT=error",
		"TEXT_FILENAME=" + e.TextPath,
		"SUMMARY=" + fmt.Sprintf("unable to retrieve log list since %s: %s", lastSuccess, e.LastError),
	}
}
@ -149,4 +170,14 @@ func (e *staleLogListEvent) Text() string {
return text.String()
}
// save renders the event with Text and hands it to writeTextFile, targeting
// e.TextPath with mode 0666. It returns whatever error writeTextFile reports.
func (e *staleSTHEvent) save() error {
	report := e.Text()
	return writeTextFile(e.TextPath, report, 0666)
}
// save renders the event with Text and hands it to writeTextFile, targeting
// e.TextPath with mode 0666. It returns whatever error writeTextFile reports.
func (e *backlogEvent) save() error {
	report := e.Text()
	return writeTextFile(e.TextPath, report, 0666)
}
// save renders the event with Text and hands it to writeTextFile, targeting
// e.TextPath with mode 0666. It returns whatever error writeTextFile reports.
func (e *staleLogListEvent) save() error {
	report := e.Text()
	return writeTextFile(e.TextPath, report, 0666)
}
// TODO-3: make the errors more actionable

View File

@ -78,8 +78,9 @@ func monitorLog(ctx context.Context, config *Config, ctlog *loglist.Log, logClie
stateFilePath = filepath.Join(stateDirPath, "state.json")
sthsDirPath = filepath.Join(stateDirPath, "unverified_sths")
malformedDirPath = filepath.Join(stateDirPath, "malformed_entries")
healthchecksDirPath = filepath.Join(stateDirPath, "healthchecks")
)
for _, dirPath := range []string{stateDirPath, sthsDirPath, malformedDirPath} {
for _, dirPath := range []string{stateDirPath, sthsDirPath, malformedDirPath, healthchecksDirPath} {
if err := os.Mkdir(dirPath, 0777); err != nil && !errors.Is(err, fs.ErrExist) {
return fmt.Errorf("error creating state directory: %w", err)
}

View File

@ -145,7 +145,7 @@ func prepareStateDir(stateDir string) error {
return fmt.Errorf("%s was created by a newer version of certspotter; upgrade to the latest version of certspotter or remove this directory to start from scratch", stateDir)
}
for _, subdir := range []string{"certs", "logs"} {
for _, subdir := range []string{"certs", "logs", "healthchecks"} {
if err := os.Mkdir(filepath.Join(stateDir, subdir), 0777); err != nil && !errors.Is(err, fs.ErrExist) {
return err
}