2023-02-03 20:05:04 +01:00
// Copyright (C) 2023 Opsmate, Inc.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License, v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
//
// This software is distributed WITHOUT A WARRANTY OF ANY KIND.
// See the Mozilla Public License for details.
package monitor
import (
2023-02-06 03:08:01 +01:00
"context"
2023-02-03 20:05:04 +01:00
"fmt"
"strings"
"time"
"software.sslmate.com/src/certspotter/ct"
"software.sslmate.com/src/certspotter/loglist"
)
2023-02-19 14:45:01 +01:00
// healthCheckFilename returns a file name made of the current UTC time,
// formatted per RFC 3339, followed by the ".txt" extension.
func healthCheckFilename() string {
	timestamp := time.Now().UTC().Format(time.RFC3339)
	return timestamp + ".txt"
}
2023-02-06 03:08:01 +01:00
func healthCheckLog ( ctx context . Context , config * Config , ctlog * loglist . Log ) error {
2024-04-04 02:06:00 +02:00
state , err := config . State . LoadLogState ( ctx , ctlog . LogID )
if err != nil {
return fmt . Errorf ( "error loading log state: %w" , err )
} else if state == nil {
2023-02-06 03:08:01 +01:00
return nil
}
2023-02-06 15:18:37 +01:00
if time . Since ( state . LastSuccess ) < config . HealthCheckInterval {
2023-02-06 03:08:01 +01:00
return nil
}
2024-04-04 02:06:00 +02:00
sths , err := config . State . LoadSTHs ( ctx , ctlog . LogID )
2023-02-06 03:08:01 +01:00
if err != nil {
2024-04-04 02:06:00 +02:00
return fmt . Errorf ( "error loading STHs: %w" , err )
2023-02-06 03:08:01 +01:00
}
if len ( sths ) == 0 {
2024-04-04 02:06:00 +02:00
info := & StaleSTHInfo {
2024-04-04 13:53:35 +02:00
Log : ctlog ,
2023-02-06 03:08:01 +01:00
LastSuccess : state . LastSuccess ,
LatestSTH : state . VerifiedSTH ,
2023-02-19 14:45:01 +01:00
}
2024-04-04 13:53:35 +02:00
if err := config . State . NotifyHealthCheckFailure ( ctx , ctlog , info ) ; err != nil {
2023-02-06 03:08:01 +01:00
return fmt . Errorf ( "error notifying about stale STH: %w" , err )
}
} else {
2024-04-04 02:06:00 +02:00
info := & BacklogInfo {
2024-04-04 13:53:35 +02:00
Log : ctlog ,
2023-02-06 03:08:01 +01:00
LatestSTH : sths [ len ( sths ) - 1 ] ,
Position : state . DownloadPosition . Size ( ) ,
2023-02-19 14:45:01 +01:00
}
2024-04-04 13:53:35 +02:00
if err := config . State . NotifyHealthCheckFailure ( ctx , ctlog , info ) ; err != nil {
2023-02-06 03:08:01 +01:00
return fmt . Errorf ( "error notifying about backlog: %w" , err )
}
}
return nil
}
2024-04-04 02:06:00 +02:00
// HealthCheckFailure describes a failed health check in two textual forms.
// In the implementations in this file, Summary is a single line and Text is
// a longer, multi-line explanation.
type HealthCheckFailure interface {
	// Summary returns a short description of the failure.
	Summary() string

	// Text returns the full description of the failure.
	Text() string
}
type StaleSTHInfo struct {
2024-04-04 13:53:35 +02:00
Log * loglist . Log
2023-02-03 20:05:04 +01:00
LastSuccess time . Time
LatestSTH * ct . SignedTreeHead // may be nil
}
2024-04-04 02:06:00 +02:00
type BacklogInfo struct {
2024-04-04 13:53:35 +02:00
Log * loglist . Log
2023-02-03 20:05:04 +01:00
LatestSTH * ct . SignedTreeHead
Position uint64
}
2024-04-04 02:06:00 +02:00
// StaleLogListInfo describes a failure to retrieve the log list.
type StaleLogListInfo struct {
	// Source is where the log list is retrieved from.
	Source string

	// LastSuccess is the time since which retrieval has been failing.
	LastSuccess time.Time

	// LastError is the text of the most recent error.
	LastError string

	// LastErrorTime is when LastError occurred.
	LastErrorTime time.Time
}
2024-04-04 02:06:00 +02:00
func ( e * BacklogInfo ) Backlog ( ) uint64 {
2023-02-06 03:08:01 +01:00
return e . LatestSTH . TreeSize - e . Position
}
2024-04-04 02:06:00 +02:00
func ( e * StaleSTHInfo ) Summary ( ) string {
2024-04-04 13:53:35 +02:00
return fmt . Sprintf ( "Unable to contact %s since %s" , e . Log . URL , e . LastSuccess )
2023-02-03 20:05:04 +01:00
}
2024-04-04 02:06:00 +02:00
func ( e * BacklogInfo ) Summary ( ) string {
2024-04-04 13:53:35 +02:00
return fmt . Sprintf ( "Backlog of size %d from %s" , e . Backlog ( ) , e . Log . URL )
2023-02-03 20:05:04 +01:00
}
2024-04-04 02:06:00 +02:00
func ( e * StaleLogListInfo ) Summary ( ) string {
2023-02-19 14:48:30 +01:00
return fmt . Sprintf ( "Unable to retrieve log list since %s" , e . LastSuccess )
2023-02-03 20:05:04 +01:00
}
2024-04-04 02:06:00 +02:00
func ( e * StaleSTHInfo ) Text ( ) string {
2023-02-03 20:05:04 +01:00
text := new ( strings . Builder )
2024-04-04 13:53:35 +02:00
fmt . Fprintf ( text , "certspotter has been unable to contact %s since %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n" , e . Log . URL , e . LastSuccess )
2023-02-03 20:05:04 +01:00
fmt . Fprintf ( text , "\n" )
fmt . Fprintf ( text , "For details, see certspotter's stderr output.\n" )
fmt . Fprintf ( text , "\n" )
if e . LatestSTH != nil {
2023-02-06 03:08:01 +01:00
fmt . Fprintf ( text , "Latest known log size = %d (as of %s)\n" , e . LatestSTH . TreeSize , e . LatestSTH . TimestampTime ( ) )
2023-02-03 20:05:04 +01:00
} else {
fmt . Fprintf ( text , "Latest known log size = none\n" )
}
return text . String ( )
}
2024-04-04 02:06:00 +02:00
func ( e * BacklogInfo ) Text ( ) string {
2023-02-03 20:05:04 +01:00
text := new ( strings . Builder )
2024-04-04 13:53:35 +02:00
fmt . Fprintf ( text , "certspotter has been unable to download entries from %s in a timely manner. Consequentially, certspotter may be slow to notify you about certificates in this log.\n" , e . Log . URL )
2023-02-03 20:05:04 +01:00
fmt . Fprintf ( text , "\n" )
fmt . Fprintf ( text , "For more details, see certspotter's stderr output.\n" )
fmt . Fprintf ( text , "\n" )
2023-02-06 03:08:01 +01:00
fmt . Fprintf ( text , "Current log size = %d (as of %s)\n" , e . LatestSTH . TreeSize , e . LatestSTH . TimestampTime ( ) )
2023-02-03 20:05:04 +01:00
fmt . Fprintf ( text , "Current position = %d\n" , e . Position )
2023-02-06 03:08:01 +01:00
fmt . Fprintf ( text , " Backlog = %d\n" , e . Backlog ( ) )
2023-02-03 20:05:04 +01:00
return text . String ( )
}
2024-04-04 02:06:00 +02:00
func ( e * StaleLogListInfo ) Text ( ) string {
2023-02-03 20:05:04 +01:00
text := new ( strings . Builder )
fmt . Fprintf ( text , "certspotter has been unable to retrieve the log list from %s since %s.\n" , e . Source , e . LastSuccess )
fmt . Fprintf ( text , "\n" )
fmt . Fprintf ( text , "Last error (at %s): %s\n" , e . LastErrorTime , e . LastError )
fmt . Fprintf ( text , "\n" )
fmt . Fprintf ( text , "Consequentially, certspotter may not be monitoring all logs, and might fail to detect certificates.\n" )
return text . String ( )
}
// TODO-3: make the errors more actionable