chore(websocket): logging and metrics improvement (#347)

* chore(websocket): only show warning if websocket is closed abnormally

* chore(websocket): add counter for ping requests received
This commit is contained in:
Aveline
2025-04-10 15:53:26 +02:00
committed by Siyuan Miao
parent 66a3352e5d
commit dc1ce03697
2 changed files with 50 additions and 9 deletions

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
@@ -59,6 +60,13 @@ var (
},
[]string{"type", "source"},
)
metricConnectionLastPingReceivedTimestamp = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "jetkvm_connection_last_ping_received_timestamp",
Help: "The timestamp when the last ping request was received",
},
[]string{"type", "source"},
)
metricConnectionLastPingDuration = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "jetkvm_connection_last_ping_duration",
@@ -76,16 +84,23 @@ var (
},
[]string{"type", "source"},
)
metricConnectionTotalPingCount = promauto.NewCounterVec(
metricConnectionTotalPingSentCount = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "jetkvm_connection_total_ping_count",
Name: "jetkvm_connection_total_ping_sent",
Help: "The total number of pings sent to the connection",
},
[]string{"type", "source"},
)
metricConnectionTotalPingReceivedCount = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "jetkvm_connection_total_ping_received",
Help: "The total number of pings received from the connection",
},
[]string{"type", "source"},
)
metricConnectionSessionRequestCount = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "jetkvm_connection_session_total_request_count",
Name: "jetkvm_connection_session_total_requests",
Help: "The total number of session requests received",
},
[]string{"type", "source"},
@@ -131,6 +146,8 @@ func wsResetMetrics(established bool, sourceType string, source string) {
metricConnectionLastPingTimestamp.WithLabelValues(sourceType, source).Set(-1)
metricConnectionLastPingDuration.WithLabelValues(sourceType, source).Set(-1)
metricConnectionLastPingReceivedTimestamp.WithLabelValues(sourceType, source).Set(-1)
metricConnectionLastSessionRequestTimestamp.WithLabelValues(sourceType, source).Set(-1)
metricConnectionLastSessionRequestDuration.WithLabelValues(sourceType, source).Set(-1)
@@ -277,20 +294,29 @@ func runWebsocketClient() error {
HTTPHeader: header,
OnPingReceived: func(ctx context.Context, payload []byte) bool {
websocketLogger.Infof("ping frame received: %v, source: %s, sourceType: cloud", payload, wsURL.Host)
metricConnectionTotalPingReceivedCount.WithLabelValues("cloud", wsURL.Host).Inc()
metricConnectionLastPingReceivedTimestamp.WithLabelValues("cloud", wsURL.Host).SetToCurrentTime()
return true
},
})
// if the context is canceled, we don't want to return an error
if err != nil {
if errors.Is(err, context.Canceled) {
cloudLogger.Infof("websocket connection canceled")
return nil
}
return err
}
defer c.CloseNow() //nolint:errcheck
cloudLogger.Infof("websocket connected to %s", wsURL)
// set the metrics when we successfully connect to the cloud.
wsResetMetrics(true, "cloud", "")
wsResetMetrics(true, "cloud", wsURL.Host)
// we don't have a source for the cloud connection
return handleWebRTCSignalWsMessages(c, true, "")
return handleWebRTCSignalWsMessages(c, true, wsURL.Host)
}
func authenticateSession(ctx context.Context, c *websocket.Conn, req WebRTCSessionRequest) error {
@@ -379,9 +405,6 @@ func handleSessionRequest(ctx context.Context, c *websocket.Conn, req WebRTCSess
func RunWebsocketClient() {
for {
// reset the metrics when we start the websocket client.
wsResetMetrics(false, "cloud", "")
// If the cloud token is not set, we don't need to run the websocket client.
if config.CloudToken == "" {
time.Sleep(5 * time.Second)