mirror of
https://github.com/luckfox-eng29/kvm.git
synced 2026-01-18 03:28:19 +01:00
Feat/Trickle ice (#336)
* feat(cloud): Use Websocket signaling in cloud mode * refactor: Enhance WebRTC signaling and connection handling * refactor: Improve WebRTC connection management and logging in KvmIdRoute * refactor: Update PeerConnectionDisconnectedOverlay to use Card component for better UI structure * refactor: Standardize metric naming and improve websocket logging * refactor: Rename WebRTC signaling functions and update deployment script for debug version * fix: Handle error when writing new ICE candidate to WebRTC signaling channel * refactor: Rename signaling handler function for clarity * refactor: Remove old http local http endpoint * refactor: Improve metric help text and standardize comparison operator in KvmIdRoute * chore(websocket): use MetricVec instead of Metric to store metrics * fix conflicts * fix: use wss when the page is served over https * feat: Add app version header and update WebRTC signaling endpoint * fix: Handle error when writing device metadata to WebRTC signaling channel --------- Co-authored-by: Siyuan Miao <i@xswan.net>
This commit is contained in:
179
cloud.go
179
cloud.go
@@ -35,8 +35,8 @@ const (
|
||||
// CloudOidcRequestTimeout is the timeout for OIDC token verification requests
|
||||
// should be lower than the websocket response timeout set in cloud-api
|
||||
CloudOidcRequestTimeout = 10 * time.Second
|
||||
// CloudWebSocketPingInterval is the interval at which the websocket client sends ping messages to the cloud
|
||||
CloudWebSocketPingInterval = 15 * time.Second
|
||||
// WebsocketPingInterval is the interval at which the websocket client sends ping messages to the cloud
|
||||
WebsocketPingInterval = 15 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -52,59 +52,67 @@ var (
|
||||
Help: "The timestamp when the cloud connection was established",
|
||||
},
|
||||
)
|
||||
metricCloudConnectionLastPingTimestamp = promauto.NewGauge(
|
||||
metricConnectionLastPingTimestamp = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "jetkvm_cloud_connection_last_ping_timestamp",
|
||||
Name: "jetkvm_connection_last_ping_timestamp",
|
||||
Help: "The timestamp when the last ping response was received",
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionLastPingDuration = promauto.NewGauge(
|
||||
metricConnectionLastPingDuration = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "jetkvm_cloud_connection_last_ping_duration",
|
||||
Name: "jetkvm_connection_last_ping_duration",
|
||||
Help: "The duration of the last ping response",
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionPingDuration = promauto.NewHistogram(
|
||||
metricConnectionPingDuration = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "jetkvm_cloud_connection_ping_duration",
|
||||
Name: "jetkvm_connection_ping_duration",
|
||||
Help: "The duration of the ping response",
|
||||
Buckets: []float64{
|
||||
0.1, 0.5, 1, 10,
|
||||
},
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionTotalPingCount = promauto.NewCounter(
|
||||
metricConnectionTotalPingCount = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "jetkvm_cloud_connection_total_ping_count",
|
||||
Help: "The total number of pings sent to the cloud",
|
||||
Name: "jetkvm_connection_total_ping_count",
|
||||
Help: "The total number of pings sent to the connection",
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionSessionRequestCount = promauto.NewCounter(
|
||||
metricConnectionSessionRequestCount = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "jetkvm_cloud_connection_session_total_request_count",
|
||||
Help: "The total number of session requests received from the cloud",
|
||||
Name: "jetkvm_connection_session_total_request_count",
|
||||
Help: "The total number of session requests received",
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionSessionRequestDuration = promauto.NewHistogram(
|
||||
metricConnectionSessionRequestDuration = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "jetkvm_cloud_connection_session_request_duration",
|
||||
Name: "jetkvm_connection_session_request_duration",
|
||||
Help: "The duration of session requests",
|
||||
Buckets: []float64{
|
||||
0.1, 0.5, 1, 10,
|
||||
},
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionLastSessionRequestTimestamp = promauto.NewGauge(
|
||||
metricConnectionLastSessionRequestTimestamp = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "jetkvm_cloud_connection_last_session_request_timestamp",
|
||||
Name: "jetkvm_connection_last_session_request_timestamp",
|
||||
Help: "The timestamp of the last session request",
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionLastSessionRequestDuration = promauto.NewGauge(
|
||||
metricConnectionLastSessionRequestDuration = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "jetkvm_cloud_connection_last_session_request_duration",
|
||||
Name: "jetkvm_connection_last_session_request_duration",
|
||||
Help: "The duration of the last session request",
|
||||
},
|
||||
[]string{"type", "source"},
|
||||
)
|
||||
metricCloudConnectionFailureCount = promauto.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
@@ -119,12 +127,16 @@ var (
|
||||
cloudDisconnectLock = &sync.Mutex{}
|
||||
)
|
||||
|
||||
func cloudResetMetrics(established bool) {
|
||||
metricCloudConnectionLastPingTimestamp.Set(-1)
|
||||
metricCloudConnectionLastPingDuration.Set(-1)
|
||||
func wsResetMetrics(established bool, sourceType string, source string) {
|
||||
metricConnectionLastPingTimestamp.WithLabelValues(sourceType, source).Set(-1)
|
||||
metricConnectionLastPingDuration.WithLabelValues(sourceType, source).Set(-1)
|
||||
|
||||
metricCloudConnectionLastSessionRequestTimestamp.Set(-1)
|
||||
metricCloudConnectionLastSessionRequestDuration.Set(-1)
|
||||
metricConnectionLastSessionRequestTimestamp.WithLabelValues(sourceType, source).Set(-1)
|
||||
metricConnectionLastSessionRequestDuration.WithLabelValues(sourceType, source).Set(-1)
|
||||
|
||||
if sourceType != "cloud" {
|
||||
return
|
||||
}
|
||||
|
||||
if established {
|
||||
metricCloudConnectionEstablishedTimestamp.SetToCurrentTime()
|
||||
@@ -256,6 +268,7 @@ func runWebsocketClient() error {
|
||||
|
||||
header := http.Header{}
|
||||
header.Set("X-Device-ID", GetDeviceID())
|
||||
header.Set("X-App-Version", builtAppVersion)
|
||||
header.Set("Authorization", "Bearer "+config.CloudToken)
|
||||
dialCtx, cancelDial := context.WithTimeout(context.Background(), CloudWebSocketConnectTimeout)
|
||||
|
||||
@@ -270,88 +283,13 @@ func runWebsocketClient() error {
|
||||
cloudLogger.Infof("websocket connected to %s", wsURL)
|
||||
|
||||
// set the metrics when we successfully connect to the cloud.
|
||||
cloudResetMetrics(true)
|
||||
wsResetMetrics(true, "cloud", "")
|
||||
|
||||
runCtx, cancelRun := context.WithCancel(context.Background())
|
||||
defer cancelRun()
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(CloudWebSocketPingInterval)
|
||||
|
||||
// set the timer for the ping duration
|
||||
timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) {
|
||||
metricCloudConnectionLastPingDuration.Set(v)
|
||||
metricCloudConnectionPingDuration.Observe(v)
|
||||
}))
|
||||
|
||||
err := c.Ping(runCtx)
|
||||
|
||||
if err != nil {
|
||||
cloudLogger.Warnf("websocket ping error: %v", err)
|
||||
cancelRun()
|
||||
return
|
||||
}
|
||||
|
||||
// dont use `defer` here because we want to observe the duration of the ping
|
||||
timer.ObserveDuration()
|
||||
|
||||
metricCloudConnectionTotalPingCount.Inc()
|
||||
metricCloudConnectionLastPingTimestamp.SetToCurrentTime()
|
||||
}
|
||||
}()
|
||||
|
||||
// create a channel to receive the disconnect event, once received, we cancelRun
|
||||
cloudDisconnectChan = make(chan error)
|
||||
defer func() {
|
||||
close(cloudDisconnectChan)
|
||||
cloudDisconnectChan = nil
|
||||
}()
|
||||
go func() {
|
||||
for err := range cloudDisconnectChan {
|
||||
if err == nil {
|
||||
continue
|
||||
}
|
||||
cloudLogger.Infof("disconnecting from cloud due to: %v", err)
|
||||
cancelRun()
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
typ, msg, err := c.Read(runCtx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if typ != websocket.MessageText {
|
||||
// ignore non-text messages
|
||||
continue
|
||||
}
|
||||
var req WebRTCSessionRequest
|
||||
err = json.Unmarshal(msg, &req)
|
||||
if err != nil {
|
||||
cloudLogger.Warnf("unable to parse ws message: %v", string(msg))
|
||||
continue
|
||||
}
|
||||
|
||||
cloudLogger.Infof("new session request: %v", req.OidcGoogle)
|
||||
cloudLogger.Tracef("session request info: %v", req)
|
||||
|
||||
metricCloudConnectionSessionRequestCount.Inc()
|
||||
metricCloudConnectionLastSessionRequestTimestamp.SetToCurrentTime()
|
||||
err = handleSessionRequest(runCtx, c, req)
|
||||
if err != nil {
|
||||
cloudLogger.Infof("error starting new session: %v", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
// we don't have a source for the cloud connection
|
||||
return handleWebRTCSignalWsMessages(c, true, "")
|
||||
}
|
||||
|
||||
func handleSessionRequest(ctx context.Context, c *websocket.Conn, req WebRTCSessionRequest) error {
|
||||
timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) {
|
||||
metricCloudConnectionLastSessionRequestDuration.Set(v)
|
||||
metricCloudConnectionSessionRequestDuration.Observe(v)
|
||||
}))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
func authenticateSession(ctx context.Context, c *websocket.Conn, req WebRTCSessionRequest) error {
|
||||
oidcCtx, cancelOIDC := context.WithTimeout(ctx, CloudOidcRequestTimeout)
|
||||
defer cancelOIDC()
|
||||
provider, err := oidc.NewProvider(oidcCtx, "https://accounts.google.com")
|
||||
@@ -379,10 +317,35 @@ func handleSessionRequest(ctx context.Context, c *websocket.Conn, req WebRTCSess
|
||||
return fmt.Errorf("google identity mismatch")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleSessionRequest(ctx context.Context, c *websocket.Conn, req WebRTCSessionRequest, isCloudConnection bool, source string) error {
|
||||
var sourceType string
|
||||
if isCloudConnection {
|
||||
sourceType = "cloud"
|
||||
} else {
|
||||
sourceType = "local"
|
||||
}
|
||||
|
||||
timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) {
|
||||
metricConnectionLastSessionRequestDuration.WithLabelValues(sourceType, source).Set(v)
|
||||
metricConnectionSessionRequestDuration.WithLabelValues(sourceType, source).Observe(v)
|
||||
}))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
// If the message is from the cloud, we need to authenticate the session.
|
||||
if isCloudConnection {
|
||||
if err := authenticateSession(ctx, c, req); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
session, err := newSession(SessionConfig{
|
||||
ICEServers: req.ICEServers,
|
||||
ws: c,
|
||||
IsCloud: isCloudConnection,
|
||||
LocalIP: req.IP,
|
||||
IsCloud: true,
|
||||
ICEServers: req.ICEServers,
|
||||
})
|
||||
if err != nil {
|
||||
_ = wsjson.Write(context.Background(), c, gin.H{"error": err})
|
||||
@@ -406,14 +369,14 @@ func handleSessionRequest(ctx context.Context, c *websocket.Conn, req WebRTCSess
|
||||
cloudLogger.Info("new session accepted")
|
||||
cloudLogger.Tracef("new session accepted: %v", session)
|
||||
currentSession = session
|
||||
_ = wsjson.Write(context.Background(), c, gin.H{"sd": sd})
|
||||
_ = wsjson.Write(context.Background(), c, gin.H{"type": "answer", "data": sd})
|
||||
return nil
|
||||
}
|
||||
|
||||
func RunWebsocketClient() {
|
||||
for {
|
||||
// reset the metrics when we start the websocket client.
|
||||
cloudResetMetrics(false)
|
||||
wsResetMetrics(false, "cloud", "")
|
||||
|
||||
// If the cloud token is not set, we don't need to run the websocket client.
|
||||
if config.CloudToken == "" {
|
||||
|
||||
Reference in New Issue
Block a user