Files
Stanislav Dmitrenko 307211a47f android, desktop: landscape calls on Android and better local camera ratio management (#5124)
* android, desktop: landscape calls on Android and better local camera ratio management

The main thing is that now when exiting from CallActivity while in call
audio devices are not reset to default. It allows to have landscape mode
enabled

* styles

* fix changing calls
2024-12-07 17:09:00 +00:00

1873 lines
67 KiB
TypeScript

// Inspired by
// https://github.com/webrtc/samples/blob/gh-pages/src/content/insertable-streams/endtoend-encryption
/**
 * Envelope of every message this script passes to `sendMessageToNative`.
 * `processCommand` fills all three fields; messages that are not a direct reply to a command
 * (e.g. "ice", "connection", "ended") carry only `resp`.
 */
interface WVApiMessage {
// correlation id copied from the command being answered
corrId?: number
resp: WCallResponse
// the command being answered, echoed back
command?: WCallCommand
}
/** Every command accepted by the `switch` in `processCommand`, discriminated by `type`. */
type WCallCommand =
| WCCapabilities
| WCPermission
| WCStartCall
| WCAcceptOffer
| WCallAnswer
| WCallIceCandidates
| WCEnableMedia
| WCToggleCamera
| WCDescription
| WCLayout
| WCEndCall
/**
 * Every payload that can be placed into `WVApiMessage.resp`, discriminated by `type`.
 * NOTE(review): WCAcceptOffer is a command type; it is presumably listed here for the commented-out
 * debugging response of the "start" handler, which returns a ready-to-use "offer" command — confirm.
 */
type WCallResponse =
| WRCapabilities
| WCallOffer
| WCallAnswer
| WCallIceCandidates
| WRConnection
| WRCallConnected
| WRPeerMedia
| WRCallEnd
| WRCallEnded
| WROk
| WRError
| WCAcceptOffer
/** Allowed values of the `type` field of a command. */
type WCallCommandTag =
| "capabilities"
| "permission"
| "start"
| "offer"
| "answer"
| "ice"
| "media"
| "camera"
| "description"
| "layout"
| "end"
/** Allowed values of the `type` field of a response. */
type WCallResponseTag =
| "capabilities"
| "offer"
| "answer"
| "ice"
| "connection"
| "connected"
| "peerMedia"
| "end"
| "ended"
| "ok"
| "error"
/** Overall kind of a call; see `localMedia`/`peerMedia` for how it is derived from the enabled sources. */
enum CallMediaType {
Audio = "audio",
Video = "video",
}
/** Individual media source that can be enabled or disabled with the "media" command. */
enum CallMediaSource {
Mic = "mic",
Camera = "camera",
ScreenAudio = "screenAudio",
ScreenVideo = "screenVideo",
Unknown = "unknown",
}
/**
 * Which camera to capture from.
 * NOTE(review): the values look like `facingMode` constraint values — confirm against getLocalMediaStream.
 */
enum VideoCamera {
User = "user",
Environment = "environment",
}
/** Layout requested with the "layout" command and passed to `changeLayout`. */
enum LayoutType {
Default = "default",
LocalVideo = "localVideo",
RemoteVideo = "remoteVideo",
}
/** One on/off flag per media source (same names as the `CallMediaSource` values, without `Unknown`). */
interface CallMediaSources {
mic: boolean
camera: boolean
screenAudio: boolean
screenVideo: boolean
}
/** Base shape of every command: the `type` tag selects the handler in `processCommand`. */
interface IWCallCommand {
type: WCallCommandTag
}
/** Base shape of every response. */
interface IWCallResponse {
type: WCallResponseTag
}
/** Asks for local media access and reports capabilities; answered with WRCapabilities or WRError. */
interface WCCapabilities extends IWCallCommand {
type: "capabilities"
media: CallMediaType
}
/** Creates the peer connection and the WebRTC offer; answered with WCallOffer. */
interface WCStartCall extends IWCallCommand {
type: "start"
media: CallMediaType
// used only when insertable streams are supported, see the "start" handler
aesKey?: string
// when absent, the built-in default ICE servers are used
iceServers?: RTCIceServer[]
// true restricts ICE transport policy to "relay"
relay?: boolean
}
/** Ends the active call. */
interface WCEndCall extends IWCallCommand {
type: "end"
}
/** Accepts a remote offer and creates the answer; answered with WCallAnswer. */
interface WCAcceptOffer extends IWCallCommand {
type: "offer"
offer: string // JSON string for RTCSessionDescriptionInit
iceCandidates: string // JSON strings for RTCIceCandidateInit[]
media: CallMediaType
aesKey?: string
iceServers?: RTCIceServer[]
relay?: boolean
}
/** Response to "start": the local offer with the ICE candidates gathered so far. */
interface WCallOffer extends IWCallResponse {
type: "offer"
offer: string // JSON string for RTCSessionDescriptionInit
iceCandidates: string // JSON strings for RTCIceCandidateInit[]
capabilities: CallCapabilities
}
/** Used both as the response to "offer" and as the command that delivers the peer's answer. */
interface WCallAnswer extends IWCallCommand, IWCallResponse {
type: "answer"
answer: string // JSON string for RTCSessionDescriptionInit
iceCandidates: string // JSON strings for RTCIceCandidateInit[]
}
/** Additional ICE candidates, in either direction (command from the peer, response with local ones). */
interface WCallIceCandidates extends IWCallCommand, IWCallResponse {
type: "ice"
iceCandidates: string // JSON strings for RTCIceCandidateInit[]
}
/** Enables or disables one local media source. */
interface WCEnableMedia extends IWCallCommand {
type: "media"
source: CallMediaSource
enable: boolean
}
/** Switches the local camera. */
interface WCToggleCamera extends IWCallCommand {
type: "camera"
camera: VideoCamera
}
/** Stores the localized call state and description texts. */
interface WCDescription extends IWCallCommand {
type: "description"
state: string
description: string
}
/** Changes the layout of the call view. */
interface WCLayout extends IWCallCommand {
type: "layout"
layout: LayoutType
}
/** Reports missing permissions; `title` and the command itself are passed to `failedToGetPermissions`. */
interface WCPermission extends IWCallCommand {
type: "permission"
title: string
// NOTE(review): presumably browser-specific instruction texts chosen by permissionDescription — confirm
chrome: string
safari: string
}
/** Response to "capabilities". */
interface WRCapabilities extends IWCallResponse {
type: "capabilities"
capabilities: CallCapabilities
}
interface CallCapabilities {
// result of supportsInsertableStreams(useWorker)
encryption: boolean
}
/** Snapshot of the peer connection states, sent on connection state changes and on the answer timeout. */
interface WRConnection extends IWCallResponse {
type: "connection"
state: {
connectionState: string
iceConnectionState: string
iceGatheringState: string
signalingState: string
}
}
/** Sent after the connection is established, with the succeeded candidate pair taken from getStats(). */
interface WRCallConnected extends IWCallResponse {
type: "connected"
connectionInfo: ConnectionInfo
}
interface WRPeerMedia extends IWCallResponse {
type: "peerMedia"
source: CallMediaSource
enabled: boolean
}
interface WRCallEnd extends IWCallResponse {
type: "end"
}
/** Sent when the connection became "disconnected" or "failed" while a call was active. */
interface WRCallEnded extends IWCallResponse {
type: "ended"
}
/** Generic success response. */
interface WROk extends IWCallResponse {
type: "ok"
}
/** Generic failure response; `message` is prefixed with the command name by the handlers. */
interface WRError extends IWCallResponse {
type: "error"
message: string
}
/** Details of the candidate pair in use, looked up in the stats report by candidate id. */
interface ConnectionInfo {
iceCandidatePair: RTCIceCandidatePairStats
localCandidate?: RTCIceCandidate
remoteCandidate?: RTCIceCandidate
}
/** The four media elements of the call view. */
interface VideoElements {
local: HTMLVideoElement
localScreen: HTMLMediaElement
remote: HTMLMediaElement
remoteScreen: HTMLMediaElement
}
// Hooks below are declared with `var` and default to logging/no-op implementations.
// NOTE(review): presumably the host page or platform code reassigns them — confirm
// (toggleScreenShare is reassigned further down in this file).
// for debugging
// var sendMessageToNative = ({resp}: WVApiMessage) => console.log(JSON.stringify({command: resp}))
var sendMessageToNative = (msg: WVApiMessage) => console.log(JSON.stringify(msg))
var toggleScreenShare = async () => {}
var localOrPeerMediaSourcesChanged = (_call: Call) => {}
var inactiveCallMediaSourcesChanged = (_inactiveCallMediaSources: CallMediaSources) => {}
var failedToGetPermissions = (_title: string, _description: string) => {}
// Global object with cryptographic/encoding functions
const callCrypto = callCryptoFunction()
// Ambient declaration: RTCRtpScriptTransform is declared here by hand rather than taken from the DOM typings
declare var RTCRtpScriptTransform: {
prototype: RTCRtpScriptTransform
new (worker: Worker, options?: any, transfer?: any[] | undefined): RTCRtpScriptTransform
}
/** Direction of the frame transform installed on a sender (encrypt) or a receiver (decrypt). */
enum TransformOperation {
Encrypt = "encrypt",
Decrypt = "decrypt",
}
interface RTCRtpScriptTransform {}
/** Input of `processCommand`: a command with an optional correlation id that is echoed in the response. */
interface WVAPICall {
corrId?: number
command: WCallCommand
}
/** State of the call that owns the peer connection (stored in `activeCall`). */
interface Call {
connection: RTCPeerConnection
iceCandidates: Promise<string> // JSON strings for RTCIceCandidate
// which local sources currently have tracks
localMediaSources: CallMediaSources
localCamera: VideoCamera
localStream: MediaStream
localScreenStream: MediaStream
// has no tracks in the beginning, see addRemoteTracksWhenUnmuted
remoteStream: MediaStream
// tracks received in `ontrack`, keyed by the media source derived from the transceiver mid
remoteTracks: Map<CallMediaSource, MediaStreamTrack>
// has no tracks in the beginning too
remoteScreenStream: MediaStream
peerMediaSources: CallMediaSources
// encoded AES key as received in the command; `key` is its decoded form
aesKey?: string
// created only when `useWorker` is set
worker?: Worker
key?: CryptoKey
layout: LayoutType
// controls whether transceiver's track was replaced already or set initially (in video call)
cameraTrackWasSetBefore: boolean
// while true, the "media" command refuses to enable the camera unless the peer already sends camera video
peerHasOldVersion: boolean
}
/** State kept between the "capabilities" stage and the creation of `activeCall`. */
interface NotConnectedCall {
localCamera: VideoCamera
localStream: MediaStream
layout: LayoutType
}
/** Returns `Video` when the local side sends camera or screen video, `Audio` otherwise. */
function localMedia(call: Call): CallMediaType {
  const {camera, screenVideo} = call.localMediaSources
  if (camera || screenVideo) return CallMediaType.Video
  return CallMediaType.Audio
}
/** Returns `Video` when the peer sends camera or screen video, `Audio` otherwise. */
function peerMedia(call: Call): CallMediaType {
  const {camera, screenVideo} = call.peerMediaSources
  if (camera || screenVideo) return CallMediaType.Video
  return CallMediaType.Audio
}
// Media source defaults that can be changed before `activeCall` exists; initializeCall reads
// them when it has to request a new local stream
let inactiveCallMediaSources: CallMediaSources = {
mic: false,
camera: false,
screenAudio: false,
screenVideo: false,
}
// The call that owns the peer connection, if any
let activeCall: Call | undefined
// Set by the "capabilities" command, cleared by the "start" command
let notConnectedCall: NotConnectedCall | undefined
// Milliseconds after which initializeCall reports the connection state if it was not reported earlier
let answerTimeout = 30_000
var useWorker = false
var isDesktop = false
// Texts received with the "description" command
var localizedState = ""
var localizedDescription = ""
// Passing true here will send audio in screen record stream
const allowSendScreenAudio = false
// When one side of a call sends candidates too fast (before local & remote descriptions are set), those candidates
// are stored here and added once the call is ready to process them
let afterCallInitializedCandidates: RTCIceCandidateInit[] = []
const stopTrackOnAndroid = false
const processCommand = (function () {
// Sender/receiver types extended with the two frame-transform APIs this file may use:
// `createEncodedStreams` and `transform`
type RTCRtpSenderWithEncryption = RTCRtpSender & {
createEncodedStreams: () => TransformStream
transform: RTCRtpScriptTransform
}
type RTCRtpReceiverWithEncryption = RTCRtpReceiver & {
createEncodedStreams: () => TransformStream
transform: RTCRtpScriptTransform
}
// RTCConfiguration extended with options that the DOM typings do not declare
type RTCConfigurationWithEncryption = RTCConfiguration & {
encodedInsertableStreams: boolean
}
type RTCConfigurationWithSdpSemantics = RTCConfiguration & {
sdpSemantics: string
}
/** Peer connection options plus the timings (milliseconds) used by getIceCandidates. */
interface CallConfig {
peerConnectionConfig: RTCConfigurationWithEncryption & RTCConfigurationWithSdpSemantics
iceCandidates: {
// how long to gather candidates before the first batch is returned
delay: number
// period for sending candidates gathered after the first batch
extrasInterval: number
// when to stop the periodic sending
extrasTimeout: number
}
}
// Used when a command does not provide its own `iceServers`
const defaultIceServers: RTCIceServer[] = [
{urls: ["stuns:stun.simplex.im:443"]},
{urls: ["stun:stun.simplex.im:443"]},
//{urls: ["turns:turn.simplex.im:443?transport=udp"], username: "private2", credential: "Hxuq2QxUjnhj96Zq2r4HjqHRj"},
{urls: ["turns:turn.simplex.im:443?transport=tcp"], username: "private2", credential: "Hxuq2QxUjnhj96Zq2r4HjqHRj"},
]
/**
 * Builds the configuration for a new call.
 *
 * @param encodedInsertableStreams passed through to the peer connection configuration
 * @param iceServers servers to use; the built-in defaults are used when omitted
 * @param relay when true, only relayed ICE candidates are allowed
 */
function getCallConfig(encodedInsertableStreams: boolean, iceServers?: RTCIceServer[], relay?: boolean): CallConfig {
  const iceTransportPolicy: RTCIceTransportPolicy = relay ? "relay" : "all"
  const peerConnectionConfig: CallConfig["peerConnectionConfig"] = {
    iceServers: iceServers ?? defaultIceServers,
    iceCandidatePoolSize: 10,
    encodedInsertableStreams,
    iceTransportPolicy,
    // needed for Android WebView >= 69 && <= 72 where default was "plan-b" which is incompatible with transceivers
    sdpSemantics: "unified-plan",
  }
  // timings in milliseconds, consumed by getIceCandidates
  const iceCandidates = {delay: 750, extrasInterval: 1500, extrasTimeout: 12000}
  return {peerConnectionConfig, iceCandidates}
}
/**
 * Collects local ICE candidates of `conn`.
 *
 * The returned promise resolves with the serialized candidates gathered either until gathering completes
 * or until `config.iceCandidates.delay` elapses, whichever happens first. Candidates that arrive later
 * are sent to the native side as "ice" messages: periodically until `extrasTimeout` fires, and once more
 * when gathering completes.
 */
function getIceCandidates(conn: RTCPeerConnection, config: CallConfig) {
  return new Promise<string>((resolve) => {
    let gathered: RTCIceCandidate[] = []
    let resolved = false
    let extrasIntervalId: number | undefined
    let extrasTimeoutId: number | undefined

    // Serializes everything gathered so far and empties the buffer
    const takeGathered = (): string => {
      const serialized = serialize(gathered)
      gathered = []
      return serialized
    }

    // Delivers the first batch through the promise
    const resolveIceCandidates = () => {
      if (initialDelayId) clearTimeout(initialDelayId)
      resolved = true
      // console.log("resolveIceCandidates", JSON.stringify(candidates))
      console.log("resolveIceCandidates")
      resolve(takeGathered())
    }

    // Delivers later batches as "ice" messages; does nothing when there is nothing new
    const sendIceCandidates = () => {
      if (gathered.length === 0) return
      // console.log("sendIceCandidates", JSON.stringify(candidates))
      console.log("sendIceCandidates")
      sendMessageToNative({resp: {type: "ice", iceCandidates: takeGathered()}})
    }

    const initialDelayId = setTimeout(() => {
      if (resolved) return
      resolveIceCandidates()
      extrasIntervalId = setInterval(sendIceCandidates, config.iceCandidates.extrasInterval)
      extrasTimeoutId = setTimeout(() => {
        clearInterval(extrasIntervalId)
        sendIceCandidates()
      }, config.iceCandidates.extrasTimeout)
    }, config.iceCandidates.delay)

    conn.onicecandidate = ({candidate}) => {
      if (candidate) gathered.push(candidate)
    }

    conn.onicegatheringstatechange = () => {
      if (conn.iceGatheringState != "complete") return
      if (!resolved) {
        resolveIceCandidates()
        return
      }
      // gathering finished after the first batch was returned: stop the timers and flush the rest
      if (extrasIntervalId) clearInterval(extrasIntervalId)
      if (extrasTimeoutId) clearTimeout(extrasTimeoutId)
      sendIceCandidates()
    }
  })
}
/**
 * Creates the peer connection and the `Call` object for a new call.
 *
 * Reuses the local stream, camera and layout of `notConnectedCall` when present; otherwise requests a new
 * local stream according to `inactiveCallMediaSources`. Starts ICE candidate gathering, attaches media
 * elements (setupMediaStreams) and installs connection state reporting.
 *
 * Does not assign `activeCall` — the caller does that with the returned value.
 *
 * @param config peer connection configuration and ICE timings
 * @param mediaType used only for the desktop permissions alert when the local stream cannot be obtained
 * @param aesKey stored in the returned call
 * @throws when the local stream cannot be obtained and `isDesktop` is false, or when setupMediaStreams fails
 */
async function initializeCall(config: CallConfig, mediaType: CallMediaType, aesKey?: string): Promise<Call> {
let pc: RTCPeerConnection
try {
pc = new RTCPeerConnection(config.peerConnectionConfig)
} catch (e) {
console.log("Error while constructing RTCPeerConnection, will try without 'stuns' specified: " + e)
// drop every server entry that has a "stuns:" URL and retry once; a second failure propagates
const withoutStuns = config.peerConnectionConfig.iceServers?.filter((elem) =>
typeof elem.urls === "string" ? !elem.urls.startsWith("stuns:") : !elem.urls.some((url) => url.startsWith("stuns:"))
)
config.peerConnectionConfig.iceServers = withoutStuns
pc = new RTCPeerConnection(config.peerConnectionConfig)
}
const remoteStream = new MediaStream()
const remoteScreenStream = new MediaStream()
const localCamera = notConnectedCall?.localCamera ?? VideoCamera.User
let localStream: MediaStream
try {
localStream = notConnectedCall?.localStream
? notConnectedCall.localStream
: await getLocalMediaStream(
inactiveCallMediaSources.mic,
inactiveCallMediaSources.camera && (await browserHasCamera()),
localCamera
)
} catch (e) {
console.log("Error while getting local media stream", e)
if (isDesktop) {
// on desktop the call continues with an empty local stream after showing the alert
desktopShowPermissionsAlert(mediaType)
localStream = new MediaStream()
} else {
// On Android all streams should be present
throw e
}
}
const localScreenStream = new MediaStream()
// Will become video when any video tracks will be added
// NOTE(review): the comment above does not seem to describe the next line — confirm or remove
const iceCandidates = getIceCandidates(pc, config)
const call: Call = {
connection: pc,
iceCandidates,
localMediaSources: {
mic: localStream.getAudioTracks().length > 0,
camera: localStream.getVideoTracks().length > 0,
screenAudio: localScreenStream.getAudioTracks().length > 0,
screenVideo: localScreenStream.getVideoTracks().length > 0,
},
localCamera,
localStream,
localScreenStream,
remoteStream,
remoteTracks: new Map(),
remoteScreenStream,
peerMediaSources: {
mic: false,
camera: false,
screenAudio: false,
screenVideo: false,
},
aesKey,
layout: notConnectedCall?.layout ?? LayoutType.Default,
cameraTrackWasSetBefore: localStream.getVideoTracks().length > 0,
peerHasOldVersion: false,
}
localOrPeerMediaSourcesChanged(call)
await setupMediaStreams(call)
// if no state change was handled within answerTimeout, report (and act on) the current state anyway
let connectionTimeout: number | undefined = setTimeout(connectionHandler, answerTimeout)
// fall back to the ICE connection state event where `connectionState` is not available
if (pc.connectionState) {
pc.addEventListener("connectionstatechange", connectionStateChange)
} else {
pc.addEventListener("iceconnectionstatechange", connectionStateChange)
}
return call
// Event listener: forwards every state change except "failed" to connectionHandler;
// a "failed" state is therefore only handled when the answerTimeout timer fires
async function connectionStateChange() {
// "failed" means the second party did not answer in time (15 sec timeout in Chrome WebView)
// See https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/p2p/base/p2p_constants.cc;l=70)
if (pc.connectionState !== "failed") connectionHandler()
}
// Reports the connection state to the native side, ends the call on "disconnected"/"failed",
// and reports the succeeded candidate pair on "connected"
async function connectionHandler() {
sendMessageToNative({
resp: {
type: "connection",
state: {
connectionState:
pc.connectionState ??
(pc.iceConnectionState != "completed" && pc.iceConnectionState != "checking"
? pc.iceConnectionState
: pc.iceConnectionState == "completed"
? "connected"
: "connecting") /* webView 69-70 doesn't have connectionState yet */,
iceConnectionState: pc.iceConnectionState,
iceGatheringState: pc.iceGatheringState,
signalingState: pc.signalingState,
},
},
})
if (
pc.connectionState == "disconnected" ||
pc.connectionState == "failed" ||
(!pc.connectionState && (pc.iceConnectionState == "disconnected" || pc.iceConnectionState == "failed"))
) {
clearConnectionTimeout()
if (pc.connectionState) {
pc.removeEventListener("connectionstatechange", connectionStateChange)
} else {
pc.removeEventListener("iceconnectionstatechange", connectionStateChange)
}
// "ended" is sent from a zero-delay timer, i.e. after endCall() below has run
if (activeCall) {
setTimeout(() => sendMessageToNative({resp: {type: "ended"}}), 0)
}
endCall()
} else if (pc.connectionState == "connected" || (!pc.connectionState && pc.iceConnectionState == "connected")) {
clearConnectionTimeout()
const stats = (await pc.getStats()) as Map<string, any>
// report the first candidate pair with state "succeeded"; nothing is sent when there is none
for (const stat of stats.values()) {
const {type, state} = stat
if (type === "candidate-pair" && state === "succeeded") {
const iceCandidatePair = stat as RTCIceCandidatePairStats
const resp: WRCallConnected = {
type: "connected",
connectionInfo: {
iceCandidatePair,
localCandidate: stats.get(iceCandidatePair.localCandidateId),
remoteCandidate: stats.get(iceCandidatePair.remoteCandidateId),
},
}
setTimeout(() => sendMessageToNative({resp}), 500)
break
}
}
}
}
// Cancels the answerTimeout timer if it is still pending
function clearConnectionTimeout() {
if (connectionTimeout) {
clearTimeout(connectionTimeout)
connectionTimeout = undefined
}
}
}
/** Encodes a value as JSON compressed to a base64 string with LZString. */
function serialize<T>(x: T): string {
  const json = JSON.stringify(x)
  return LZString.compressToBase64(json)
}
/**
 * Decodes a value produced by `serialize`.
 *
 * @param s LZString base64-compressed JSON
 * @returns the parsed value; its shape is NOT validated against `T`
 * @throws Error when `s` cannot be decompressed, SyntaxError when the decompressed text is not valid JSON
 */
function parse<T>(s: string): T {
  const json = LZString.decompressFromBase64(s)
  // Without this check JSON.parse(null) would quietly return null and the failure would only
  // surface later, far away from the malformed payload
  if (json == null) throw new Error("parse: cannot decompress payload")
  return JSON.parse(json)
}
/**
 * Executes one command received from the native side and reports the outcome.
 *
 * Every command produces exactly one response. Exceptions thrown by a handler are converted into an
 * "error" response prefixed with the command type, so this function itself does not reject because of
 * a failing handler.
 *
 * @param body the command with an optional correlation id
 * @returns the message (correlation id, response, echoed command) that was also passed to `sendMessageToNative`
 */
async function processCommand(body: WVAPICall): Promise<WVApiMessage> {
  const {corrId, command} = body
  const pc = activeCall?.connection
  let resp: WCallResponse
  try {
    switch (command.type) {
      case "capabilities": {
        console.log("starting outgoing call - capabilities")
        if (activeCall) endCall()
        let localStream: MediaStream | null = null
        try {
          localStream = await getLocalMediaStream(
            true,
            command.media == CallMediaType.Video && (await browserHasCamera()),
            VideoCamera.User
          )
          const videos = getVideoElements()
          if (videos) {
            setupLocalVideoRatio(videos.local)
            videos.local.srcObject = localStream
            videos.local.play().catch((e) => console.log(e))
          }
        } catch (e) {
          console.log(e)
          // Do not allow to continue the call without audio permission
          resp = {type: "error", message: "capabilities: no permissions were granted for mic and/or camera"}
          break
        }
        // Specify defaults that can be changed via UI before call established. It's only used before activeCall instance appears
        inactiveCallMediaSources.mic = localStream != null && localStream.getAudioTracks().length > 0
        inactiveCallMediaSources.camera = localStream != null && localStream.getVideoTracks().length > 0
        inactiveCallMediaSourcesChanged(inactiveCallMediaSources)
        notConnectedCall = {
          localCamera: VideoCamera.User,
          localStream: localStream,
          layout: LayoutType.Default,
        }
        const encryption = supportsInsertableStreams(useWorker)
        resp = {type: "capabilities", capabilities: {encryption}}
        break
      }
      case "start": {
        console.log("starting incoming call - create webrtc session")
        if (activeCall) endCall()
        // It can be already defined on Android when switching calls (if the previous call was outgoing)
        notConnectedCall = undefined
        inactiveCallMediaSources.mic = true
        inactiveCallMediaSources.camera = command.media == CallMediaType.Video
        inactiveCallMediaSourcesChanged(inactiveCallMediaSources)
        setupLocalVideoRatio(getVideoElements()!.local)
        const {media, iceServers, relay} = command
        const encryption = supportsInsertableStreams(useWorker)
        const aesKey = encryption ? command.aesKey : undefined
        activeCall = await initializeCall(getCallConfig(encryption && !!aesKey, iceServers, relay), media, aesKey)
        await setupLocalStream(true, activeCall)
        setupCodecPreferences(activeCall)
        const pc = activeCall.connection
        const offer = await pc.createOffer()
        await pc.setLocalDescription(offer)
        // should be called after setLocalDescription in order to have transceiver.mid set
        setupEncryptionForLocalStream(activeCall)
        addIceCandidates(pc, afterCallInitializedCandidates)
        afterCallInitializedCandidates = []
        // for debugging, returning the command for callee to use
        // resp = {
        //   type: "offer",
        //   offer: serialize(offer),
        //   iceCandidates: await activeCall.iceCandidates,
        //   capabilities: {encryption},
        //   media,
        //   iceServers,
        //   relay,
        //   aesKey,
        // }
        resp = {
          type: "offer",
          offer: serialize(offer),
          iceCandidates: await activeCall.iceCandidates,
          capabilities: {encryption},
        }
        // console.log("offer response", JSON.stringify(resp))
        break
      }
      case "offer":
        if (activeCall) {
          resp = {type: "error", message: "accept: call already started"}
        } else if (!supportsInsertableStreams(useWorker) && command.aesKey) {
          resp = {type: "error", message: "accept: encryption is not supported"}
        } else {
          const offer: RTCSessionDescriptionInit = parse(command.offer)
          const remoteIceCandidates: RTCIceCandidateInit[] = parse(command.iceCandidates)
          const {media, aesKey, iceServers, relay} = command
          activeCall = await initializeCall(getCallConfig(!!aesKey, iceServers, relay), media, aesKey)
          const pc = activeCall.connection
          // console.log("offer remoteIceCandidates", JSON.stringify(remoteIceCandidates))
          await pc.setRemoteDescription(new RTCSessionDescription(!webView69Or70() ? offer : adaptSdpToOldWebView(offer)))
          // setting up local stream only after setRemoteDescription in order to have transceivers set
          await setupLocalStream(false, activeCall)
          setupEncryptionForLocalStream(activeCall)
          setupCodecPreferences(activeCall)
          // enable using the same transceivers for sending media too, so total number of transceivers will be: audio, camera, screen audio, screen video
          pc.getTransceivers().forEach((elem) => (elem.direction = "sendrecv"))
          // setting media streams after remote description in order to have all transceivers ready (so ordering will be preserved)
          // console.log(
          //   "Transceivers",
          //   pc.getTransceivers(),
          //   pc.getTransceivers().map((elem) => "" + elem.mid + " " + elem.sender.track?.kind + " " + elem.sender.track?.label)
          // )
          adaptToOldVersion(pc.getTransceivers().length <= 2, activeCall)
          let answer = await pc.createAnswer()
          await pc.setLocalDescription(answer)
          addIceCandidates(pc, remoteIceCandidates)
          addIceCandidates(pc, afterCallInitializedCandidates)
          afterCallInitializedCandidates = []
          // same as command for caller to use
          resp = {
            type: "answer",
            answer: serialize(answer),
            iceCandidates: await activeCall.iceCandidates,
          }
        }
        // console.log("answer response", JSON.stringify(resp))
        break
      case "answer":
        if (!pc) {
          resp = {type: "error", message: "answer: call not started"}
        } else if (!pc.localDescription) {
          resp = {type: "error", message: "answer: local description is not set"}
        } else if (pc.currentRemoteDescription) {
          resp = {type: "error", message: "answer: remote description already set"}
        } else {
          const answer: RTCSessionDescriptionInit = parse(command.answer)
          const remoteIceCandidates: RTCIceCandidateInit[] = parse(command.iceCandidates)
          // console.log("answer remoteIceCandidates", JSON.stringify(remoteIceCandidates))
          await pc.setRemoteDescription(new RTCSessionDescription(!webView69Or70() ? answer : adaptSdpToOldWebView(answer)))
          adaptToOldVersion(pc.getTransceivers()[2].currentDirection == "sendonly", activeCall!)
          addIceCandidates(pc, remoteIceCandidates)
          addIceCandidates(pc, afterCallInitializedCandidates)
          afterCallInitializedCandidates = []
          resp = {type: "ok"}
        }
        break
      case "ice": {
        const remoteIceCandidates: RTCIceCandidateInit[] = parse(command.iceCandidates)
        if (pc) {
          addIceCandidates(pc, remoteIceCandidates)
          resp = {type: "ok"}
        } else {
          // keep the candidates until the call is initialized; the response still reports an error
          afterCallInitializedCandidates.push(...remoteIceCandidates)
          resp = {type: "error", message: "ice: call not started yet, will add candidates later"}
        }
        break
      }
      case "media":
        if (!activeCall) {
          if (!notConnectedCall) {
            // call can have a slow startup and be in this place even before "capabilities" stage
            resp = {type: "error", message: "media: call has not yet pass capabilities stage"}
            break
          }
          switch (command.source) {
            case CallMediaSource.Mic:
              inactiveCallMediaSources.mic = command.enable
              break
            case CallMediaSource.Camera:
              inactiveCallMediaSources.camera = command.enable
              break
            case CallMediaSource.ScreenAudio:
              inactiveCallMediaSources.screenAudio = command.enable
              break
            case CallMediaSource.ScreenVideo:
              inactiveCallMediaSources.screenVideo = command.enable
              break
          }
          inactiveCallMediaSourcesChanged(inactiveCallMediaSources)
          recreateLocalStreamWhileNotConnected(notConnectedCall?.localCamera ?? VideoCamera.User)
          resp = {type: "ok"}
        } else if (activeCall.peerHasOldVersion && command.source == CallMediaSource.Camera && !activeCall.peerMediaSources.camera) {
          resp = {type: "error", message: "media: cannot enable camera since the peer has an old version"}
        } else if (!activeCall.cameraTrackWasSetBefore && command.source == CallMediaSource.Camera && command.enable) {
          await startSendingCamera(activeCall, activeCall.localCamera)
          resp = {type: "ok"}
        } else if (
          (command.source == CallMediaSource.Mic && activeCall.localStream.getAudioTracks().length > 0) ||
          (command.source == CallMediaSource.Camera && activeCall.localStream.getVideoTracks().length > 0)
        ) {
          if (enableMedia(activeCall.localStream, command.source, command.enable)) {
            resp = {type: "ok"}
          } else {
            resp = {type: "error", message: "media: cannot enable media source"}
          }
        } else {
          if (await replaceMedia(activeCall, command.source, command.enable, activeCall.localCamera)) {
            resp = {type: "ok"}
          } else {
            resp = {type: "error", message: "media: cannot replace media source"}
          }
        }
        break
      case "camera":
        if (!activeCall || !pc) {
          if (notConnectedCall) {
            recreateLocalStreamWhileNotConnected(command.camera)
            resp = {type: "ok"}
          } else {
            resp = {type: "error", message: "camera: call has not yet pass capabilities stage"}
          }
        } else {
          if (await replaceMedia(activeCall, CallMediaSource.Camera, true, command.camera)) {
            resp = {type: "ok"}
          } else {
            resp = {type: "error", message: "camera: cannot replace media source"}
          }
        }
        break
      case "description":
        localizedState = command.state
        localizedDescription = command.description
        resp = {type: "ok"}
        break
      case "layout":
        if (activeCall) {
          activeCall.layout = command.layout
        } else if (notConnectedCall) {
          notConnectedCall.layout = command.layout
        }
        changeLayout(command.layout)
        resp = {type: "ok"}
        break
      case "end":
        endCall()
        resp = {type: "ok"}
        break
      case "permission":
        failedToGetPermissions(command.title, permissionDescription(command))
        resp = {type: "ok"}
        break
      default:
        resp = {type: "error", message: "unknown command"}
        break
    }
  } catch (e) {
    // Thrown values are not guaranteed to be Error instances: use `message` when it is a string,
    // otherwise fall back to the string form of the thrown value
    const reason = (e as {message?: unknown} | null | undefined)?.message
    resp = {type: "error", message: `${command.type}: ${typeof reason === "string" ? reason : String(e)}`}
  }
  const apiResp = {corrId, resp, command}
  sendMessageToNative(apiResp)
  return apiResp
}
/**
 * Ends the active call, if any: releases camera and mic, closes the peer connection,
 * forgets the call and resets the video elements. Safe to call when there is no active call.
 */
function endCall() {
  shutdownCameraAndMic()
  const connection = activeCall?.connection
  if (connection) {
    try {
      connection.close()
    } catch (err) {
      // closing is best-effort: the call is dropped regardless
      console.log(err)
    }
  }
  activeCall = undefined
  resetVideoElements()
}
/**
 * Adds remote ICE candidates to the connection.
 *
 * Candidates are added without waiting for each other. A candidate rejected by the connection is
 * logged and does not affect the remaining ones.
 *
 * @param conn connection to add the candidates to
 * @param iceCandidates candidates received from the peer
 */
function addIceCandidates(conn: RTCPeerConnection, iceCandidates: RTCIceCandidateInit[]) {
  for (const c of iceCandidates) {
    // addIceCandidate returns a promise: without a handler a rejected candidate becomes an unhandled rejection
    conn.addIceCandidate(new RTCIceCandidate(c)).catch((e) => console.log("addIceCandidate error", e))
    // console.log("addIceCandidates", JSON.stringify(c))
  }
}
/**
 * Prepares encryption and remote track handling for the call and attaches the call's streams
 * to the media elements.
 *
 * @throws Error("no video elements") when the media elements are not available
 */
async function setupMediaStreams(call: Call): Promise<void> {
  const elements = getVideoElements()
  if (!elements) throw Error("no video elements")
  await setupEncryptionWorker(call)
  setupRemoteStream(call)
  const {localScreen, remote, remoteScreen} = elements
  localScreen.srcObject = call.localScreenStream
  remote.srcObject = call.remoteStream
  remoteScreen.srcObject = call.remoteScreenStream
  // videos.localScreen.play()
  // For example, exception can be: NotAllowedError: play() failed because the user didn't interact with the document first
  for (const element of [remote, remoteScreen]) {
    element.play().catch((e) => console.log(e))
  }
}
/**
 * For a call with an AES key: decodes the key once and, when `useWorker` is set, creates the worker
 * from the source of `callCryptoFunction` and `workerFunction`. Does nothing for a call without a key.
 */
async function setupEncryptionWorker(call: Call) {
  if (!call.aesKey) return
  if (!call.key) call.key = await callCrypto.decodeAesKey(call.aesKey)
  if (!useWorker || call.worker) return
  // the worker runs from a blob URL built from the functions' own source text
  const workerCode = `const callCrypto = (${callCryptoFunction.toString()})(); (${workerFunction.toString()})()`
  const codeBlob = new Blob([workerCode], {type: "text/javascript"})
  const worker = new Worker(URL.createObjectURL(codeBlob))
  worker.onerror = ({error, filename, lineno, message}: ErrorEvent) => console.log({error, filename, lineno, message})
  // call.worker.onmessage = ({data}) => console.log(JSON.stringify({message: data}))
  call.worker = worker
}
/**
 * Connects the local stream of the call to the peer connection and to the local video element.
 *
 * @param incomingCall true when the connection has no transceivers yet and they must be created here
 *   (the "start" command); false when the remote description was already applied (the "offer" command)
 * @param call the call being set up
 * @throws Error("no video elements") when the media elements are not available; rejections of
 *   `replaceTrack` propagate to the caller in both branches
 */
async function setupLocalStream(incomingCall: boolean, call: Call) {
  const videos = getVideoElements()
  if (!videos) throw Error("no video elements")
  const pc = call.connection
  const {localStream} = call
  const transceivers = pc.getTransceivers()
  const audioTracks = localStream.getAudioTracks()
  const videoTracks = localStream.getVideoTracks()
  if (incomingCall) {
    // incoming call, no transceivers yet. But they should be added in order: mic, camera, screen audio, screen video
    // mid = 0
    const audioTransceiver = pc.addTransceiver("audio", {streams: [localStream]})
    if (audioTracks.length != 0) {
      // awaited (as in the other branch) so that a failure is reported instead of being silently dropped
      await audioTransceiver.sender.replaceTrack(audioTracks[0])
    }
    // mid = 1
    const videoTransceiver = pc.addTransceiver("video", {streams: [localStream]})
    if (videoTracks.length != 0) {
      await videoTransceiver.sender.replaceTrack(videoTracks[0])
    }
    if (call.localScreenStream.getAudioTracks().length == 0) {
      // mid = 2
      pc.addTransceiver("audio", {streams: [call.localScreenStream]})
    }
    if (call.localScreenStream.getVideoTracks().length == 0) {
      // mid = 3
      pc.addTransceiver("video", {streams: [call.localScreenStream]})
    }
  } else if (transceivers.length > 2) {
    // new version
    // Outgoing call. All transceivers are ready. Don't addTrack() because it will create new transceivers, replace existing (null) tracks
    const micTransceiver = transceivers.find((elem) => mediaSourceFromTransceiverMid(elem.mid) == CallMediaSource.Mic)
    await micTransceiver?.sender?.replaceTrack(audioTracks[0] ?? null)
    const cameraTransceiver = transceivers.find((elem) => mediaSourceFromTransceiverMid(elem.mid) == CallMediaSource.Camera)
    await cameraTransceiver?.sender?.replaceTrack(videoTracks[0] ?? null)
  } else {
    // old version, only two transceivers
    for (const track of localStream.getTracks()) {
      pc.addTrack(track, localStream)
    }
  }
  // src can be set to notConnectedCall.localStream which is the same as call.localStream
  if (!videos.local.srcObject) {
    videos.local.srcObject = call.localStream
  }
  // Without doing it manually Firefox shows black screen but video can be played in Picture-in-Picture
  videos.local.play().catch((e) => console.log(e))
}
// Elements that already have the "loadedmetadata" listener installed. This function is called for
// every new call on the same element, so without the guard the listeners would pile up.
const localVideoRatioInitialized = new WeakSet<HTMLVideoElement>()

/**
 * Keeps the height of the local video preview in line with the aspect ratio of the captured video:
 * landscape video takes 20% and portrait video 30% of the viewport width, and the height is derived
 * from that width and the video's aspect ratio. The height is recomputed when metadata is loaded and
 * whenever the video size changes. Calling this again for the same element has no additional effect.
 */
function setupLocalVideoRatio(local: HTMLVideoElement) {
  const applyRatio = () => {
    // dimensions are not known yet
    if (local.videoWidth == 0 || local.videoHeight == 0) return
    const ratio = local.videoWidth > local.videoHeight ? 0.2 : 0.3
    local.style.height = `${(ratio / (local.videoWidth / local.videoHeight)) * 100}vw`
  }
  if (!localVideoRatioInitialized.has(local)) {
    localVideoRatioInitialized.add(local)
    local.addEventListener("loadedmetadata", function () {
      console.log("Local video videoWidth: " + local.videoWidth + "px, videoHeight: " + local.videoHeight + "px")
      applyRatio()
    })
  }
  // assigning the property replaces the previous handler, so it is safe to repeat
  local.onresize = function () {
    console.log("Local video size changed to " + local.videoWidth + "x" + local.videoHeight)
    applyRatio()
  }
}
/**
 * Installs the encrypting transform on the sender of every transceiver of an encrypted call.
 * Does nothing when the call has no AES key or the key was not decoded yet.
 * The media kind of each sender is derived from the position of its transceiver in the list.
 */
function setupEncryptionForLocalStream(call: Call) {
  const {aesKey, key} = call
  if (!aesKey || !key) return
  console.log("set up encryption for sending")
  call.connection.getTransceivers().forEach((transceiver, index) => {
    const source = mediaSourceFromTransceiverMid(index.toString())
    const isVideo = source == CallMediaSource.Camera || source == CallMediaSource.ScreenVideo
    setupPeerTransform(
      TransformOperation.Encrypt,
      transceiver.sender as RTCRtpSenderWithEncryption,
      call.worker,
      aesKey,
      key,
      isVideo ? CallMediaType.Video : CallMediaType.Audio
    )
  })
}
/**
 * Installs the `ontrack` handler of the call's connection. For every incoming track the handler
 * sets up decryption (for encrypted calls), registers the mute/unmute listener and remembers the
 * track under its media source. Errors inside the handler are logged, not rethrown.
 */
function setupRemoteStream(call: Call) {
  // Pull tracks from remote stream as they arrive add them to remoteStream video
  call.connection.ontrack = ({track, receiver, transceiver}) => {
    //console.log("On track", event)
    try {
      if (call.aesKey && call.key) {
        console.log("set up decryption for receiving")
        const media = receiver.track.kind == "video" ? CallMediaType.Video : CallMediaType.Audio
        setupPeerTransform(TransformOperation.Decrypt, receiver as RTCRtpReceiverWithEncryption, call.worker, call.aesKey, call.key, media)
      }
      setupMuteUnmuteListener(transceiver, track)
      call.remoteTracks.set(mediaSourceFromTransceiverMid(transceiver.mid), track)
      console.log(`ontrack success`)
    } catch (e) {
      console.log(`ontrack error: ${(e as Error).message}`)
    }
  }
}
/**
 * Makes VP8 the preferred codec of the video transceivers (camera and screen video) of the call.
 *
 * When the browser reports no video capabilities or does not list VP8, no preferences are set and
 * the codecs are negotiated automatically.
 */
function setupCodecPreferences(call: Call) {
  // We assume VP8 encoding in the decode/encode stages to get the initial
  // bytes to pass as plaintext so we enforce that here.
  // VP8 is supported by all supporters of webrtc.
  // Use of VP8 by default may also reduce depacketisation issues.
  // We do not encrypt the first couple of bytes of the payload so that the
  // video elements can work by determining video keyframes and the opus mode
  // being used. This appears to be necessary for any video feed at all.
  // For VP8 this is the content described in
  //   https://tools.ietf.org/html/rfc6386#section-9.1
  // which is 10 bytes for key frames and 3 bytes for delta frames.
  // For opus (where encodedFrame.type is not set) this is the TOC byte from
  //   https://tools.ietf.org/html/rfc6716#section-3.1

  // Using RTCRtpReceiver instead of RTCRtpSender, see these lines:
  // -    if (!is_recv_codec && !is_send_codec) {
  // +    if (!is_recv_codec) {
  // https://webrtc.googlesource.com/src.git/+/db2f52ba88cf9f98211df2dabb3f8aca9251c4a2%5E%21/
  const capabilities = RTCRtpReceiver.getCapabilities("video")
  if (!capabilities) return
  const {codecs} = capabilities
  const vp8Index = codecs.findIndex((c) => c.mimeType === "video/VP8")
  if (vp8Index < 0) {
    // Reordering with index -1 would drop the last codec and put `undefined` first
    console.log("VP8 is not in the list of supported video codecs, not setting codec preferences")
    return
  }
  // VP8 first, all other codecs in their original order
  const preferred = [codecs[vp8Index], ...codecs.slice(0, vp8Index), ...codecs.slice(vp8Index + 1)]
  // On this stage transceiver.mid may not be set so using a sequence starting from 0 to decide which track.kind is inside
  call.connection.getTransceivers().forEach((t, index) => {
    const source = mediaSourceFromTransceiverMid(index.toString())
    const isVideo = source == CallMediaSource.Camera || source == CallMediaSource.ScreenVideo
    // Firefox doesn't have this function implemented:
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1396922
    if (!isVideo || !t.setCodecPreferences) return
    try {
      t.setCodecPreferences(preferred)
    } catch (error) {
      // Shouldn't be here but in case something goes wrong, it will allow to make a call with auto-selected codecs
      console.log("Failed to set codec preferences, trying without any preferences: " + error)
    }
  })
}
/**
 * Captures video from the given camera and starts sending it over the first
 * "sendrecv" video transceiver of the call.
 *
 * Does nothing (apart from logging) when there is no such transceiver. When the media cannot be
 * obtained, the error is logged and the permissions alert is requested.
 *
 * @param call the call to send the camera video in
 * @param camera which camera to capture from
 * @throws Error when the video elements are not present in the document
 */
async function startSendingCamera(call: Call, camera: VideoCamera): Promise<void> {
  console.log("Starting sending video...")
  const videos = getVideoElements()
  if (!videos) throw Error("no video elements")
  const transceivers = call.connection.getTransceivers()
  // The first video transceiver is used for sending video from camera. Following tracks are for other purposes
  const cameraTransceiver = transceivers.find((t) => t.receiver.track.kind == "video" && t.direction == "sendrecv")
  if (!cameraTransceiver) {
    console.log("No camera transceiver. Probably, calling to an old version")
    return
  }
  console.log(transceivers.map((t) => `${t.sender.track?.kind} ${t.receiver.track?.kind} ${t.direction}`))
  try {
    const captured = await getLocalMediaStream(call.localMediaSources.mic, true, camera)
    // Move the captured video tracks into the call's local stream and hand them to the sender
    captured.getVideoTracks().forEach((track) => {
      call.localStream.addTrack(track)
      cameraTransceiver.sender.replaceTrack(track)
      captured.removeTrack(track)
    })
    call.localMediaSources.camera = true
    call.cameraTrackWasSetBefore = true
    localOrPeerMediaSourcesChanged(call)
    changeLayout(call.layout)
  } catch (e: any) {
    console.log("Start sending camera error", e)
    desktopShowPermissionsAlert(CallMediaType.Video)
    return
  }
  // Without doing it manually Firefox shows black screen but video can be played in Picture-in-Picture
  videos.local.play().catch((e) => console.log(e))
}
/**
 * Starts screen sharing in the active call when it is off, and stops it when it is on.
 *
 * Starting: captures the screen, moves the captured tracks into the call's local screen stream
 * and attaches them to the screen audio/video senders. If the capture request fails,
 * nothing is changed.
 * Stopping: detaches the tracks from the screen senders, stops and removes them.
 * In both cases the local media source flags are flipped and the layout is refreshed.
 */
toggleScreenShare = async function () {
  const call = activeCall
  if (!call) return
  const videos = getVideoElements()
  if (!videos) throw Error("no video elements")
  const pc = call.connection
  const sharing = call.localMediaSources.screenVideo
  if (sharing) {
    for (const transceiver of pc.getTransceivers()) {
      const source = mediaSourceFromTransceiverMid(transceiver.mid)
      if (source == CallMediaSource.ScreenAudio || source == CallMediaSource.ScreenVideo) {
        transceiver.sender.replaceTrack(null)
      }
    }
    for (const track of call.localScreenStream.getTracks()) track.stop()
    for (const track of call.localScreenStream.getTracks()) call.localScreenStream.removeTrack(track)
  } else {
    let captured: MediaStream
    try {
      captured = await getLocalScreenCaptureStream()
    } catch (e: any) {
      return
    }
    // Move all captured tracks into the call's own screen stream
    for (const track of captured.getTracks()) {
      call.localScreenStream.addTrack(track)
      captured.removeTrack(track)
    }
    const screenTracks = call.localScreenStream.getTracks()
    const screenAudioTrack = screenTracks.find((track) => track.kind == "audio")
    const screenVideoTrack = screenTracks.find((track) => track.kind == "video")
    for (const transceiver of pc.getTransceivers()) {
      switch (mediaSourceFromTransceiverMid(transceiver.mid)) {
        case CallMediaSource.ScreenAudio:
          if (screenAudioTrack) {
            transceiver.sender.replaceTrack(screenAudioTrack)
            console.log("Replaced audio screen track")
          }
          break
        case CallMediaSource.ScreenVideo:
          if (screenVideoTrack) {
            transceiver.sender.replaceTrack(screenVideoTrack)
            // When the captured video track ends, toggle again to switch the sharing off
            screenVideoTrack.onended = () => {
              console.log("Ended screen video track")
              toggleScreenShare()
            }
            console.log("Replaced screen video track")
          }
          break
      }
    }
    // videos.localScreen.pause()
    // videos.localScreen.srcObject = call.localScreenStream
    videos.localScreen.play().catch((e) => console.log(e))
  }
  if (allowSendScreenAudio) {
    call.localMediaSources.screenAudio = !call.localMediaSources.screenAudio
  }
  call.localMediaSources.screenVideo = !call.localMediaSources.screenVideo
  localOrPeerMediaSourcesChanged(call)
  changeLayout(call.layout)
}
/**
 * Replaces the local mic or camera track of the call with a newly captured one
 * (or with nothing when `enable` is false).
 *
 * @param call the call to update
 * @param source the media source being replaced; Mic selects audio tracks, anything else video tracks
 * @param enable whether a new track of that source should be captured
 * @param camera which camera to capture from; also stored as the call's local camera
 * @returns false when the new media could not be obtained, true otherwise
 * @throws Error when the video elements are not present in the document
 */
async function replaceMedia(call: Call, source: CallMediaSource, enable: boolean, camera: VideoCamera): Promise<boolean> {
  const videos = getVideoElements()
  if (!videos) throw Error("no video elements")
  const pc = call.connection
  const isMic = source == CallMediaSource.Mic
  // disabling track first, then asking for a new one.
  // doing it vice versa gives an error like "too many cameras were open" on some Android devices or webViews
  // which means the second camera will never be opened
  const oldTracks = isMic ? call.localStream.getAudioTracks() : call.localStream.getVideoTracks()
  oldTracks.forEach((track) => {
    const onlyDisable = !isDesktop && isMic && !stopTrackOnAndroid
    if (onlyDisable) track.enabled = false
    else track.stop()
    call.localStream.removeTrack(track)
  })
  let captured: MediaStream
  try {
    captured = await getLocalMediaStream(isMic && enable, source == CallMediaSource.Camera && enable, camera)
  } catch (e: any) {
    console.log("Replace media error", e)
    desktopShowPermissionsAlert(isMic ? CallMediaType.Audio : CallMediaType.Video)
    return false
  }
  // Move the captured tracks into the call's local stream
  captured.getTracks().forEach((track) => {
    call.localStream.addTrack(track)
    captured.removeTrack(track)
  })
  call.localCamera = camera
  const audioTracks = call.localStream.getAudioTracks()
  const videoTracks = call.localStream.getVideoTracks()
  replaceTracks(pc, CallMediaSource.Mic, audioTracks)
  replaceTracks(pc, CallMediaSource.Camera, videoTracks)
  videos.local.play().catch((e) => console.log("replace media: local play", JSON.stringify(e)))
  call.localMediaSources.mic = audioTracks.length > 0
  call.localMediaSources.camera = videoTracks.length > 0
  localOrPeerMediaSourcesChanged(call)
  changeLayout(call.layout)
  return true
}
/**
 * Sets the given tracks on the sender of the first transceiver that carries `source`.
 * With an empty `tracks` list the sender's track is set to null.
 * Does nothing when no transceiver matches the source.
 *
 * @param pc peer connection whose transceivers are searched
 * @param source media source identifying the transceiver (by its mid)
 * @param tracks tracks to pass to the sender, in order
 */
function replaceTracks(pc: RTCPeerConnection, source: CallMediaSource, tracks: MediaStreamTrack[]) {
  const matching = pc.getTransceivers().find((transceiver) => mediaSourceFromTransceiverMid(transceiver.mid) == source)
  const sender = matching?.sender
  if (!sender) return
  if (tracks.length == 0) {
    console.log("Media track set to null")
    sender.replaceTrack(null)
    return
  }
  tracks.forEach((track) => {
    console.log("Replaced media track")
    sender.replaceTrack(track)
  })
}
/**
 * Brings the local stream of the not-yet-connected call in line with the currently
 * selected media sources and the requested camera.
 *
 * Tracks of sources that are switched off are stopped (or only disabled for audio on Android,
 * unless stopTrackOnAndroid is set) and removed; video tracks are also dropped when the camera
 * changes. Missing tracks are then captured again, and the local video element is (re)started.
 * Does nothing when there is no not-connected call stream/camera or no video elements.
 *
 * @param newCamera the camera that should be used for the local video
 */
async function recreateLocalStreamWhileNotConnected(newCamera: VideoCamera) {
  const videos = getVideoElements()
  const localStream = notConnectedCall?.localStream
  const oldCamera = notConnectedCall?.localCamera
  if (!localStream || !oldCamera || !videos) return
  const cameraChanged = oldCamera != newCamera
  if (!inactiveCallMediaSources.mic) {
    for (const track of localStream.getAudioTracks()) {
      if (isDesktop || stopTrackOnAndroid) track.stop()
      else track.enabled = false
    }
    for (const track of localStream.getAudioTracks()) localStream.removeTrack(track)
  }
  if (!inactiveCallMediaSources.camera || cameraChanged) {
    for (const track of localStream.getVideoTracks()) track.stop()
    for (const track of localStream.getVideoTracks()) localStream.removeTrack(track)
  }
  // Only request what is switched on and not present in the stream already
  const needMic = inactiveCallMediaSources.mic && localStream.getAudioTracks().length == 0
  const needCamera =
    inactiveCallMediaSources.camera && (localStream.getVideoTracks().length == 0 || cameraChanged) && (await browserHasCamera())
  try {
    const captured = await getLocalMediaStream(needMic, needCamera, newCamera)
    for (const track of captured.getTracks()) {
      localStream.addTrack(track)
      captured.removeTrack(track)
    }
    if (notConnectedCall && localStream.getVideoTracks().length > 0) {
      notConnectedCall.localCamera = newCamera
    }
  } catch (e) {
    console.log("Error while enabling camera in not connected call", e)
  }
  if (!videos.local.srcObject && localStream.getTracks().length > 0) {
    videos.local.srcObject = localStream
  }
  videos.local.play().catch((e) => console.log(e))
}
/**
 * Maps a transceiver mid to the media source it carries:
 * "0" - mic, "1" - camera, "2" - screen audio, "3" - screen video.
 * Any other value (including null) maps to Unknown.
 */
function mediaSourceFromTransceiverMid(mid: string | null) {
  if (mid === "0") return CallMediaSource.Mic
  if (mid === "1") return CallMediaSource.Camera
  if (mid === "2") return CallMediaSource.ScreenAudio
  if (mid === "3") return CallMediaSource.ScreenVideo
  return CallMediaSource.Unknown
}
/**
 * Attaches the frame transform (encryption or decryption) to a sender or receiver,
 * choosing the mechanism that is available:
 * 1. RTCRtpScriptTransform running in the worker, when a worker is given and the API exists;
 * 2. encoded streams of the peer, processed in the worker when it is given;
 * 3. encoded streams of the peer, processed in this context with the already imported key.
 * When none of them is available, only a log line is written.
 *
 * @param operation the transform operation to set up
 * @param peer the sender or receiver to attach the transform to
 * @param worker optional worker that performs the transform
 * @param aesKey the key as a string, passed to the worker
 * @param key the imported key, used when the transform runs without the worker
 * @param media the media type, passed to the worker
 */
function setupPeerTransform(
  operation: TransformOperation,
  peer: RTCRtpReceiverWithEncryption | RTCRtpSenderWithEncryption,
  worker: Worker | undefined,
  aesKey: string,
  key: CryptoKey,
  media: CallMediaType
) {
  if (worker && "RTCRtpScriptTransform" in window) {
    console.log(`${operation} with worker & RTCRtpScriptTransform`)
    peer.transform = new RTCRtpScriptTransform(worker, {operation, aesKey, media})
    return
  }
  if (!("createEncodedStreams" in peer)) {
    console.log(`no ${operation}`)
    return
  }
  const {readable, writable} = peer.createEncodedStreams()
  if (!worker) {
    console.log(`${operation} without worker`)
    const transform = callCrypto.transformFrame[operation](key)
    readable.pipeThrough(new TransformStream({transform})).pipeTo(writable)
    return
  }
  console.log(`${operation} with worker`)
  // The streams are transferred to the worker, not copied
  worker.postMessage({operation, readable, writable, aesKey, media}, [readable, writable] as unknown as Transferable[])
}
/**
 * Polls the receiver statistics of the transceiver once per second and reports,
 * via onMediaMuteUnmute, whether the remote media stopped or resumed arriving:
 * muted after 3 consecutive seconds without new received bytes, unmuted as soon as
 * the bytes counter grows again. Polling stops when the track ends.
 *
 * @param transceiver the transceiver whose receiver statistics are watched
 * @param track the track whose end stops the polling
 */
function setupMuteUnmuteListener(transceiver: RTCRtpTransceiver, track: MediaStreamTrack) {
  // console.log("Setting up mute/unmute listener in the call without encryption for mid = ", transceiver.mid)
  const muteAfterSeconds = 3
  let inboundStatsId = ""
  let lastBytesReceived = 0
  // muted initially: the counter starts above the threshold, so the first received bytes report "unmuted"
  let mutedSeconds = 4
  const checkReceivedBytes = async () => {
    const stats: RTCStatsReport = await transceiver.receiver.getStats()
    if (!inboundStatsId) {
      stats.forEach((report) => {
        if (report.type == "inbound-rtp") inboundStatsId = report.id
      })
    }
    if (!inboundStatsId) return
    // even though the MDN site says `bytesReceived` is available in WebView 80+, in reality it's available even in 69
    const bytes = (stats as any).get(inboundStatsId)?.bytesReceived
    if (bytes <= lastBytesReceived) {
      mutedSeconds++
      if (mutedSeconds == muteAfterSeconds) onMediaMuteUnmute(transceiver.mid, true)
      return
    }
    if (mutedSeconds >= muteAfterSeconds) onMediaMuteUnmute(transceiver.mid, false)
    lastBytesReceived = bytes
    mutedSeconds = 0
  }
  const statsInterval = setInterval(checkReceivedBytes, 1000)
  track.onended = () => {
    clearInterval(statsInterval)
  }
}
/**
 * Handles a remote media source becoming muted or unmuted.
 *
 * When the stored state of the source actually changes, the peer media sources of the
 * active call are updated, a "peerMedia" response is sent to the native side and,
 * on unmute, the corresponding remote video element is started.
 * In all cases (for an active call) pending remote tracks of the source are attached on unmute,
 * and the media sources change handler and the layout are refreshed.
 *
 * @param transceiverMid mid of the transceiver the change was detected on
 * @param mute true when the source became muted, false when unmuted
 * @throws Error when the video elements are not present in the document
 */
function onMediaMuteUnmute(transceiverMid: string | null, mute: boolean) {
  const videos = getVideoElements()
  if (!videos) throw Error("no video elements")
  if (!activeCall) return
  const source = mediaSourceFromTransceiverMid(transceiverMid)
  console.log(`Mute/unmute ${source} track = ${mute} with mid = ${transceiverMid}`)
  const sources = activeCall.peerMediaSources
  // Flag of the peer media sources that corresponds to the source; none for an unknown source
  let flag: keyof CallMediaSources | undefined
  switch (source) {
    case CallMediaSource.Mic:
      flag = "mic"
      break
    case CallMediaSource.Camera:
      flag = "camera"
      break
    case CallMediaSource.ScreenAudio:
      flag = "screenAudio"
      break
    case CallMediaSource.ScreenVideo:
      flag = "screenVideo"
      break
  }
  // The flag holds "enabled", so it equals `mute` only when the state has to be flipped
  if (flag && sources[flag] == mute) {
    const resp: WRPeerMedia = {
      type: "peerMedia",
      source: source,
      enabled: !mute,
    }
    sources[flag] = !mute
    activeCall.peerMediaSources = sources
    sendMessageToNative({resp: resp})
    if (!mute) {
      const isScreen = source == CallMediaSource.ScreenAudio || source == CallMediaSource.ScreenVideo
      const player = isScreen ? videos.remoteScreen : videos.remote
      player.play().catch((e) => console.log(e))
    }
  }
  if (!mute) addRemoteTracksWhenUnmuted(source, activeCall)
  localOrPeerMediaSourcesChanged(activeCall)
  // Make sure that remote camera and remote screen video in their places and shown/hidden based on layout type currently in use
  changeLayout(activeCall.layout)
}
/*
  New remote tracks are not added to the remote streams when they arrive. They are kept in a map,
  and a track is added to its stream only once it becomes "unmuted".
  This workaround is needed because Safari doesn't play one track of a stream if another one is not
  playing too, e.g. there is no audio while a video track is present in the stream but muted.
  At the same time it is possible to have only one currently active track, or no active track at all.

  Moves the pending track of `source` (if any) from the map to the remote stream for mic and camera,
  or to the remote screen stream for any other source.
*/
function addRemoteTracksWhenUnmuted(source: CallMediaSource, call: Call) {
  const pendingTrack = call.remoteTracks.get(source)
  if (!pendingTrack) return
  const isCallMedia = source == CallMediaSource.Mic || source == CallMediaSource.Camera
  const targetStream = isCallMedia ? call.remoteStream : call.remoteScreenStream
  targetStream.addTrack(pendingTrack)
  call.remoteTracks.delete(source)
}
/**
 * Requests a local media stream with the given sources.
 *
 * @param mic whether audio should be captured
 * @param camera whether video should be captured
 * @param facingMode which camera to use for the video
 * @returns the captured stream, or an empty stream (without asking the browser) when neither source is requested
 */
async function getLocalMediaStream(mic: boolean, camera: boolean, facingMode: VideoCamera): Promise<MediaStream> {
  if (mic || camera) {
    return await navigator.mediaDevices.getUserMedia(callMediaConstraints(mic, camera, facingMode))
  }
  return new MediaStream()
}
/**
 * Requests a screen capture stream: video at 24 frames per second, with audio
 * when allowSendScreenAudio is set.
 */
function getLocalScreenCaptureStream(): Promise<MediaStream> {
  // Size constraints are currently not applied to the captured video:
  // width: {min: 480, ideal: 720, max: 1280},
  // aspectRatio: 1.33,
  const video = {frameRate: 24}
  // This works with Chrome, Edge, Opera, but not with Firefox and Safari
  // systemAudio: "include"
  const constraints: any /* DisplayMediaStreamConstraints */ = {video, audio: allowSendScreenAudio}
  return navigator.mediaDevices.getDisplayMedia(constraints)
}
/**
 * Checks whether the browser reports at least one video input device.
 *
 * @returns true when a "videoinput" device is listed; false when none is listed
 * or when the devices cannot be enumerated
 */
async function browserHasCamera(): Promise<boolean> {
  try {
    const devices = await navigator.mediaDevices.enumerateDevices()
    const cameras = devices.filter((device) => device.kind == "videoinput")
    const hasCamera = cameras.length > 0
    console.log(`Camera is available: ${hasCamera}`)
    return hasCamera
  } catch (error) {
    console.log("Error while enumerating devices: " + error, error)
    return false
  }
}
/**
 * Builds getUserMedia constraints for a call.
 *
 * @param mic whether audio is requested
 * @param camera whether video is requested
 * @param facingMode which camera to use; only included when video is requested
 * @returns constraints with `video: false` when the camera is not requested, otherwise with
 * 24 fps video, width between 480 and 1280 (ideally 720) and 1.33 aspect ratio
 */
function callMediaConstraints(mic: boolean, camera: boolean, facingMode: VideoCamera): MediaStreamConstraints {
  if (!camera) return {audio: mic, video: false}
  const width = {min: 480, ideal: 720, max: 1280}
  return {
    audio: mic,
    video: {frameRate: 24, width, aspectRatio: 1.33, facingMode},
  }
}
/**
 * Tells whether encoded frames can be transformed in this browser: either both RTCRtpSender and
 * RTCRtpReceiver provide `createEncodedStreams`, or the worker is used and
 * `RTCRtpScriptTransform` is available.
 *
 * @param useWorker whether the worker is going to be used
 */
function supportsInsertableStreams(useWorker: boolean | undefined): boolean {
  const hasEncodedStreams = "createEncodedStreams" in RTCRtpSender.prototype && "createEncodedStreams" in RTCRtpReceiver.prototype
  if (hasEncodedStreams) return true
  return !!useWorker && "RTCRtpScriptTransform" in window
}
/** Stops all tracks of the local stream and of the local screen stream of the active call, if there is one. */
function shutdownCameraAndMic() {
  if (!activeCall) return
  for (const stream of [activeCall.localStream, activeCall.localScreenStream]) {
    for (const track of stream.getTracks()) track.stop()
  }
}
/** Detaches the media streams from all four video elements; does nothing when the elements are not found. */
function resetVideoElements() {
  const videos = getVideoElements()
  if (!videos) return
  const {local, localScreen, remote, remoteScreen} = videos
  for (const video of [local, localScreen, remote, remoteScreen]) {
    video.srcObject = null
  }
}
// function setupVideoElement(video: HTMLElement) {
// // TODO use display: none
// video.style.opacity = "0"
// video.onplaying = () => {
// video.style.opacity = "1"
// }
// }
/**
 * Enables or disables sending of the local mic or camera in the active call, using the tracks
 * that are already present in the stream.
 *
 * For each track of the requested kind, the matching transceiver (mic for audio tracks,
 * camera for video tracks) either gets the track set on its sender, or the track is
 * stopped/disabled, removed from the stream and the sender's track is set to null.
 *
 * @param s the stream whose tracks are used
 * @param source Camera selects the video tracks of the stream, anything else the audio tracks
 * @param enable whether the source should be sent
 * @returns true when a local media source flag was updated; false when there is no active call or
 * nothing matched (in the latter case the permissions alert is requested)
 */
function enableMedia(s: MediaStream, source: CallMediaSource, enable: boolean): boolean {
  if (!activeCall) return false
  const isMic = source == CallMediaSource.Mic
  const isCamera = source == CallMediaSource.Camera
  let changedSource = false
  for (const track of isCamera ? s.getVideoTracks() : s.getAudioTracks()) {
    const isAudioTrack = track.kind == CallMediaType.Audio
    const isVideoTrack = track.kind == CallMediaType.Video
    for (const transceiver of activeCall.connection.getTransceivers()) {
      const transceiverSource = mediaSourceFromTransceiverMid(transceiver.mid)
      const carriesTrackKind =
        (isAudioTrack && transceiverSource == CallMediaSource.Mic) || (isVideoTrack && transceiverSource == CallMediaSource.Camera)
      if (!carriesTrackKind) continue
      if (enable) {
        transceiver.sender.replaceTrack(track)
      } else {
        // Audio tracks on Android are only disabled, unless stopTrackOnAndroid is set
        const onlyDisable = !isDesktop && !isVideoTrack && !stopTrackOnAndroid
        if (onlyDisable) track.enabled = false
        else track.stop()
        s.removeTrack(track)
        transceiver.sender.replaceTrack(null)
      }
      if (isMic) {
        activeCall.localMediaSources.mic = enable
        changedSource = true
      } else if (isCamera) {
        activeCall.localMediaSources.camera = enable
        changedSource = true
      }
    }
  }
  if (!changedSource) {
    console.log("Enable media error")
    desktopShowPermissionsAlert(isMic ? CallMediaType.Audio : CallMediaType.Video)
    return false
  }
  localOrPeerMediaSourcesChanged(activeCall)
  changeLayout(activeCall.layout)
  return true
}
/**
 * Records whether the peer runs an old version and, if it does, adjusts the call to it:
 * the remote mic is reported as unmuted, the remote camera is reported as unmuted when a remote
 * video track exists, and the local camera is switched off when the peer sends no video.
 *
 * @param peerHasOldVersion whether the peer has an old version
 * @param activeCall the call to adjust
 */
function adaptToOldVersion(peerHasOldVersion: boolean, activeCall: Call) {
  activeCall.peerHasOldVersion = peerHasOldVersion
  if (!peerHasOldVersion) return
  console.log(
    "The peer has an old version.",
    "Tracks size:",
    activeCall.remoteStream.getAudioTracks().length,
    activeCall.remoteStream.getVideoTracks().length
  )
  onMediaMuteUnmute("0", false)
  // The video track may be either attached to the remote stream already or still pending
  const hasRemoteVideo = activeCall.remoteStream.getVideoTracks().length > 0 || activeCall.remoteTracks.get(CallMediaSource.Camera)
  if (hasRemoteVideo) {
    onMediaMuteUnmute("1", false)
  }
  if (!activeCall.localMediaSources.camera || activeCall.peerMediaSources.camera) return
  console.log("Stopping video track for the old version")
  for (const track of activeCall.localStream.getVideoTracks()) {
    track.stop()
    activeCall.localStream.removeTrack(track)
  }
  activeCall.localMediaSources.camera = false
  sendMessageToNative({resp: {type: "ok"}, command: {type: "media", source: CallMediaSource.Camera, enable: false}})
  localOrPeerMediaSourcesChanged(activeCall)
  changeLayout(activeCall.layout)
}
/** Tells whether the page runs outside of desktop in a web view whose user agent reports Chrome 69 or 70. */
function webView69Or70(): boolean {
  if (isDesktop) return false
  const userAgent = navigator.userAgent
  return ["Chrome/69.", "Chrome/70."].some((version) => userAgent.includes(version))
}
/**
 * Returns a copy of the session description without the lines containing `a=extmap-allow-mixed`.
 *
 * Having `a=extmap-allow-mixed` in SDP causes an exception on old WebViews, because of a bug
 * in the SDP parser of old Chrome web view versions:
 * https://groups.google.com/a/chromium.org/g/blink-dev/c/7z3uvp0-ZAc/m/8Z7qpp71BgAJ
 *
 * @param desc the session description to adapt
 * @returns description of the same type; its sdp is an empty string when `desc` has no sdp
 */
function adaptSdpToOldWebView(desc: RTCSessionDescriptionInit): RTCSessionDescriptionInit {
  const lines = desc.sdp?.split("\n") ?? []
  const sdp = lines.filter((line) => !line.includes("a=extmap-allow-mixed")).join("\n")
  return {sdp, type: desc.type}
}
return processCommand
})()
/** Switches the remote video element between "contain" and "cover" object fit ("contain" when it is anything else). */
function toggleRemoteVideoFitFill() {
  const {style} = document.getElementById("remote-video-stream")!
  style.objectFit = style.objectFit == "contain" ? "cover" : "contain"
}
/** Switches the remote screen video element between "contain" and "cover" object fit ("contain" when it is anything else). */
function toggleRemoteScreenVideoFitFill() {
  const {style} = document.getElementById("remote-screen-video-stream")!
  style.objectFit = style.objectFit == "contain" ? "cover" : "contain"
}
/**
 * Flips the `enabled` state of every track of the given media type in the stream.
 *
 * @param s the stream whose tracks are toggled
 * @param media Video selects the video tracks, anything else the audio tracks
 * @returns the new `enabled` state of the last toggled track; false when there is
 * no active call or no track of that type
 */
function togglePeerMedia(s: MediaStream, media: CallMediaType): boolean {
  if (!activeCall) return false
  const tracks = media == CallMediaType.Video ? s.getVideoTracks() : s.getAudioTracks()
  let lastEnabled = false
  tracks.forEach((track) => {
    track.enabled = !track.enabled
    lastEnabled = track.enabled
  })
  return lastEnabled
}
/**
 * Applies CSS classes and visibility of the video elements for the given layout, based on which
 * media sources are currently active locally and on the peer side.
 * The sources of the active call are used; without an active call the inactive call sources
 * are used locally and the peer is considered to send nothing.
 * Does nothing when the video elements are not found.
 *
 * @param layout the layout to apply
 */
function changeLayout(layout: LayoutType) {
  const videos = getVideoElements()
  const localSources = activeCall?.localMediaSources ?? inactiveCallMediaSources
  const peerSources = activeCall?.peerMediaSources ?? {mic: false, camera: false, screenAudio: false, screenVideo: false}
  if (!videos || !localSources || !peerSources) return
  const {local, localScreen, remote, remoteScreen} = videos
  const visibleIf = (shown: boolean) => (shown ? "visible" : "hidden")
  if (layout == LayoutType.Default) {
    local.className = "inline"
    remote.className = peerSources.screenVideo ? "collapsed" : "inline"
    local.style.visibility = "visible"
    remote.style.visibility = visibleIf(peerSources.camera)
    remoteScreen.style.visibility = visibleIf(peerSources.screenVideo)
  } else if (layout == LayoutType.LocalVideo) {
    local.className = "fullscreen"
    local.style.visibility = "visible"
    remote.style.visibility = "hidden"
    remoteScreen.style.visibility = "hidden"
  } else if (layout == LayoutType.RemoteVideo) {
    if (peerSources.screenVideo) {
      // The screen takes the whole view; the camera (if sent) is shown collapsed on top of it
      remoteScreen.className = "fullscreen"
      remoteScreen.style.visibility = "visible"
      remote.style.visibility = visibleIf(peerSources.camera)
      remote.className = peerSources.camera ? "collapsed-pip" : "inline"
    } else if (peerSources.camera) {
      remote.className = "fullscreen"
      remote.style.visibility = "visible"
      remoteScreen.style.visibility = "hidden"
      remoteScreen.className = "inline"
    } else {
      remote.style.visibility = "hidden"
    }
    local.style.visibility = "hidden"
  }
  localScreen.style.visibility = visibleIf(localSources.screenVideo)
  if (!isDesktop && !localSources.camera) {
    resetLocalVideoElementHeight(local)
  }
}
/**
 * Looks up the four video elements of the call page by their ids.
 *
 * @returns the elements, or undefined when any of them is missing or is not of the expected type
 * (a video element for the local video, a media element for the others)
 */
function getVideoElements(): VideoElements | undefined {
  const local = document.getElementById("local-video-stream")
  const localScreen = document.getElementById("local-screen-video-stream")
  const remote = document.getElementById("remote-video-stream")
  const remoteScreen = document.getElementById("remote-screen-video-stream")
  // `instanceof` is false for null, so missing elements are rejected here as well
  if (!(local instanceof HTMLVideoElement)) return undefined
  if (!(localScreen instanceof HTMLMediaElement)) return undefined
  if (!(remote instanceof HTMLMediaElement)) return undefined
  if (!(remoteScreen instanceof HTMLMediaElement)) return undefined
  return {local, localScreen, remote, remoteScreen}
}
/**
 * Drops the inline height override of the local video element.
 *
 * This lets CSS figure out the size of the view by itself on Android: rotating to a different
 * orientation without dropping the override would leave the view with wrong proportions
 * while no video is present.
 */
function resetLocalVideoElementHeight(local: HTMLVideoElement) {
  const {style} = local
  style.height = ""
}
/**
 * On desktop, shows an alert asking to allow access to the mic (for audio)
 * or to the mic and camera (for any other media type). Does nothing when not on desktop.
 */
function desktopShowPermissionsAlert(mediaType: CallMediaType) {
  if (!isDesktop) return
  const message =
    mediaType == CallMediaType.Audio
      ? "Permissions denied. Please, allow access to mic to make the call working and hit unmute button. Don't reload the page."
      : "Permissions denied. Please, allow access to mic and camera to make the call working and hit unmute/camera button. Don't reload the page."
  window.alert(message)
}
/**
 * Picks the permission description from the command that matches the current browser.
 *
 * @returns `command.safari` when `window.safari` is set; `command.chrome` when the user agent is
 * Chrome with a Google vendor, or Firefox; an empty string otherwise
 */
function permissionDescription(command: WCPermission): string {
  if ((window as any).safari) return command.safari
  const userAgent = navigator.userAgent
  const isGoogleChrome = userAgent.includes("Chrome") && navigator.vendor.includes("Google Inc")
  if (isGoogleChrome || userAgent.includes("Firefox")) return command.chrome
  return ""
}
// Given a key, produces an asynchronous frame transformer: a function that receives an encoded frame
// together with the controller of a TransformStream.
type TransformFrameFunc = (key: CryptoKey) => (frame: RTCEncodedVideoFrame, controller: TransformStreamDefaultController) => Promise<void>
// Set of cryptographic and encoding helpers used for call media encryption (returned by callCryptoFunction)
interface CallCrypto {
  // frame transformer factory for each transform operation
  transformFrame: {[x in TransformOperation]: TransformFrameFunc}
  // imports the key passed as a string into a CryptoKey
  decodeAesKey: (aesKey: string) => Promise<CryptoKey>
  // converts a string to bytes, one byte per character code
  encodeAscii: (s: string) => Uint8Array
  // converts bytes back to a string, one character per byte
  decodeAscii: (a: Uint8Array) => string
  // encodes bytes as base64url characters (returned as bytes)
  encodeBase64url: (a: Uint8Array) => Uint8Array
  // decodes base64url characters (passed as bytes); undefined is returned for an invalid input
  decodeBase64url: (b64: Uint8Array) => Uint8Array | undefined
}
/**
 * Cryptography function - it is loaded both in the main window and in the worker context
 * (if the worker is used).
 * NOTE(review): because of that it presumably has to stay self-contained, relying only on
 * the `callCrypto` global and browser APIs - confirm before extracting anything out of it.
 *
 * @returns frame encryption/decryption transforms (AES-GCM with a random 12-byte IV appended to
 * each encrypted frame) together with AES key import and ASCII/base64url helpers
 */
function callCryptoFunction(): CallCrypto {
  // How many initial bytes of a frame stay unencrypted, per frame type
  const initialPlainTextRequired = {
    key: 10,
    delta: 3,
    empty: 1,
  }
  const IV_LENGTH = 12

  // Encrypted frame layout: [plaintext header][ciphertext][IV]
  function encryptFrame(key: CryptoKey): (frame: RTCEncodedVideoFrame, controller: TransformStreamDefaultController) => Promise<void> {
    return async (frame, controller) => {
      const bytes = new Uint8Array(frame.data)
      // Frames without a known type keep 1 byte unencrypted
      const headerSize = initialPlainTextRequired[frame.type] || 1
      const iv = randomIV()
      const header = bytes.subarray(0, headerSize)
      const payload = bytes.subarray(headerSize, bytes.byteLength)
      try {
        let encrypted = new Uint8Array(0)
        if (payload.length) {
          encrypted = new Uint8Array(await crypto.subtle.encrypt({name: "AES-GCM", iv: iv.buffer}, key, payload))
        }
        frame.data = concatN(header, encrypted, iv).buffer
        controller.enqueue(frame)
      } catch (e) {
        console.log(`encryption error ${e}`)
        throw e
      }
    }
  }

  // Reverses encryptFrame: the IV is taken from the last IV_LENGTH bytes of the frame
  function decryptFrame(key: CryptoKey): (frame: RTCEncodedVideoFrame, controller: TransformStreamDefaultController) => Promise<void> {
    return async (frame, controller) => {
      const bytes = new Uint8Array(frame.data)
      const headerSize = initialPlainTextRequired[frame.type] || 1
      const ivStart = bytes.byteLength - IV_LENGTH
      const header = bytes.subarray(0, headerSize)
      const encrypted = bytes.subarray(headerSize, ivStart)
      const iv = bytes.subarray(ivStart, bytes.byteLength)
      try {
        let decrypted = new Uint8Array(0)
        if (encrypted.length) {
          decrypted = new Uint8Array(await crypto.subtle.decrypt({name: "AES-GCM", iv}, key, encrypted))
        }
        frame.data = concatN(header, decrypted).buffer
        controller.enqueue(frame)
      } catch (e) {
        console.log(`decryption error ${e}`)
        throw e
      }
    }
  }

  // Imports a base64url-encoded key as an AES-GCM key usable for both encryption and decryption
  function decodeAesKey(aesKey: string): Promise<CryptoKey> {
    const rawKey = callCrypto.decodeBase64url(callCrypto.encodeAscii(aesKey))
    return crypto.subtle.importKey("raw", rawKey!, {name: "AES-GCM", length: 256}, true, ["encrypt", "decrypt"])
  }

  // Concatenates all passed arrays into a new one
  function concatN(...bs: Uint8Array[]): Uint8Array {
    let total = 0
    for (const b of bs) total += b.byteLength
    const joined = new Uint8Array(total)
    let offset = 0
    for (const b of bs) {
      joined.set(b, offset)
      offset += b.byteLength
    }
    return joined
  }

  function randomIV() {
    const iv = new Uint8Array(IV_LENGTH)
    return crypto.getRandomValues(iv)
  }

  // Character codes of the base64url alphabet, indexed by the 6-bit value
  const base64urlChars = new Uint8Array(
    Array.from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", (c) => c.charCodeAt(0))
  )
  // Reverse table: 6-bit value by character code, undefined for characters outside of the alphabet
  const base64urlLookup = new Array(256) as (number | undefined)[]
  for (let value = 0; value < base64urlChars.length; value++) {
    base64urlLookup[base64urlChars[value]] = value
  }
  const char_equal = "=".charCodeAt(0)

  function encodeAscii(s: string): Uint8Array {
    const codes = new Uint8Array(s.length)
    for (let i = 0; i < s.length; i++) codes[i] = s.charCodeAt(i)
    return codes
  }

  function decodeAscii(a: Uint8Array): string {
    const chars: string[] = []
    for (let i = 0; i < a.length; i++) chars.push(String.fromCharCode(a[i]))
    return chars.join("")
  }

  // Encodes bytes as base64url characters, padded with "=" to a multiple of 4 characters
  function encodeBase64url(a: Uint8Array): Uint8Array {
    const size = a.length
    const encodedSize = Math.ceil(size / 3) * 4
    const encoded = new Uint8Array(encodedSize)
    for (let i = 0, j = 0; i < size; i += 3, j += 4) {
      // Bytes past the end of the input are read as undefined and act as zero bits here
      encoded[j] = base64urlChars[a[i] >> 2]
      encoded[j + 1] = base64urlChars[((a[i] & 3) << 4) | (a[i + 1] >> 4)]
      encoded[j + 2] = base64urlChars[((a[i + 1] & 15) << 2) | (a[i + 2] >> 6)]
      encoded[j + 3] = base64urlChars[a[i + 2] & 63]
    }
    const remainder = size % 3
    if (remainder) encoded[encodedSize - 1] = char_equal
    if (remainder === 1) encoded[encodedSize - 2] = char_equal
    return encoded
  }

  // Decodes padded base64url characters; returns undefined when the length is not
  // a multiple of 4 or when a character outside of the alphabet is found
  function decodeBase64url(b64: Uint8Array): Uint8Array | undefined {
    if (b64.length % 4) return undefined
    let end = b64.length
    let decodedSize = (end * 3) / 4
    // Up to two trailing "=" are padding, each of them stands for one missing byte
    for (let pad = 0; pad < 2 && b64[end - 1] === char_equal; pad++) {
      end--
      decodedSize--
    }
    const decoded = new Uint8Array(decodedSize)
    let pos = 0
    for (let i = 0; i < end; i += 4) {
      const enc1 = base64urlLookup[b64[i]]
      const enc2 = i + 1 < end ? base64urlLookup[b64[i + 1]] : 0
      const enc3 = i + 2 < end ? base64urlLookup[b64[i + 2]] : 0
      const enc4 = i + 3 < end ? base64urlLookup[b64[i + 3]] : 0
      if (enc1 === undefined || enc2 === undefined || enc3 === undefined || enc4 === undefined) return undefined
      // Writes past the end of the typed array (for a padded last group) are ignored
      decoded[pos++] = (enc1 << 2) | (enc2 >> 4)
      decoded[pos++] = ((enc2 & 15) << 4) | (enc3 >> 2)
      decoded[pos++] = ((enc3 & 3) << 6) | (enc4 & 63)
    }
    return decoded
  }

  return {
    transformFrame: {encrypt: encryptFrame, decrypt: decryptFrame},
    decodeAesKey,
    encodeAscii,
    decodeAscii,
    encodeBase64url,
    decodeBase64url,
  }
}
/**
 * If the worker is used for encryption/decryption, the code of this function (as a string)
 * is used to load the worker via Blob.
 * The worker has to be optional, as it crashes in Android web view, regardless of how it is loaded.
 *
 * Registers the handlers that set up the frame transform in the worker context:
 * for streams received in a message, and for the "rtctransform" event where it is supported.
 */
function workerFunction() {
  interface WorkerMessage {
    data: Transform
  }

  interface Transform {
    operation: TransformOperation
    readable: ReadableStream<RTCEncodedVideoFrame>
    writable: WritableStream<RTCEncodedVideoFrame>
    aesKey: string
  }

  // encryption with createEncodedStreams support: the streams arrive in the message
  self.addEventListener("message", async (message: WorkerMessage) => {
    await setupTransform(message.data)
  })

  // encryption using RTCRtpScriptTransform: the streams are provided by the transformer of the event
  if ("RTCTransformEvent" in self) {
    self.addEventListener("rtctransform", async (event: any) => {
      const transformer = event.transformer
      try {
        const {operation, aesKey} = transformer.options
        const {readable, writable} = transformer
        await setupTransform({operation, aesKey, readable, writable})
        self.postMessage({result: "setupTransform success"})
      } catch (e) {
        self.postMessage({message: `setupTransform error: ${(e as Error).message}`})
      }
    })
  }

  // Imports the key and pipes the frames from `readable` to `writable` through the requested operation
  async function setupTransform(params: Transform): Promise<void> {
    const {operation, aesKey, readable, writable} = params
    const key = await callCrypto.decodeAesKey(aesKey)
    const transform = callCrypto.transformFrame[operation](key)
    readable.pipeThrough(new TransformStream({transform})).pipeTo(writable)
  }
}