Browse Source

initial voice

pull/101/head
Dan 7 years ago
parent
commit
a7379e0e89
  1. 2
      disco/gateway/packets.py
  2. 86
      disco/voice/client.py
  3. 4
      disco/voice/packets.py
  4. 132
      disco/voice/udp.py

2
disco/gateway/packets.py

@ -16,5 +16,5 @@ OPCode = Enum(
INVALID_SESSION=9,
HELLO=10,
HEARTBEAT_ACK=11,
GUILD_SYNC=12,
GUILD_SYNC=12
)

86
disco/voice/client.py

@ -3,6 +3,8 @@ from __future__ import print_function
import gevent
import time
from collections import namedtuple
from holster.enum import Enum
from holster.emitter import Emitter
@ -13,6 +15,16 @@ from disco.gateway.packets import OPCode
from disco.voice.packets import VoiceOPCode
from disco.voice.udp import UDPVoiceClient
# Audio codecs offered in SELECT_PROTOCOL; a codec's index in this tuple
# determines the priority that is advertised for it.
# NOTE(review): disco/voice/udp.py imports PayloadTypes from this module while
# this module imports UDPVoiceClient from disco/voice/udp.py - that is a
# circular import; consider moving these constants to disco/voice/packets.py.
AudioCodecs = ('opus',)

# RTP payload type number for each supported codec, keyed by codec name.
PayloadTypes = Enum(OPUS=0x78)

# Bit flags tested against the `speaking` field of a voice SPEAKING payload.
SpeakingCodes = Enum(
    NONE=0,
    VOICE=1 << 0,
    SOUNDSHARE=1 << 1,
)
VoiceState = Enum(
DISCONNECTED=0,
RECONNECTING=1,
@ -25,6 +37,18 @@ VoiceState = Enum(
VOICE_CONNECTED=8,
)
class VoiceSpeaking(namedtuple('VoiceSpeaking', ['user_id', 'speaking', 'soundshare'])):
    """
    Voice Speaking Event

    Immutable record; build it with all three fields, e.g.
    ``VoiceSpeaking(user_id=..., speaking=..., soundshare=...)``.

    Attributes
    ----------
    user_id : snowflake
        the id of the user
    speaking : bool
        if they are speaking
    soundshare : bool
        if they are using soundshare
    """

    # Keep instances as small as a plain tuple (no per-instance __dict__).
    __slots__ = ()
class VoiceException(Exception):
def __init__(self, msg, client):
@ -33,7 +57,7 @@ class VoiceException(Exception):
class VoiceClient(LoggingClass):
VOICE_GATEWAY_VERSION = 3
VOICE_GATEWAY_VERSION = 4
SUPPORTED_MODES = {
'xsalsa20_poly1305_lite',
@ -58,6 +82,10 @@ class VoiceClient(LoggingClass):
self.packets.on(VoiceOPCode.READY, self.on_voice_ready)
self.packets.on(VoiceOPCode.RESUMED, self.on_voice_resumed)
self.packets.on(VoiceOPCode.SESSION_DESCRIPTION, self.on_voice_sdp)
self.packets.on(VoiceOPCode.SPEAKING, self.on_voice_speaking)
self.packets.on(VoiceOPCode.CLIENT_CONNECT, self.on_voice_client_connect)
self.packets.on(VoiceOPCode.CLIENT_DISCONNECT, self.on_voice_client_disconnect)
self.packets.on(VoiceOPCode.CODECS, self.on_voice_codecs)
# State + state change emitter
self.state = VoiceState.DISCONNECTED
@ -71,6 +99,9 @@ class VoiceClient(LoggingClass):
self.port = None
self.mode = None
self.udp = None
self.audio_codec = None
self.video_codec = None
self.transport_id = None
# Websocket connection
self.ws = None
@ -80,6 +111,10 @@ class VoiceClient(LoggingClass):
self._update_listener = None
self._heartbeat_task = None
# SSRCs
self.audio_ssrcs = {}
def __repr__(self):
return u'<VoiceClient {}>'.format(self.channel)
@ -102,10 +137,11 @@ class VoiceClient(LoggingClass):
self.send(VoiceOPCode.HEARTBEAT, time.time())
gevent.sleep(interval / 1000)
def set_speaking(self, value, delay=0):
    """
    Send a voice SPEAKING payload for our own SSRC.

    Parameters
    ----------
    value : bool or int
        speaking state; it is sent as an integer (``int(value)``)
    delay : int
        value for the payload's ``delay`` field, defaults to 0
    """
    self.send(VoiceOPCode.SPEAKING, {
        'speaking': int(value),
        'delay': delay,
        'ssrc': self.ssrc,
    })
def send(self, op, data):
@ -114,6 +150,21 @@ class VoiceClient(LoggingClass):
'op': op.value,
'd': data,
}), self.encoder.OPCODE)
def on_voice_client_connect(self, data):
    """
    Handle a voice CLIENT_CONNECT payload.

    Records the connecting user's id under the audio SSRC announced in the
    payload, so later lookups by SSRC resolve to that user.
    """
    owner = data['user_id']
    ssrc_table = self.audio_ssrcs
    ssrc_table[data['audio_ssrc']] = owner
    # data['voice_ssrc'] is deliberately not tracked for now
def on_voice_client_disconnect(self, data):
    """
    Handle a voice CLIENT_DISCONNECT payload.

    Drops the first audio SSRC entry that maps to the departing user's id;
    nothing happens when no entry matches.
    """
    no_match = object()
    stale_ssrc = next(
        (
            ssrc
            for ssrc, owner in self.audio_ssrcs.items()
            if owner == data['user_id']
        ),
        no_match,
    )
    if stale_ssrc is not no_match:
        del self.audio_ssrcs[stale_ssrc]
def on_voice_codecs(self, data):
    """
    Handle a voice CODECS payload.

    Copies the audio codec, video codec and media session id from the
    payload onto this client (the session id is stored as `transport_id`).
    """
    field_map = (
        ('audio_codec', 'audio_codec'),
        ('video_codec', 'video_codec'),
        ('transport_id', 'media_session_id'),
    )
    for attribute, key in field_map:
        setattr(self, attribute, data[key])
def on_voice_hello(self, data):
self.log.info('[%s] Recieved Voice HELLO payload, starting heartbeater', self)
@ -143,6 +194,17 @@ class VoiceClient(LoggingClass):
self.log.error('Failed to discover our IP, perhaps a NAT or firewall is fucking us')
self.disconnect()
return
codecs = []
for i in range(len(AudioCodecs)):
codec = AudioCodecs[i]
codecs.append({
'name': codec,
'type': 'audio',
'priority': i * 1000,
'payload_type': PayloadTypes.get(codec.upper())
})
self.log.debug('[%s] IP discovery completed (ip = %s, port = %s), sending SELECT_PROTOCOL', self, ip, port)
self.send(VoiceOPCode.SELECT_PROTOCOL, {
@ -152,6 +214,7 @@ class VoiceClient(LoggingClass):
'address': ip,
'mode': self.mode,
},
'codecs': codecs
})
def on_voice_resumed(self, data):
@ -161,6 +224,11 @@ class VoiceClient(LoggingClass):
def on_voice_sdp(self, sdp):
self.log.info('[%s] Recieved session description, connection completed', self)
self.mode = sdp['mode']
self.audio_codec = sdp['audio_codec']
self.video_codec = sdp['video_codec']
self.transport_id = sdp['media_session_id']
# Create a secret box for encryption/decryption
self.udp.setup_encryption(bytes(bytearray(sdp['secret_key'])))
@ -187,6 +255,16 @@ class VoiceClient(LoggingClass):
self._connect_and_run()
def on_voice_speaking(self, data):
    """
    Handle a voice SPEAKING payload.

    Remembers which user owns the payload's SSRC, then emits a
    `VoiceSpeaking` event on the gateway emitter describing which speaking
    flags are set for that user.
    """
    user_id = data['user_id']
    self.audio_ssrcs[data['ssrc']] = user_id

    flags = data['speaking']

    # VoiceSpeaking is a namedtuple: it has to be built with all of its
    # fields up front and cannot be assigned to afterwards. The flags are
    # tested against the enum members' raw integer values.
    payload = VoiceSpeaking(
        user_id=user_id,
        speaking=bool(flags & SpeakingCodes.VOICE.value),
        soundshare=bool(flags & SpeakingCodes.SOUNDSHARE.value),
    )

    self.client.gw.events.emit('VoiceSpeaking', payload)
def on_message(self, msg):
try:
data = self.encoder.decode(msg)

4
disco/voice/packets.py

@ -11,5 +11,7 @@ VoiceOPCode = Enum(
RESUME=7,
HELLO=8,
RESUMED=9,
CLIENT_CONNECT=12,
CLIENT_DISCONNECT=13,
)
CODECS=14
)

132
disco/voice/udp.py

@ -2,16 +2,59 @@ import struct
import socket
import gevent
from collections import namedtuple
try:
import nacl.secret
except ImportError:
print('WARNING: nacl is not installed, voice support is disabled')
from holster.enum import Enum
from disco.util.logging import LoggingClass
from disco.voice.client import PayloadTypes
# Largest values representable in an unsigned 32 bit / 16 bit integer.
MAX_UINT32 = (1 << 32) - 1
MAX_SEQUENCE = (1 << 16) - 1

# The two bytes that open an RFC5285 one-byte header extension.
RTP_HEADER_ONE_BYTE = (0xBE, 0xDE)
class RTPHeader(namedtuple('RTPHeader', [
    'version',
    'padding',
    'extension',
    'csrc_count',
    'marker',
    'payload_type',
    'sequence',
    'timestamp',
    'ssrc',
])):
    """
    RTP Packet's Header information

    Immutable record; every field has to be supplied when it is created.

    Attributes
    ----------
    version : integer
        the RTP version the packet's using
    padding : integer
        is this RTP packet using padding
    extension : integer
        is this RTP packet using extension
    csrc_count : integer
        the four bit CSRC count field of the RTP header
    marker : integer
        is this RTP packet having a marker
    payload_type : integer
        RTP packet's payload type, currently should only be OPUS data
    sequence : integer
        RTP packet's sequence
    timestamp : integer
        RTP packet's timestamp
    ssrc : integer
        RTP packet's SSRC, the person talking
    """

    # Keep instances as small as a plain tuple (no per-instance __dict__).
    __slots__ = ()
class VoiceData(namedtuple('VoiceData', ['data', 'user_id', 'rtp'])):
    """
    Voice Data received from the UDP socket

    Immutable record; every field has to be supplied when it is created.

    Attributes
    ----------
    data : bytes
        the decrypted data
    user_id : snowflake
        the id of the user who sent this data
    rtp : RTPHeader
        the rtp packet's header data
    """

    # Keep instances as small as a plain tuple (no per-instance __dict__).
    __slots__ = ()
class UDPVoiceClient(LoggingClass):
def __init__(self, vc):
@ -36,8 +79,8 @@ class UDPVoiceClient(LoggingClass):
# Buffer used for encoding/sending frames
self._buffer = bytearray(24)
self._buffer[0] = 0x80
self._buffer[1] = 0x78
self._buffer[0] = 2 << 6 # Only RTP Version set in the first byte of the header, 0x80
self._buffer[1] = PayloadTypes.OPUS
def increment_timestamp(self, by):
self.timestamp += by
@ -85,7 +128,90 @@ class UDPVoiceClient(LoggingClass):
def run(self):
    """
    Receive loop for the voice UDP socket.

    Reads datagrams forever. Every datagram that parses as an RTP version 2
    packet with a supported payload type is decrypted, stripped of RTP
    padding and of an RFC5285 one-byte header extension, and emitted on the
    gateway emitter as a `VoiceReceived` event carrying a VoiceData payload.
    Everything else is dropped without notice.
    """
    while True:
        data, _ = self.conn.recvfrom(4096)

        # Data cannot be less than the bare minimum (12 byte RTP header), just ignore
        if len(data) <= 12:
            continue

        rtp = self._parse_rtp_header(data)

        # Check if rtp version is 2
        if rtp.version != 2:
            continue

        # Unsupported payload type received
        if not PayloadTypes.get(rtp.payload_type):
            continue

        nonce, ciphertext = self._split_nonce(data)
        # NOTE(review): decrypt raises on a forged/corrupt packet, which ends
        # this loop - confirm whether such packets should be skipped instead.
        data = self._secret_box.decrypt(ciphertext, nonce)

        # RFC3550 Section 5.1 (Padding): the last byte holds the number of
        # padding bytes (itself included) to remove from the end.
        if rtp.padding and data:
            padding_amount = bytearray(data[-1:])[0]
            if padding_amount:
                data = data[:-padding_amount]

        if rtp.extension:
            data = self._strip_one_byte_extension(data)

        # RFC3550 Section 5.3: Profile-Specific Modifications to the RTP Header
        # clients send it sometimes, definitely on fresh connects to a server, dunno what to do here
        if rtp.marker:
            continue

        user_id = self.vc.audio_ssrcs.get(rtp.ssrc)
        payload = VoiceData(data=data, user_id=user_id, rtp=rtp)
        self.vc.client.gw.events.emit('VoiceReceived', payload)

@staticmethod
def _parse_rtp_header(data):
    """Decode the fixed 12 byte RTP header at the start of `data` into an RTPHeader."""
    first, second, sequence, timestamp, ssrc = struct.unpack_from('>BBHII', data)
    return RTPHeader(
        version=first >> 6,
        padding=(first >> 5) & 1,
        extension=(first >> 4) & 1,
        csrc_count=first & 0x0F,
        marker=second >> 7,
        payload_type=second & 0x7F,
        sequence=sequence,
        timestamp=timestamp,
        ssrc=ssrc,
    )

def _split_nonce(self, data):
    """
    Split a raw packet into `(nonce, ciphertext)` for the negotiated mode.

    The nonce is always 24 bytes, zero padded on the right. The `lite` mode
    takes it from the packet's last 4 bytes, the `suffix` mode from its last
    24 bytes, and any other mode uses the 12 byte RTP header. The ciphertext
    is whatever follows the RTP header once a trailing nonce is removed.
    """
    nonce = bytearray(24)
    mode = self.vc.mode
    if mode == 'xsalsa20_poly1305_lite':
        nonce[:4] = data[-4:]
        data = data[:-4]
    elif mode == 'xsalsa20_poly1305_suffix':
        nonce[:24] = data[-24:]
        data = data[:-24]
    else:
        nonce[:12] = data[:12]
    return bytes(nonce), bytes(data[12:])

@staticmethod
def _strip_one_byte_extension(data):
    """
    Remove an RFC5285 Section 4.2 one-byte header extension from `data`.

    The extension block is the two magic bytes, a 16 bit length counted in
    32 bit words, and that many words of extension elements. The elements
    are dropped so that only the media payload is returned. Data that does
    not start with the magic bytes is returned untouched.
    """
    if len(data) < 4 or struct.unpack_from('>BB', data) != RTP_HEADER_ONE_BYTE:
        return data
    words, = struct.unpack_from('>H', data, 2)
    return data[4 + words * 4:]
def send(self, data):
    """
    Send `data` as one datagram to the voice server.

    The destination is this client's `ip` / `port` pair.
    """
    destination = (self.ip, self.port)
    self.conn.sendto(data, destination)

Loading…
Cancel
Save