initial voice

7 years ago · a7379e0e89
4 changed files with 215 additions and 9 deletions
--- a/disco/gateway/packets.py
+++ b/disco/gateway/packets.py
@ -16,5 +16,5 @@ OPCode = Enum(
    INVALID_SESSION=9,
    HELLO=10,
    HEARTBEAT_ACK=11,
-    GUILD_SYNC=12,
+    GUILD_SYNC=12
 )
--- a/disco/voice/client.py
+++ b/disco/voice/client.py
@ -3,6 +3,8 @@ from __future__ import print_function
 import gevent
 import time

+from collections import namedtuple
+
 from holster.enum import Enum
 from holster.emitter import Emitter

@ -13,6 +15,16 @@ from disco.gateway.packets import OPCode
 from disco.voice.packets import VoiceOPCode
 from disco.voice.udp import UDPVoiceClient

+# Audio codecs offered when selecting the voice protocol; a codec's index in
+# this tuple determines the priority advertised for it.
+AudioCodecs = ('opus',)
+
+# RTP payload type numbers, keyed by upper-cased codec name.
+PayloadTypes = Enum(OPUS=0x78)
+
+# Bit flags carried in the `speaking` field of voice SPEAKING payloads.
+SpeakingCodes = Enum(
+    NONE=0,
+    VOICE=1 << 0,
+    SOUNDSHARE=1 << 1,
+)
+
 VoiceState = Enum(
    DISCONNECTED=0,
    RECONNECTING=1,
@ -25,6 +37,18 @@ VoiceState = Enum(
    VOICE_CONNECTED=8,
 )

+class VoiceSpeaking(namedtuple('VoiceSpeaking', 'user_id speaking soundshare')):
+    """
+    Payload emitted with the `VoiceSpeaking` event.
+
+    Attributes
+    ----------
+    user_id : snowflake
+        ID of the user the speaking update refers to.
+    speaking : bool
+        Whether the user's voice flag is set.
+    soundshare : bool
+        Whether the user's soundshare flag is set.
+    """

 class VoiceException(Exception):
    def __init__(self, msg, client):
@ -33,7 +57,7 @@ class VoiceException(Exception):


 class VoiceClient(LoggingClass):
-    VOICE_GATEWAY_VERSION = 3
+    VOICE_GATEWAY_VERSION = 4

    SUPPORTED_MODES = {
        'xsalsa20_poly1305_lite',
@ -58,6 +82,10 @@ class VoiceClient(LoggingClass):
        self.packets.on(VoiceOPCode.READY, self.on_voice_ready)
        self.packets.on(VoiceOPCode.RESUMED, self.on_voice_resumed)
        self.packets.on(VoiceOPCode.SESSION_DESCRIPTION, self.on_voice_sdp)
+        self.packets.on(VoiceOPCode.SPEAKING, self.on_voice_speaking)
+        self.packets.on(VoiceOPCode.CLIENT_CONNECT, self.on_voice_client_connect)
+        self.packets.on(VoiceOPCode.CLIENT_DISCONNECT, self.on_voice_client_disconnect)
+        self.packets.on(VoiceOPCode.CODECS, self.on_voice_codecs)

        # State + state change emitter
        self.state = VoiceState.DISCONNECTED
@ -71,6 +99,9 @@ class VoiceClient(LoggingClass):
        self.port = None
        self.mode = None
        self.udp = None
+        self.audio_codec = None
+        self.video_codec = None
+        self.transport_id = None

        # Websocket connection
        self.ws = None
@ -80,6 +111,10 @@ class VoiceClient(LoggingClass):
        self._update_listener = None
        self._heartbeat_task = None

+        # SSRCs
+        
+        self.audio_ssrcs = {}
+
    def __repr__(self):
        return u'<VoiceClient {}>'.format(self.channel)

@ -102,10 +137,11 @@ class VoiceClient(LoggingClass):
            self.send(VoiceOPCode.HEARTBEAT, time.time())
            gevent.sleep(interval / 1000)

-    def set_speaking(self, value):
+    def set_speaking(self, value, delay=0):
+        """
+        Send a SPEAKING payload through `self.send`.
+
+        Parameters
+        ----------
+        value : bool or int
+            Speaking state to announce; it is transmitted as ``int(value)``.
+        delay : int
+            Value placed in the payload's ``delay`` field (defaults to 0).
+        """
        self.send(VoiceOPCode.SPEAKING, {
-            'speaking': value,
-            'delay': 0,
+            'speaking': int(value),
+            'delay': delay,
+            'ssrc': self.ssrc
        })

    def send(self, op, data):
@ -114,6 +150,21 @@ class VoiceClient(LoggingClass):
            'op': op.value,
            'd': data,
        }), self.encoder.OPCODE)
+    
+    def on_voice_client_connect(self, data):
+        """
+        Handle a CLIENT_CONNECT payload by remembering which user owns the
+        announced audio SSRC.
+        """
+        owner = data['user_id']
+        ssrc = data['audio_ssrc']
+        # Only the audio SSRC is tracked for now; any other SSRC in the
+        # payload is ignored.
+        self.audio_ssrcs[ssrc] = owner
+
+    def on_voice_client_disconnect(self, data):
+        """
+        Handle a CLIENT_DISCONNECT payload by forgetting every SSRC that is
+        mapped to the departing user.
+
+        SSRCs are registered by both CLIENT_CONNECT and SPEAKING payloads, so
+        a user may own more than one entry; all of them are removed.
+        """
+        # Collect the matches first so the dict is never mutated while it is
+        # being iterated.
+        stale = [
+            ssrc for ssrc, owner in self.audio_ssrcs.items()
+            if owner == data['user_id']
+        ]
+        for ssrc in stale:
+            del self.audio_ssrcs[ssrc]
+
+    def on_voice_codecs(self, data):
+        """
+        Handle a CODECS payload by storing the audio codec, the video codec
+        and the media session id (kept as `transport_id`) on the client.
+        """
+        field_map = (
+            ('audio_codec', 'audio_codec'),
+            ('video_codec', 'video_codec'),
+            ('transport_id', 'media_session_id'),
+        )
+        for attr, key in field_map:
+            setattr(self, attr, data[key])

    def on_voice_hello(self, data):
        self.log.info('[%s] Recieved Voice HELLO payload, starting heartbeater', self)
@ -143,6 +194,17 @@ class VoiceClient(LoggingClass):
            self.log.error('Failed to discover our IP, perhaps a NAT or firewall is fucking us')
            self.disconnect()
            return
+        
+        codecs = []
+
+        for i in range(len(AudioCodecs)):
+            codec = AudioCodecs[i]
+            codecs.append({
+                'name': codec,
+                'type': 'audio',
+                'priority': i * 1000,
+                'payload_type': PayloadTypes.get(codec.upper())
+            })

        self.log.debug('[%s] IP discovery completed (ip = %s, port = %s), sending SELECT_PROTOCOL', self, ip, port)
        self.send(VoiceOPCode.SELECT_PROTOCOL, {
@ -152,6 +214,7 @@ class VoiceClient(LoggingClass):
                'address': ip,
                'mode': self.mode,
            },
+            'codecs': codecs
        })

    def on_voice_resumed(self, data):
@ -161,6 +224,11 @@ class VoiceClient(LoggingClass):
    def on_voice_sdp(self, sdp):
        self.log.info('[%s] Recieved session description, connection completed', self)

+        self.mode = sdp['mode']
+        self.audio_codec = sdp['audio_codec']
+        self.video_codec = sdp['video_codec']
+        self.transport_id = sdp['media_session_id']
+
        # Create a secret box for encryption/decryption
        self.udp.setup_encryption(bytes(bytearray(sdp['secret_key'])))

@ -187,6 +255,16 @@ class VoiceClient(LoggingClass):

        self._connect_and_run()

+    def on_voice_speaking(self, data):
+        """
+        Handle a voice gateway SPEAKING payload.
+
+        Records which user owns the reported SSRC, then emits a
+        `VoiceSpeaking` event on the gateway event emitter with the speaking
+        bit flags decoded into booleans.
+        """
+        user_id = data['user_id']
+        self.audio_ssrcs[data['ssrc']] = user_id
+
+        flags = data['speaking']
+
+        # namedtuples are immutable, so every field has to be supplied at
+        # construction time. The flags are masked with the raw enum values.
+        payload = VoiceSpeaking(
+            user_id=user_id,
+            speaking=bool(flags & SpeakingCodes.VOICE.value),
+            soundshare=bool(flags & SpeakingCodes.SOUNDSHARE.value),
+        )
+
+        self.client.gw.events.emit('VoiceSpeaking', payload)
+
    def on_message(self, msg):
        try:
            data = self.encoder.decode(msg)
--- a/disco/voice/packets.py
+++ b/disco/voice/packets.py
@ -11,5 +11,7 @@ VoiceOPCode = Enum(
    RESUME=7,
    HELLO=8,
    RESUMED=9,
+    CLIENT_CONNECT=12,
    CLIENT_DISCONNECT=13,
-)
+    CODECS=14
+)
--- a/disco/voice/udp.py
+++ b/disco/voice/udp.py
@ -2,16 +2,59 @@ import struct
 import socket
 import gevent

+from collections import namedtuple
+
 try:
    import nacl.secret
 except ImportError:
    print('WARNING: nacl is not installed, voice support is disabled')

+from holster.enum import Enum
+
 from disco.util.logging import LoggingClass
+from disco.voice.client import PayloadTypes

 MAX_UINT32 = 4294967295
 MAX_SEQUENCE = 65535

+# The two leading bytes (0xBE, 0xDE) that identify an RFC 5285 "one-byte
+# header" RTP extension block.
+RTP_HEADER_ONE_BYTE = (0xBE, 0xDE)
+
+class RTPHeader(namedtuple('RTPHeader', (
+    'version',
+    'padding',
+    'extension',
+    'csrc_count',
+    'marker',
+    'payload_type',
+    'sequence',
+    'timestamp',
+    'ssrc',
+))):
+    """
+    Parsed fields of an RTP packet header.
+
+    Attributes
+    ----------
+    version : integer
+        RTP version used by the packet.
+    padding : integer
+        Padding flag; non-zero when the packet carries padding.
+    extension : integer
+        Extension flag; non-zero when the packet carries a header extension.
+    csrc_count : integer
+        Value of the header's 4-bit CSRC count field.
+    marker : integer
+        Marker flag of the packet.
+    payload_type : integer
+        Payload type of the packet; currently only OPUS data is expected.
+    sequence : integer
+        Sequence number of the packet.
+    timestamp : integer
+        Timestamp of the packet.
+    ssrc : integer
+        SSRC of the packet, identifying the person talking.
+    """
+
+class VoiceData(namedtuple('VoiceData', 'data user_id rtp')):
+    """
+    One voice packet received from the UDP socket.
+
+    Attributes
+    ----------
+    data : bytes
+        The decrypted packet payload.
+    user_id : snowflake
+        ID of the user who sent the packet.
+    rtp : RTPHeader
+        Header of the RTP packet the payload arrived in.
+    """

 class UDPVoiceClient(LoggingClass):
    def __init__(self, vc):
@ -36,8 +79,8 @@ class UDPVoiceClient(LoggingClass):

        # Buffer used for encoding/sending frames
        self._buffer = bytearray(24)
-        self._buffer[0] = 0x80
-        self._buffer[1] = 0x78
+        self._buffer[0] = 2 << 6 # Only RTP Version set in the first byte of the header, 0x80
+        self._buffer[1] = PayloadTypes.OPUS

    def increment_timestamp(self, by):
        self.timestamp += by
@ -85,7 +128,90 @@ class UDPVoiceClient(LoggingClass):

    def run(self):
+        """
+        Receive loop for inbound voice packets.
+
+        Blocks forever reading datagrams from `self.conn`. Each datagram is
+        parsed as an RTP packet (RFC 3550), decrypted using the nonce layout
+        of the negotiated `self.vc.mode`, stripped of RTP padding and header
+        extension data, and emitted on the gateway event emitter as a
+        `VoiceReceived` event carrying a `VoiceData` payload.
+
+        Packets that are too short, use an unknown RTP version or payload
+        type, fail decryption, are structurally invalid or have the marker
+        bit set are dropped without raising, so a single bad datagram cannot
+        stop the loop. Header extension elements are discarded rather than
+        exposed to listeners.
+        """
        while True:
-            self.conn.recvfrom(4096)
+            data, _ = self.conn.recvfrom(4096)
+
+            # A packet needs the 12 byte RTP header plus at least one more byte
+            if len(data) <= 12:
+                continue
+
+            # Fixed RTP header: flag byte, marker/payload type byte,
+            # sequence (u16), timestamp (u32) and SSRC (u32), big endian
+            first, second, sequence, timestamp, ssrc = struct.unpack_from('>BBHII', data)
+
+            rtp = RTPHeader(
+                version=first >> 6,
+                padding=(first >> 5) & 1,
+                extension=(first >> 4) & 1,
+                csrc_count=first & 0x0F,
+                marker=second >> 7,
+                payload_type=second & 0x7F,
+                sequence=sequence,
+                timestamp=timestamp,
+                ssrc=ssrc,
+            )
+
+            # Check if rtp version is 2
+            if rtp.version != 2:
+                continue
+
+            # Unsupported payload type received
+            if not PayloadTypes.get(rtp.payload_type):
+                continue
+
+            # Build the 24 byte nonce. The suffix based modes append the nonce
+            # to the packet, every other mode reuses the RTP header as nonce.
+            nonce = bytearray(24)
+            if self.vc.mode == 'xsalsa20_poly1305_lite':
+                nonce[:4] = data[-4:]
+                data = data[:-4]
+            elif self.vc.mode == 'xsalsa20_poly1305_suffix':
+                if len(data) < 24:
+                    continue
+                nonce[:] = data[-24:]
+                data = data[:-24]
+            else:
+                nonce[:12] = data[:12]
+
+            # Everything after the 12 byte header is the encrypted payload
+            try:
+                data = self._secret_box.decrypt(bytes(data[12:]), bytes(nonce))
+            except nacl.exceptions.CryptoError:
+                # Truncated, corrupted or forged packet
+                continue
+
+            # RFC3550 Section 5.1 (Padding): the last byte holds the number of
+            # padding bytes, itself included
+            if rtp.padding:
+                if not data:
+                    continue
+                padding_amount, = struct.unpack_from('>B', data, len(data) - 1)
+                if not 0 < padding_amount <= len(data):
+                    continue
+                data = data[:-padding_amount]
+
+            # RFC3550 Section 5.3.1 (Header Extension): a 16 bit profile id
+            # (0xBEDE for the RFC5285 one-byte header form) and a 16 bit
+            # length, counted in 32 bit words, precede the extension data
+            if rtp.extension:
+                if len(data) < 4:
+                    continue
+                _profile, extension_words = struct.unpack_from('>HH', data)
+                extension_end = 4 + extension_words * 4
+                if len(data) < extension_end:
+                    continue
+                data = data[extension_end:]
+
+            # RFC3550 Section 5.3: Profile-Specific Modifications to the RTP Header
+            # clients send it sometimes, definitely on fresh connects to a server;
+            # such packets are skipped for now
+            if rtp.marker:
+                continue
+
+            user_id = self.vc.audio_ssrcs.get(rtp.ssrc)
+            payload = VoiceData(data=data, user_id=user_id, rtp=rtp)
+
+            self.vc.client.gw.events.emit('VoiceReceived', payload)

    def send(self, data):
        self.conn.sendto(data, (self.ip, self.port))