diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fbbab3..e4e177f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,15 +42,23 @@ set(MUMLIB_SRC src/Audio.cpp ) +set(SPEEX_LIBRARIES + speex + speexdsp + ) + PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS Mumble.proto) add_library(mumlib SHARED ${MUMLIB_SRC} ${MUMLIB_PUBLIC_HEADERS} ${MUMLIB_PRIVATE_HEADERS} ${PROTO_SRCS} ${PROTO_HDRS}) -target_link_libraries(mumlib ${PROTOBUF_LIBRARIES}) -target_link_libraries(mumlib ${Boost_LIBRARIES}) -target_link_libraries(mumlib ${OPENSSL_LIBRARIES}) -target_link_libraries(mumlib ${LOG4CPP_LIBRARIES}) -target_link_libraries(mumlib ${OPUS_LIBRARIES}) +target_link_libraries(mumlib + ${SPEEX_LIBRARIES} + ${PROTOBUF_LIBRARIES} + ${Boost_LIBRARIES} + ${OPENSSL_LIBRARIES} + ${LOG4CPP_LIBRARIES} + ${OPUS_LIBRARIES}) +# add_executable(mumlib_example mumlib_example.cpp) +# target_link_libraries(mumlib_example mumlib) -add_executable(mumlib_example mumlib_example.cpp) -target_link_libraries(mumlib_example mumlib) +install(TARGETS mumlib LIBRARY DESTINATION lib) diff --git a/include/mumlib.hpp b/include/mumlib.hpp index cab8e31..08297d1 100644 --- a/include/mumlib.hpp +++ b/include/mumlib.hpp @@ -79,9 +79,9 @@ namespace mumlib { void joinChannel(std::string channelName); - void sendVoiceTarget(mumlib::VoiceTargetType type, int targetId, int id); + void sendVoiceTarget(int targetId, mumlib::VoiceTargetType type, int sessionId); - bool sendVoiceTarget(mumlib::VoiceTargetType type, int targetId, std::string name); + void sendVoiceTarget(int targetId, mumlib::VoiceTargetType type, std::string name, int &error); void sendUserState(mumlib::UserState state, bool val); @@ -93,5 +93,9 @@ namespace mumlib { int getChannelIdBy(std::string channelName); int getUserIdBy(std::string userName); + + bool isSessionIdValid(int sessionId); + + bool isChannelIdValid(int channelId); }; } diff --git a/include/mumlib/Audio.hpp b/include/mumlib/Audio.hpp index 7e1433a..19d89a3 100644 --- a/include/mumlib/Audio.hpp +++ b/include/mumlib/Audio.hpp @@ -4,6 +4,8 @@ #include +#include + #include namespace mumlib { @@ -26,14 +28,24 @@ namespace mumlib { class Audio : boost::noncopyable { public: - explicit Audio(int opusSampleRate=DEFAULT_OPUS_SAMPLE_RATE, - int opusEncoderBitrate=DEFAULT_OPUS_ENCODER_BITRATE, + explicit Audio(int sampleRate=DEFAULT_OPUS_SAMPLE_RATE, + int bitrate=DEFAULT_OPUS_ENCODER_BITRATE, int channels=DEFAULT_OPUS_NUM_CHANNELS); virtual ~Audio(); IncomingAudioPacket decodeIncomingAudioPacket(uint8_t *inputBuffer, int inputBufferLength); + void addFrameToBuffer(uint8_t *inputBuffer, int inputLength, int sequence); + + // todo: mix audio + void mixAudio(uint8_t *dest, uint8_t *src, int bufferOffset, int inputLength); + + void resizeBuffer(); + + std::pair decodeOpusPayload(int16_t *pcmBuffer, + int pcmBufferSize); + std::pair decodeOpusPayload(uint8_t *inputBuffer, int inputLength, int16_t *pcmBuffer, @@ -52,14 +64,24 @@ namespace mumlib { void resetEncoder(); + void resetJitterBuffer(); + private: log4cpp::Category &logger; OpusDecoder *opusDecoder; OpusEncoder *opusEncoder; + JitterBuffer *jbBuffer; int64_t outgoingSequenceNumber; - int sampleRate; + + unsigned int iSampleRate; + unsigned int iChannels; + unsigned int iFrameSize; + unsigned int iAudioBufferSize; + + float *fFadeIn; + float *fFadeOut; std::chrono::time_point lastEncodedAudioPacketTimestamp; }; diff --git a/src/Audio.cpp b/src/Audio.cpp index 60277df..393b674 100644 --- a/src/Audio.cpp +++ b/src/Audio.cpp @@ -4,30 +4,69 @@ static boost::posix_time::seconds RESET_SEQUENCE_NUMBER_INTERVAL(5); -mumlib::Audio::Audio(int opusSampleRate, int opusEncoderBitrate, int channels) +mumlib::Audio::Audio(int sampleRate, int bitrate, int channels) : logger(log4cpp::Category::getInstance("mumlib.Audio")), opusDecoder(nullptr), opusEncoder(nullptr), - outgoingSequenceNumber(0) { + outgoingSequenceNumber(0), + iSampleRate(sampleRate), + iChannels(channels) { - int error; - this->sampleRate = opusSampleRate; + int error, ret; + iFrameSize = sampleRate / 100; + iAudioBufferSize = iFrameSize; + iAudioBufferSize *= 12; - opusDecoder = opus_decoder_create(opusSampleRate, channels, &error); + opusDecoder = opus_decoder_create(sampleRate, channels, &error); if (error != OPUS_OK) { throw AudioException((boost::format("failed to initialize OPUS decoder: %s") % opus_strerror(error)).str()); } - opusEncoder = opus_encoder_create(opusSampleRate, channels, OPUS_APPLICATION_VOIP, &error); + opusEncoder = opus_encoder_create(sampleRate, channels, OPUS_APPLICATION_VOIP, &error); if (error != OPUS_OK) { throw AudioException((boost::format("failed to initialize OPUS encoder: %s") % opus_strerror(error)).str()); } - opus_encoder_ctl(opusEncoder, OPUS_SET_VBR(0)); - - setOpusEncoderBitrate(opusEncoderBitrate); + ret = opus_encoder_ctl(opusEncoder, OPUS_SET_BITRATE(bitrate)); + if (ret != OPUS_OK) { + throw AudioException((boost::format("failed to initialize transmission bitrate to %d B/s: %s") + % bitrate % opus_strerror(ret)).str()); + } + ret = opus_encoder_ctl(opusEncoder, OPUS_SET_VBR(0)); + if (ret != OPUS_OK) { + throw AudioException((boost::format("failed to initialize variable bitrate: %s") + % opus_strerror(ret)).str()); + } + ret = opus_encoder_ctl(opusEncoder, OPUS_SET_VBR_CONSTRAINT(0)); + if (ret != OPUS_OK) { + throw AudioException((boost::format("failed to initialize variable bitrate constraint: %s") + % opus_strerror(ret)).str()); + } + ret = opus_encoder_ctl(opusEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); + if (ret != OPUS_OK) { + throw AudioException((boost::format("failed to initialize bandwidth narrow: %s") + % opus_strerror(ret)).str()); + } + ret = opus_encoder_ctl(opusEncoder, OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); + if (ret != OPUS_OK) { + throw AudioException((boost::format("failed to initialize maximum bandwidth narrow: %s") + % opus_strerror(ret)).str()); + } resetEncoder(); + + jbBuffer = jitter_buffer_init(iFrameSize); + int margin = 10 * iFrameSize; + jitter_buffer_ctl(jbBuffer, JITTER_BUFFER_SET_MARGIN, &margin); + + fFadeIn = new float[iFrameSize]; + fFadeOut = new float[iFrameSize]; + + // Sine function to represent fade in/out. Period is FRAME_SIZE. + float mul = static_cast(M_PI / 2.0 * static_cast(iFrameSize)); + for(unsigned int i = 0; i < iFrameSize; i++) { + fFadeIn[i] = fFadeOut[iFrameSize - 1 - 1] = sinf(static_cast(i) * mul); + } } mumlib::Audio::~Audio() { @@ -38,6 +77,11 @@ mumlib::Audio::~Audio() { if (opusEncoder) { opus_encoder_destroy(opusEncoder); } + + jitter_buffer_destroy(jbBuffer); + + delete[] fFadeIn; + delete[] fFadeOut; } void mumlib::Audio::setOpusEncoderBitrate(int bitrate) { @@ -57,6 +101,105 @@ int mumlib::Audio::getOpusEncoderBitrate() { return bitrate; } +void mumlib::Audio::addFrameToBuffer(uint8_t *inputBuffer, int inputLength, int sequence) { + int dataPointer = 0; + VarInt varInt(inputBuffer); + int opusDataLength = varInt.getValue(); + dataPointer += varInt.getEncoded().size(); + bool lastPacket = (opusDataLength & 0x2000) != 0; + opusDataLength &= 0x1fff; + + auto *packet = reinterpret_cast(&inputBuffer[dataPointer]); + int frame = opus_packet_get_nb_frames(packet, opusDataLength); + int samples = frame * opus_packet_get_samples_per_frame(packet, iSampleRate); + int channel = opus_packet_get_nb_channels(packet); + + if(not sequence) { + resetJitterBuffer(); + } + + logger.info("Opus packet, frame: %d, samples: %d, channel: %d", frame, samples, channel); + + JitterBufferPacket jbPacket; + jbPacket.data = reinterpret_cast(&inputBuffer[dataPointer]); + jbPacket.len = opusDataLength; + jbPacket.span = samples; + jbPacket.timestamp = iFrameSize * sequence; + jbPacket.user_data = lastPacket; + + jitter_buffer_put(jbBuffer, &jbPacket); +} + +std::pair mumlib::Audio::decodeOpusPayload(int16_t *pcmBuffer, int pcmBufferSize) { + int avail = 0; + spx_uint32_t remaining = 0; + jitter_buffer_ctl(jbBuffer, JITTER_BUFFER_GET_AVAILABLE_COUNT, &avail); + jitter_buffer_remaining_span(jbBuffer, remaining); + int timestamp = jitter_buffer_get_pointer_timestamp(jbBuffer); + + logger.warn("jbBufer, avail: %d, remain: %d, timestamp: %d", avail, remaining, timestamp); + + char data[4096]; + JitterBufferPacket jbPacket; + jbPacket.data = data; + jbPacket.len = 4096; + + spx_int32_t startofs = 0; + int opusDataLength; + int outputSize; + spx_uint32_t lastPacket; + + if(jitter_buffer_get(jbBuffer, &jbPacket, iFrameSize, &startofs) == JITTER_BUFFER_OK) { + opusDataLength = jbPacket.len; + lastPacket = jbPacket.user_data; + } else { + jitter_buffer_update_delay(jbBuffer, &jbPacket, NULL); + } + + if(opusDataLength) { + outputSize = opus_decode(opusDecoder, + reinterpret_cast(jbPacket.data), + jbPacket.len, + pcmBuffer, + pcmBufferSize, 0); + } else { + outputSize = opus_decode(opusDecoder, + NULL, 0, pcmBuffer, pcmBufferSize, 0); + } + + if(outputSize < 0) { + outputSize = iFrameSize; + memset(pcmBuffer, 0, iFrameSize * sizeof(float)); + } + + if(lastPacket) { + for(unsigned int i = 0; i < iFrameSize; i++) + pcmBuffer[i] *= fFadeOut[i]; + } + + for (int i = outputSize / iFrameSize; i > 0; --i) { + jitter_buffer_tick(jbBuffer); + } + + logger.debug("%d B of Opus data decoded to %d PCM samples, last packet: %d.", + opusDataLength, outputSize, lastPacket); + + return std::make_pair(outputSize, lastPacket); +} + +void mumlib::Audio::mixAudio(uint8_t *dest, uint8_t *src, int bufferOffset, int inputLength) { + for(int i = 0; i < inputLength; i++) { + float mix = 0; + + // Clip to [-1,1] + if(mix > 1) + mix = 1; + else if(mix < -1) + mix = -1; + dest[i + bufferOffset] = mix; + } +} + std::pair mumlib::Audio::decodeOpusPayload(uint8_t *inputBuffer, int inputLength, int16_t *pcmBuffer, @@ -83,11 +226,11 @@ std::pair mumlib::Audio::decodeOpusPayload(uint8_t *inputBuffer, // Lost packets can be replaced with loss concealment by calling the decoder with a null pointer and zero length for the missing packet. // A single codec state may only be accessed from a single thread at a time and any required locking must be performed by the caller. // Separate streams must be decoded with separate decoder states and can be decoded in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK defined. - - int frame = opus_packet_get_nb_frames(&inputBuffer[dataPointer], opusDataLength); - int samples = frame * opus_packet_get_samples_per_frame(&inputBuffer[dataPointer], sampleRate); + auto *packet = reinterpret_cast(&inputBuffer[dataPointer]); + int frame = opus_packet_get_nb_frames(packet, opusDataLength); + int samples = frame * opus_packet_get_samples_per_frame(packet, iSampleRate); int outputSize = opus_decode(opusDecoder, - reinterpret_cast(&inputBuffer[dataPointer]), + packet, opusDataLength, pcmBuffer, pcmBufferSize, @@ -143,7 +286,7 @@ int mumlib::Audio::encodeAudioPacket(int target, int16_t *inputPcmBuffer, int in memcpy(outputBuffer, &header[0], header.size()); memcpy(outputBuffer + header.size(), tmpOpusBuffer, (size_t) outputSize); - int incrementNumber = 100 * inputLength / sampleRate; + int incrementNumber = 100 * inputLength / iSampleRate; outgoingSequenceNumber += incrementNumber; @@ -162,6 +305,11 @@ void mumlib::Audio::resetEncoder() { outgoingSequenceNumber = 0; } +void mumlib::Audio::resetJitterBuffer() { + logger.debug("Last audio packet, resetting jitter buffer"); + jitter_buffer_reset(jbBuffer); +} + mumlib::IncomingAudioPacket mumlib::Audio::decodeIncomingAudioPacket(uint8_t *inputBuffer, int inputBufferLength) { mumlib::IncomingAudioPacket incomingAudioPacket{}; diff --git a/src/mumlib.cpp b/src/mumlib.cpp index c8101b5..3c85cec 100644 --- a/src/mumlib.cpp +++ b/src/mumlib.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -33,6 +33,7 @@ namespace mumlib { int sessionId = 0; int channelId = 0; + int64_t seq = 0; std::vector listMumbleUser; std::vector listMumbleChannel; @@ -65,19 +66,27 @@ namespace mumlib { auto incomingAudioPacket = audio.decodeIncomingAudioPacket(buffer, length); if (type == AudioPacketType::OPUS) { - // todo: multiple users speaking simultaneously (Issue #3) - // something weird while decoding the opus payload int16_t pcmData[5000]; - auto status = audio.decodeOpusPayload(incomingAudioPacket.audioPayload, - incomingAudioPacket.audioPayloadLength, - pcmData, - 5000); + + audio.addFrameToBuffer(incomingAudioPacket.audioPayload, + incomingAudioPacket.audioPayloadLength, + seq); + + auto status = audio.decodeOpusPayload(pcmData, 5000); + + // auto status = audio.decodeOpusPayload(incomingAudioPacket.audioPayload, + // incomingAudioPacket.audioPayloadLength, + // pcmData, + // 5000); + + if(status.second) seq = 0; else seq++; callback.audio(incomingAudioPacket.target, - incomingAudioPacket.sessionId, - incomingAudioPacket.sequenceNumber, - pcmData, - status.first); + incomingAudioPacket.sessionId, + incomingAudioPacket.sequenceNumber, + pcmData, + status.first); + } else { logger.warn("Incoming audio packet doesn't contain Opus data, calling unsupportedAudio callback."); callback.unsupportedAudio(incomingAudioPacket.target, @@ -377,9 +386,12 @@ namespace mumlib { } void listUserRemovedBy(int sessionId) { - for(int i = 0; i < listMumbleUser.size(); i++) - if(listMumbleUser[i].sessionId == sessionId) + for(int i = 0; i < listMumbleUser.size(); i++) { + if(listMumbleUser[i].sessionId == sessionId) { listMumbleUser.erase(listMumbleUser.begin() + i); + return; + } + } } bool isListChannelContains(int channelId) { @@ -390,9 +402,12 @@ namespace mumlib { } void listChannelRemovedBy(int channelId) { - for(int i = 0; i < listMumbleChannel.size(); i++) - if(listMumbleChannel[i].channelId == channelId) + for(int i = 0; i < listMumbleChannel.size(); i++) { + if(listMumbleChannel[i].channelId == channelId) { listMumbleChannel.erase(listMumbleChannel.begin() + i); + return; + } + } } }; @@ -474,6 +489,8 @@ namespace mumlib { } void Mumlib::joinChannel(int channelId) { + if(!isChannelIdValid(channelId)) // when channel has not been registered / create + return; MumbleProto::UserState userState; userState.set_channel_id(channelId); impl->transport.sendControlMessage(MessageType::USERSTATE, userState); @@ -482,11 +499,10 @@ namespace mumlib { void Mumlib::joinChannel(string name) { int channelId = Mumlib::getChannelIdBy(name); - if(!channelId < 0) // when channel has not been registered / create - Mumlib::joinChannel(channelId); + Mumlib::joinChannel(channelId); } - void Mumlib::sendVoiceTarget(VoiceTargetType type, int targetId, int id) { + void Mumlib::sendVoiceTarget(int targetId, VoiceTargetType type, int id) { MumbleProto::VoiceTarget voiceTarget; MumbleProto::VoiceTarget_Target voiceTargetTarget; switch(type) { @@ -507,8 +523,8 @@ namespace mumlib { impl->transport.sendControlMessage(MessageType::VOICETARGET, voiceTarget); } - bool Mumlib::sendVoiceTarget(VoiceTargetType type, int targetId, string name) { - int id = -1; + void Mumlib::sendVoiceTarget(int targetId, VoiceTargetType type, string name, int &error) { + int id; switch(type) { case VoiceTargetType::CHANNEL: id = getChannelIdBy(name); @@ -519,10 +535,9 @@ namespace mumlib { default: break; } - if(id < 0) - return false; - sendVoiceTarget(type, targetId, id); - return true; + error = id < 0 ? 1: 0; + if(error) return; + sendVoiceTarget(targetId, type, id); } void Mumlib::sendUserState(mumlib::UserState field, bool val) { @@ -560,26 +575,22 @@ namespace mumlib { void Mumlib::sendUserState(mumlib::UserState field, std::string val) { MumbleProto::UserState userState; + + // if comment longer than 128 bytes, we need to set the SHA1 hash + // http://www.askyb.com/cpp/openssl-sha1-hashing-example-in-cpp/ + unsigned char digest[SHA_DIGEST_LENGTH]; + char mdString[SHA_DIGEST_LENGTH * 2 + 1]; + SHA1((unsigned char*) val.c_str(), val.size(), digest); + for(int i = 0; i < SHA_DIGEST_LENGTH; i++) + sprintf(&mdString[i*2], "%02x", (unsigned int) digest[i]); + switch (field) { case UserState::COMMENT: - - if(val.size() < 128) { + if(val.size() < 128) userState.set_comment(val); - } else { - // if comment longer than 128 bytes, we need to set the SHA1 hash - boost::uuids::detail::sha1 sha1; - uint hash[5]; - sha1.process_bytes(val.c_str(), val.size()); - sha1.get_digest(hash); - - std::stringstream valStream; - for(std::size_t i=0; i listMumbleChannel = impl->listMumbleChannel; - int channelId = -1; for(int i = 0; i < listMumbleChannel.size(); i++) if(listMumbleChannel[i].name == name) - channelId = listMumbleChannel[i].channelId; - return channelId; + return listMumbleChannel[i].channelId; + return -1; } int Mumlib::getUserIdBy(string name) { vector listMumbleUser = impl->listMumbleUser; - int sessionId = -1; for(int i = 0; i < listMumbleUser.size(); i++) if(listMumbleUser[i].name == name) - sessionId = listMumbleUser[i].sessionId; - return sessionId; + return listMumbleUser[i].sessionId; + return -1; + } + + bool Mumlib::isSessionIdValid(int sessionId) { + vector listMumbleUser = impl->listMumbleUser; + for(int i = 0; i < listMumbleUser.size(); i++) + if(listMumbleUser[i].sessionId == sessionId) + return true; + return false; + } + + bool Mumlib::isChannelIdValid(int channelId) { + vector listMumbleChannel = impl->listMumbleChannel; + for(int i = 0; i < listMumbleChannel.size(); i++) + if(listMumbleChannel[i].channelId == channelId) + return true; + return false; } }