349 lines
13 KiB
C++
349 lines
13 KiB
C++
#include "mumlib/Audio.hpp"
|
|
|
|
#include <boost/format.hpp>
|
|
|
|
// Idle interval consulted by Audio::encodeAudioPacket(): when more than this interval plus one
// second has elapsed since the previous encoded packet, the encoder is reset, which also
// restarts the outgoing sequence number at 0.
static boost::posix_time::seconds RESET_SEQUENCE_NUMBER_INTERVAL(5);
|
|
|
|
// Builds the Opus decoder/encoder pair, configures the encoder (fixed bitrate, narrowband),
// creates the jitter buffer and pre-computes the fade-in / fade-out tables.
//
// sampleRate - sampling rate handed to Opus; one frame is sampleRate / 100 samples.
// bitrate    - value passed to OPUS_SET_BITRATE.
// channels   - channel count handed to Opus.
//
// Throws AudioException when an Opus object cannot be created or configured. Everything
// acquired before the failure is released first, because the destructor does not run for an
// object whose constructor throws.
mumlib::Audio::Audio(int sampleRate, int bitrate, int channels)
        : logger(log4cpp::Category::getInstance("mumlib.Audio")),
          opusDecoder(nullptr),
          opusEncoder(nullptr),
          outgoingSequenceNumber(0),
          iSampleRate(sampleRate),
          iChannels(channels) {

    // These members are assigned in the body; null them first so the failure path below can
    // tell what has actually been acquired.
    jbBuffer = nullptr;
    fFadeIn = nullptr;
    fFadeOut = nullptr;

    try {
        int error, ret;

        iFrameSize = sampleRate / 100;
        iAudioBufferSize = iFrameSize;
        iAudioBufferSize *= 12;

        opusDecoder = opus_decoder_create(sampleRate, channels, &error);
        if (error != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize OPUS decoder: %s") % opus_strerror(error)).str());
        }

        opusEncoder = opus_encoder_create(sampleRate, channels, OPUS_APPLICATION_VOIP, &error);
        if (error != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize OPUS encoder: %s") % opus_strerror(error)).str());
        }

        ret = opus_encoder_ctl(opusEncoder, OPUS_SET_BITRATE(bitrate));
        if (ret != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize transmission bitrate to %d B/s: %s")
                                  % bitrate % opus_strerror(ret)).str());
        }

        ret = opus_encoder_ctl(opusEncoder, OPUS_SET_VBR(0));
        if (ret != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize variable bitrate: %s")
                                  % opus_strerror(ret)).str());
        }

        ret = opus_encoder_ctl(opusEncoder, OPUS_SET_VBR_CONSTRAINT(0));
        if (ret != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize variable bitrate constraint: %s")
                                  % opus_strerror(ret)).str());
        }

        ret = opus_encoder_ctl(opusEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
        if (ret != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize bandwidth narrow: %s")
                                  % opus_strerror(ret)).str());
        }

        ret = opus_encoder_ctl(opusEncoder, OPUS_SET_MAX_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
        if (ret != OPUS_OK) {
            throw AudioException((boost::format("failed to initialize maximum bandwidth narrow: %s")
                                  % opus_strerror(ret)).str());
        }

        resetEncoder();

        jbBuffer = jitter_buffer_init(iFrameSize);
        int margin = 10 * iFrameSize;
        jitter_buffer_ctl(jbBuffer, JITTER_BUFFER_SET_MARGIN, &margin);

        fFadeIn = new float[iFrameSize];
        fFadeOut = new float[iFrameSize];

        // Quarter sine wave stretched over one frame: the step is (pi / 2) / iFrameSize, so
        // fFadeIn rises from 0 towards 1 and fFadeOut is the same curve mirrored in time.
        const float mul = static_cast<float>(M_PI / (2.0 * static_cast<double>(iFrameSize)));
        for (unsigned int i = 0; i < iFrameSize; i++) {
            fFadeIn[i] = fFadeOut[iFrameSize - i - 1] = sinf(static_cast<float>(i) * mul);
        }
    } catch (...) {
        // Undo the partial construction, then let the original exception propagate.
        delete[] fFadeOut;
        delete[] fFadeIn;
        if (jbBuffer) {
            jitter_buffer_destroy(jbBuffer);
        }
        if (opusEncoder) {
            opus_encoder_destroy(opusEncoder);
        }
        if (opusDecoder) {
            opus_decoder_destroy(opusDecoder);
        }
        throw;
    }
}
|
|
|
|
// Tears down what the constructor set up: the Opus decoder and encoder (each only when
// non-null), the jitter buffer, and the two fade tables.
mumlib::Audio::~Audio() {
    if (opusDecoder != nullptr) {
        opus_decoder_destroy(opusDecoder);
    }
    if (opusEncoder != nullptr) {
        opus_encoder_destroy(opusEncoder);
    }

    jitter_buffer_destroy(jbBuffer);

    delete[] fFadeIn;
    delete[] fFadeOut;
}
|
|
|
|
void mumlib::Audio::setOpusEncoderBitrate(int bitrate) {
|
|
int error = opus_encoder_ctl(opusEncoder, OPUS_SET_BITRATE(bitrate));
|
|
if (error != OPUS_OK) {
|
|
throw AudioException((boost::format("failed to initialize transmission bitrate to %d B/s: %s")
|
|
% bitrate % opus_strerror(error)).str());
|
|
}
|
|
}
|
|
|
|
int mumlib::Audio::getOpusEncoderBitrate() {
|
|
opus_int32 bitrate;
|
|
int error = opus_encoder_ctl(opusEncoder, OPUS_GET_BITRATE(&bitrate));
|
|
if (error != OPUS_OK) {
|
|
throw AudioException((boost::format("failed to read Opus bitrate: %s") % opus_strerror(error)).str());
|
|
}
|
|
return bitrate;
|
|
}
|
|
|
|
void mumlib::Audio::addFrameToBuffer(uint8_t *inputBuffer, int inputLength, int sequence) {
|
|
int dataPointer = 0;
|
|
VarInt varInt(inputBuffer);
|
|
int opusDataLength = varInt.getValue();
|
|
dataPointer += varInt.getEncoded().size();
|
|
bool lastPacket = (opusDataLength & 0x2000) != 0;
|
|
opusDataLength &= 0x1fff;
|
|
|
|
auto *packet = reinterpret_cast<const unsigned char *>(&inputBuffer[dataPointer]);
|
|
int frame = opus_packet_get_nb_frames(packet, opusDataLength);
|
|
int samples = frame * opus_packet_get_samples_per_frame(packet, iSampleRate);
|
|
int channel = opus_packet_get_nb_channels(packet);
|
|
|
|
if(not sequence) {
|
|
resetJitterBuffer();
|
|
}
|
|
|
|
logger.info("Opus packet, frame: %d, samples: %d, channel: %d", frame, samples, channel);
|
|
|
|
JitterBufferPacket jbPacket;
|
|
jbPacket.data = reinterpret_cast<char *>(&inputBuffer[dataPointer]);
|
|
jbPacket.len = opusDataLength;
|
|
jbPacket.span = samples;
|
|
jbPacket.timestamp = iFrameSize * sequence;
|
|
jbPacket.user_data = lastPacket;
|
|
|
|
jitter_buffer_put(jbBuffer, &jbPacket);
|
|
}
|
|
|
|
std::pair<int, bool> mumlib::Audio::decodeOpusPayload(int16_t *pcmBuffer, int pcmBufferSize) {
|
|
int avail = 0;
|
|
spx_uint32_t remaining = 0;
|
|
jitter_buffer_ctl(jbBuffer, JITTER_BUFFER_GET_AVAILABLE_COUNT, &avail);
|
|
jitter_buffer_remaining_span(jbBuffer, remaining);
|
|
int timestamp = jitter_buffer_get_pointer_timestamp(jbBuffer);
|
|
|
|
logger.warn("jbBufer, avail: %d, remain: %d, timestamp: %d", avail, remaining, timestamp);
|
|
|
|
char data[4096];
|
|
JitterBufferPacket jbPacket;
|
|
jbPacket.data = data;
|
|
jbPacket.len = 4096;
|
|
|
|
spx_int32_t startofs = 0;
|
|
int opusDataLength;
|
|
int outputSize;
|
|
spx_uint32_t lastPacket;
|
|
|
|
if(jitter_buffer_get(jbBuffer, &jbPacket, iFrameSize, &startofs) == JITTER_BUFFER_OK) {
|
|
opusDataLength = jbPacket.len;
|
|
lastPacket = jbPacket.user_data;
|
|
} else {
|
|
jitter_buffer_update_delay(jbBuffer, &jbPacket, NULL);
|
|
}
|
|
|
|
if(opusDataLength) {
|
|
outputSize = opus_decode(opusDecoder,
|
|
reinterpret_cast<const unsigned char *>(jbPacket.data),
|
|
jbPacket.len,
|
|
pcmBuffer,
|
|
pcmBufferSize, 0);
|
|
} else {
|
|
outputSize = opus_decode(opusDecoder,
|
|
NULL, 0, pcmBuffer, pcmBufferSize, 0);
|
|
}
|
|
|
|
if(outputSize < 0) {
|
|
outputSize = iFrameSize;
|
|
memset(pcmBuffer, 0, iFrameSize * sizeof(float));
|
|
}
|
|
|
|
if(lastPacket) {
|
|
for(unsigned int i = 0; i < iFrameSize; i++)
|
|
pcmBuffer[i] *= fFadeOut[i];
|
|
}
|
|
|
|
for (int i = outputSize / iFrameSize; i > 0; --i) {
|
|
jitter_buffer_tick(jbBuffer);
|
|
}
|
|
|
|
logger.debug("%d B of Opus data decoded to %d PCM samples, last packet: %d.",
|
|
opusDataLength, outputSize, lastPacket);
|
|
|
|
return std::make_pair(outputSize, lastPacket);
|
|
}
|
|
|
|
// Writes inputLength bytes into dest starting at bufferOffset.
// NOTE(review): the mixed value is always 0 and src is never read, so this currently only
// zero-fills the destination range - confirm whether real mixing was intended.
void mumlib::Audio::mixAudio(uint8_t *dest, uint8_t *src, int bufferOffset, int inputLength) {
    int index = 0;
    while (index < inputLength) {
        float mixed = 0;

        // Clip to [-1,1]
        if (mixed < -1) {
            mixed = -1;
        } else if (mixed > 1) {
            mixed = 1;
        }

        dest[bufferOffset + index] = mixed;
        ++index;
    }
}
|
|
|
|
std::pair<int, bool> mumlib::Audio::decodeOpusPayload(uint8_t *inputBuffer,
|
|
int inputLength,
|
|
int16_t *pcmBuffer,
|
|
int pcmBufferSize) {
|
|
int64_t opusDataLength;
|
|
|
|
int dataPointer = 0;
|
|
VarInt varInt(inputBuffer);
|
|
opusDataLength = varInt.getValue();
|
|
dataPointer += varInt.getEncoded().size();
|
|
|
|
bool lastPacket = (opusDataLength & 0x2000) != 0;
|
|
opusDataLength = opusDataLength & 0x1fff;
|
|
|
|
if (inputLength < opusDataLength + dataPointer) {
|
|
throw AudioException((boost::format("invalid Opus payload (%d B): header %d B, expected Opus data length %d B")
|
|
% inputLength % dataPointer % opusDataLength).str());
|
|
}
|
|
|
|
// Issue #3 (Users speaking simultaneously)
|
|
// https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__decoder.html
|
|
// Opus is a stateful codec with overlapping blocks and as a result Opus packets are not coded independently of each other.
|
|
// Packets must be passed into the decoder serially and in the correct order for a correct decode.
|
|
// Lost packets can be replaced with loss concealment by calling the decoder with a null pointer and zero length for the missing packet.
|
|
// A single codec state may only be accessed from a single thread at a time and any required locking must be performed by the caller.
|
|
// Separate streams must be decoded with separate decoder states and can be decoded in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK defined.
|
|
auto *packet = reinterpret_cast<const unsigned char *>(&inputBuffer[dataPointer]);
|
|
int frame = opus_packet_get_nb_frames(packet, opusDataLength);
|
|
int samples = frame * opus_packet_get_samples_per_frame(packet, iSampleRate);
|
|
int outputSize = opus_decode(opusDecoder,
|
|
packet,
|
|
opusDataLength,
|
|
pcmBuffer,
|
|
pcmBufferSize,
|
|
0);
|
|
|
|
if (outputSize <= 0) {
|
|
throw AudioException((boost::format("failed to decode %d B of OPUS data: %s") % inputLength %
|
|
opus_strerror(outputSize)).str());
|
|
}
|
|
|
|
logger.debug("%d B of Opus data decoded to %d PCM samples, last packet: %d.",
|
|
opusDataLength, outputSize, lastPacket);
|
|
|
|
return std::make_pair(outputSize, lastPacket);
|
|
}
|
|
|
|
int mumlib::Audio::encodeAudioPacket(int target, int16_t *inputPcmBuffer, int inputLength, uint8_t *outputBuffer,
|
|
int outputBufferSize) {
|
|
|
|
using namespace std::chrono;
|
|
|
|
const int lastAudioPacketSentInterval = duration_cast<milliseconds>(
|
|
system_clock::now() - lastEncodedAudioPacketTimestamp).count();
|
|
|
|
if (lastAudioPacketSentInterval > RESET_SEQUENCE_NUMBER_INTERVAL.total_milliseconds() + 1000) {
|
|
logger.debug("Last audio packet was sent %d ms ago, resetting encoder.", lastAudioPacketSentInterval);
|
|
resetEncoder();
|
|
}
|
|
|
|
std::vector<uint8_t> header;
|
|
|
|
header.push_back(static_cast<unsigned char &&>(0x80 | target));
|
|
|
|
auto sequenceNumberEnc = VarInt(outgoingSequenceNumber).getEncoded();
|
|
header.insert(header.end(), sequenceNumberEnc.begin(), sequenceNumberEnc.end());
|
|
|
|
uint8_t tmpOpusBuffer[1024];
|
|
const int outputSize = opus_encode(opusEncoder,
|
|
inputPcmBuffer,
|
|
inputLength,
|
|
tmpOpusBuffer,
|
|
min(outputBufferSize, 1024)
|
|
);
|
|
|
|
if (outputSize <= 0) {
|
|
throw AudioException((boost::format("failed to encode %d B of PCM data: %s") % inputLength %
|
|
opus_strerror(outputSize)).str());
|
|
}
|
|
|
|
auto outputSizeEnc = VarInt(outputSize).getEncoded();
|
|
header.insert(header.end(), outputSizeEnc.begin(), outputSizeEnc.end());
|
|
|
|
memcpy(outputBuffer, &header[0], header.size());
|
|
memcpy(outputBuffer + header.size(), tmpOpusBuffer, (size_t) outputSize);
|
|
|
|
int incrementNumber = 100 * inputLength / iSampleRate;
|
|
|
|
outgoingSequenceNumber += incrementNumber;
|
|
|
|
lastEncodedAudioPacketTimestamp = std::chrono::system_clock::now();
|
|
|
|
return static_cast<int>(outputSize + header.size());
|
|
}
|
|
|
|
void mumlib::Audio::resetEncoder() {
|
|
int status = opus_encoder_ctl(opusEncoder, OPUS_RESET_STATE, nullptr);
|
|
|
|
if (status != OPUS_OK) {
|
|
throw AudioException((boost::format("failed to reset encoder: %s") % opus_strerror(status)).str());
|
|
}
|
|
|
|
outgoingSequenceNumber = 0;
|
|
}
|
|
|
|
// Logs a debug message and resets the jitter buffer via jitter_buffer_reset().
// Within this file it is called from addFrameToBuffer() when the incoming sequence number is 0.
void mumlib::Audio::resetJitterBuffer() {
    logger.debug("Last audio packet, resetting jitter buffer");
    jitter_buffer_reset(jbBuffer);
}
|
|
|
|
// Splits a raw incoming audio packet into its header fields and payload.
//
// Layout read here: the first byte holds the packet type (top 3 bits) and target (low 5 bits),
// followed by two VarInts (session id, sequence number); the rest is the audio payload.
//
// inputBuffer       - raw packet bytes; the returned audioPayload points into this buffer.
// inputBufferLength - number of valid bytes in inputBuffer.
//
// Throws AudioException when the packet is empty or ends before any payload byte. The length
// is checked before each header field is read rather than only after parsing.
mumlib::IncomingAudioPacket mumlib::Audio::decodeIncomingAudioPacket(uint8_t *inputBuffer, int inputBufferLength) {
    const auto rejectPacket = [inputBufferLength](int headerLength) {
        throw AudioException((boost::format("invalid incoming audio packet (%d B): header %d B") % inputBufferLength %
                              headerLength).str());
    };

    if (inputBuffer == nullptr || inputBufferLength < 1) {
        rejectPacket(0);
    }

    mumlib::IncomingAudioPacket incomingAudioPacket{};

    incomingAudioPacket.type = static_cast<AudioPacketType>((inputBuffer[0] & 0xE0) >> 5);
    incomingAudioPacket.target = inputBuffer[0] & 0x1F;

    std::array<int64_t *, 2> varInts = {&incomingAudioPacket.sessionId, &incomingAudioPacket.sequenceNumber};

    int dataPointer = 1;
    for (int64_t *val : varInts) {
        // Do not start reading a VarInt at or beyond the end of the packet.
        if (dataPointer >= inputBufferLength) {
            rejectPacket(dataPointer);
        }
        VarInt varInt(&inputBuffer[dataPointer]);
        *val = varInt.getValue();
        dataPointer += varInt.getEncoded().size();
    }

    // At least one payload byte must follow the header.
    if (dataPointer >= inputBufferLength) {
        rejectPacket(dataPointer);
    }

    incomingAudioPacket.audioPayload = &inputBuffer[dataPointer];
    incomingAudioPacket.audioPayloadLength = inputBufferLength - dataPointer;

    // int64_t is not "long" on every platform; cast explicitly to match the conversions.
    logger.debug(
            "Received %d B of audio packet, %d B header, %d B payload (target: %d, sessionID: %lld, seq num: %lld).",
            inputBufferLength,
            dataPointer,
            incomingAudioPacket.audioPayloadLength,
            incomingAudioPacket.target,
            static_cast<long long>(incomingAudioPacket.sessionId),
            static_cast<long long>(incomingAudioPacket.sequenceNumber));

    return incomingAudioPacket;
}
|
|
|
|
|