From 9a52f4e3ffc3bc71274cc99a223fde8bb32dea75 Mon Sep 17 00:00:00 2001 From: Nicholas Tinsley Date: Thu, 25 Jan 2024 13:48:38 -0500 Subject: [PATCH] Remux audio if possible when transcoding. Addresses #11712, #12674, #12945, #13084, #13346. --- .../securesms/video/interfaces/Muxer.java | 2 + .../video/videoconverter/AndroidMuxer.java | 5 + .../videoconverter/AudioTrackConverter.java | 96 +++++++++++++++++-- .../video/videoconverter/MediaConverter.java | 6 +- .../video/videoconverter/muxer/AacTrack.java | 9 +- .../videoconverter/muxer/StreamingMuxer.java | 54 +++++++++-- .../videoconverter/utils/MediaCodecCompat.kt | 7 ++ 7 files changed, 163 insertions(+), 16 deletions(-) diff --git a/video/lib/src/main/java/org/thoughtcrime/securesms/video/interfaces/Muxer.java b/video/lib/src/main/java/org/thoughtcrime/securesms/video/interfaces/Muxer.java index a88f4054b7..918dd3de69 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/interfaces/Muxer.java +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/interfaces/Muxer.java @@ -24,4 +24,6 @@ public interface Muxer { void writeSampleData(int trackIndex, @NonNull ByteBuffer byteBuf, @NonNull MediaCodec.BufferInfo bufferInfo) throws IOException; void release(); + + boolean supportsAudioRemux(); } diff --git a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AndroidMuxer.java b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AndroidMuxer.java index d44ae864c7..bfc7333784 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AndroidMuxer.java +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AndroidMuxer.java @@ -51,4 +51,9 @@ final class AndroidMuxer implements Muxer { public void release() { muxer.release(); } + + @Override + public boolean supportsAudioRemux() { + return false; + } } diff --git a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AudioTrackConverter.java 
b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AudioTrackConverter.java index 8d938ae906..036d62b10d 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AudioTrackConverter.java +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/AudioTrackConverter.java @@ -1,5 +1,6 @@ package org.thoughtcrime.securesms.video.videoconverter; +import android.annotation.SuppressLint; import android.media.MediaCodec; import android.media.MediaCodecInfo; import android.media.MediaExtractor; @@ -27,7 +28,8 @@ final class AudioTrackConverter { private static final String OUTPUT_AUDIO_MIME_TYPE = VideoConstants.AUDIO_MIME_TYPE; // Advanced Audio Coding private static final int OUTPUT_AUDIO_AAC_PROFILE = MediaCodecInfo.CodecProfileLevel.AACObjectLC; //MediaCodecInfo.CodecProfileLevel.AACObjectHE; - private static final int TIMEOUT_USEC = 10000; + private static final int SAMPLE_BUFFER_SIZE = 16 * 1024; + private static final int TIMEOUT_USEC = 10000; private final long mTimeFrom; private final long mTimeTo; @@ -39,6 +41,10 @@ final class AudioTrackConverter { private final MediaCodec mAudioDecoder; private final MediaCodec mAudioEncoder; + private final boolean skipTrancode; + private final ByteBuffer instanceSampleBuffer = ByteBuffer.allocateDirect(SAMPLE_BUFFER_SIZE); + private final MediaCodec.BufferInfo instanceBufferInfo = new MediaCodec.BufferInfo(); + private final ByteBuffer[] mAudioDecoderInputBuffers; private ByteBuffer[] mAudioDecoderOutputBuffers; private final ByteBuffer[] mAudioEncoderInputBuffers; @@ -68,7 +74,8 @@ final class AudioTrackConverter { final @NonNull MediaInput input, final long timeFrom, final long timeTo, - final int audioBitrate) throws IOException { + final int audioBitrate, + final boolean allowSkipTranscode) throws IOException { final MediaExtractor audioExtractor = input.createExtractor(); final int audioInputTrack = getAndSelectAudioTrackIndex(audioExtractor); @@ -76,7 
+83,7 @@ final class AudioTrackConverter { audioExtractor.release(); return null; } - return new AudioTrackConverter(audioExtractor, audioInputTrack, timeFrom, timeTo, audioBitrate); + return new AudioTrackConverter(audioExtractor, audioInputTrack, timeFrom, timeTo, audioBitrate, allowSkipTranscode); } private AudioTrackConverter( @@ -84,7 +91,8 @@ final class AudioTrackConverter { final int audioInputTrack, long timeFrom, long timeTo, - int audioBitrate) throws IOException { + int audioBitrate, + final boolean allowSkipTranscode) throws IOException { mTimeFrom = timeFrom; mTimeTo = timeTo; @@ -102,6 +110,13 @@ final class AudioTrackConverter { final MediaFormat inputAudioFormat = mAudioExtractor.getTrackFormat(audioInputTrack); mInputDuration = inputAudioFormat.containsKey(MediaFormat.KEY_DURATION) ? inputAudioFormat.getLong(MediaFormat.KEY_DURATION) : 0; + skipTrancode = allowSkipTranscode && formatCanSkipTranscode(inputAudioFormat, audioBitrate); + if (skipTrancode) { + mEncoderOutputAudioFormat = inputAudioFormat; + } + + if (VERBOSE) Log.d(TAG, "audio skipping transcoding: " + skipTrancode); + final MediaFormat outputAudioFormat = MediaFormat.createAudioFormat( OUTPUT_AUDIO_MIME_TYPE, @@ -109,7 +124,7 @@ final class AudioTrackConverter { inputAudioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)); outputAudioFormat.setInteger(MediaFormat.KEY_BIT_RATE, audioBitrate); outputAudioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, OUTPUT_AUDIO_AAC_PROFILE); - outputAudioFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, 16 * 1024); + outputAudioFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, SAMPLE_BUFFER_SIZE); // Create a MediaCodec for the desired codec, then configure it as an encoder with // our desired properties. Request a Surface to use for input. 
@@ -135,9 +150,11 @@ final class AudioTrackConverter { if (mEncoderOutputAudioFormat != null) { Log.d(TAG, "muxer: adding audio track."); if (!mEncoderOutputAudioFormat.containsKey(MediaFormat.KEY_BIT_RATE)) { + Log.d(TAG, "muxer: fixed MediaFormat to add bitrate."); mEncoderOutputAudioFormat.setInteger(MediaFormat.KEY_BIT_RATE, mAudioBitrate); } if (!mEncoderOutputAudioFormat.containsKey(MediaFormat.KEY_AAC_PROFILE)) { + Log.d(TAG, "muxer: fixed MediaFormat to add AAC profile."); mEncoderOutputAudioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, OUTPUT_AUDIO_AAC_PROFILE); } mOutputAudioTrack = muxer.addTrack(mEncoderOutputAudioFormat); @@ -145,6 +162,12 @@ final class AudioTrackConverter { } void step() throws IOException { + + if (skipTrancode && mEncoderOutputAudioFormat != null) { + extractAndRemux(); + return; + } + // Extract audio from file and feed to decoder. // Do not extract audio if we have determined the output format but we are not yet // ready to mux the frames. @@ -164,7 +187,8 @@ final class AudioTrackConverter { Log.d(TAG, "audio extractor: returned buffer of size " + size); Log.d(TAG, "audio extractor: returned buffer for time " + presentationTime); } - mAudioExtractorDone = size < 0 || (mTimeTo > 0 && presentationTime > mTimeTo * 1000); + mAudioExtractorDone = isAudioExtractorDone(size, presentationTime); + if (mAudioExtractorDone) { if (VERBOSE) Log.d(TAG, "audio extractor: EOS"); mAudioDecoder.queueInputBuffer( @@ -388,6 +412,47 @@ final class AudioTrackConverter { Preconditions.checkState("no frame should be pending", -1 == mPendingAudioDecoderOutputBufferIndex); } + @SuppressLint("WrongConstant") // flags extracted from sample by MediaExtractor should be safe for MediaCodec.BufferInfo + private void extractAndRemux() throws IOException { + if (mMuxer == null) { + Log.d(TAG, "audio remuxer: tried to execute before muxer was ready"); + return; + } + int size = mAudioExtractor.readSampleData(instanceSampleBuffer, 0); + long presentationTime = 
mAudioExtractor.getSampleTime(); + int sampleFlags = mAudioExtractor.getSampleFlags(); + if (VERBOSE) { + Log.d(TAG, "audio extractor: returned buffer of size " + size); + Log.d(TAG, "audio extractor: returned buffer for time " + presentationTime); + Log.d(TAG, "audio extractor: returned buffer with flags " + Integer.toBinaryString(sampleFlags)); + } + mAudioExtractorDone = isAudioExtractorDone(size, presentationTime); + + if (mAudioExtractorDone) { + if (VERBOSE) Log.d(TAG, "audio encoder: EOS"); + instanceBufferInfo.set(0, 0, presentationTime, MediaCodec.BUFFER_FLAG_END_OF_STREAM); + mAudioEncoderDone = true; + } else { + instanceBufferInfo.set(0, size, presentationTime, sampleFlags); + } + + mMuxer.writeSampleData(mOutputAudioTrack, instanceSampleBuffer, instanceBufferInfo); + + if (VERBOSE) { + Log.d(TAG, "audio extractor: wrote sample at " + presentationTime); + } + + mAudioExtractor.advance(); + + mAudioExtractedFrameCount++; + mAudioEncodedFrameCount++; + mMuxingAudioPresentationTime = Math.max(mMuxingAudioPresentationTime, presentationTime); + } + + private boolean isAudioExtractorDone(int size, long presentationTime) { + return presentationTime == -1 || size < 0 || (mTimeTo > 0 && presentationTime > mTimeTo * 1000); + } + private static @NonNull MediaCodec createAudioDecoder(final @NonNull MediaFormat inputFormat) throws IOException { final MediaCodec decoder = MediaCodec.createDecoderByType(MediaConverter.getMimeTypeFor(inputFormat)); @@ -420,4 +485,23 @@ final class AudioTrackConverter { private static boolean isAudioFormat(final @NonNull MediaFormat format) { return MediaConverter.getMimeTypeFor(format).startsWith("audio/"); } + + /** + * HE-AAC input bitstreams exhibit bad decoder behavior: the decoder's output buffer's presentation timestamp is way larger than the input sample's. + * This mismatch propagates throughout the transcoding pipeline and results in slowed, distorted audio in the output file. 
+ * To sidestep this: AAC and its variants are a supported output codec, and HE-AAC bitrates are almost always lower than our target bitrate, + * so we can pass through the input bitstream unaltered, relying on consumers of the output file to render HE-AAC correctly. Only input whose MIME type equals the output AAC MIME type and whose reported bitrate does not exceed {@code desiredBitrate} qualifies; a format that reports no bitrate never does. + */ + private static boolean formatCanSkipTranscode(MediaFormat audioFormat, int desiredBitrate) { + /* Test for the key explicitly rather than catching the NullPointerException getInteger() throws when it is absent. */ + if (!audioFormat.containsKey(MediaFormat.KEY_BIT_RATE)) { + if (VERBOSE) { + Log.d(TAG, "could not find bitrate in mediaFormat, can't skip transcoding."); + } + return false; + } + int inputBitrate = audioFormat.getInteger(MediaFormat.KEY_BIT_RATE); + String inputMimeType = audioFormat.getString(MediaFormat.KEY_MIME); + return OUTPUT_AUDIO_MIME_TYPE.equals(inputMimeType) && inputBitrate <= desiredBitrate; + } } diff --git a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/MediaConverter.java b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/MediaConverter.java index 628d946fed..d4465d92d4 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/MediaConverter.java +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/MediaConverter.java @@ -141,15 +141,15 @@ public final class MediaConverter { AudioTrackConverter audioTrackConverter = null; try { + muxer = mOutput.createMuxer(); + videoTrackConverter = VideoTrackConverter.create(mInput, mTimeFrom, mTimeTo, mVideoResolution, mVideoBitrate, mVideoCodec); - audioTrackConverter = AudioTrackConverter.create(mInput, mTimeFrom, mTimeTo, mAudioBitrate); + audioTrackConverter = AudioTrackConverter.create(mInput, mTimeFrom, mTimeTo, mAudioBitrate, muxer.supportsAudioRemux()); if (videoTrackConverter == null && audioTrackConverter == null) { throw new EncodingException("No video and audio tracks"); } - muxer = mOutput.createMuxer(); - doExtractDecodeEditEncodeMux( videoTrackConverter, audioTrackConverter, diff --git 
a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/AacTrack.java b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/AacTrack.java index 5cefe5db51..a37a454bf9 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/AacTrack.java +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/AacTrack.java @@ -4,6 +4,7 @@ import android.util.SparseIntArray; import org.mp4parser.boxes.iso14496.part1.objectdescriptors.AudioSpecificConfig; import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderConfigDescriptor; +import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderSpecificInfo; import org.mp4parser.boxes.iso14496.part1.objectdescriptors.ESDescriptor; import org.mp4parser.boxes.iso14496.part1.objectdescriptors.SLConfigDescriptor; import org.mp4parser.boxes.iso14496.part12.SampleDescriptionBox; @@ -16,6 +17,8 @@ import org.mp4parser.streaming.input.StreamingSampleImpl; import java.io.IOException; import java.nio.ByteBuffer; +import androidx.annotation.Nullable; + abstract class AacTrack extends AbstractStreamingTrack { private static final SparseIntArray SAMPLING_FREQUENCY_INDEX_MAP = new SparseIntArray(); @@ -39,7 +42,7 @@ abstract class AacTrack extends AbstractStreamingTrack { private int sampleRate; - AacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile) { + AacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile, @Nullable DecoderSpecificInfo decoderSpecificInfo) { this.sampleRate = sampleRate; final DefaultSampleFlagsTrackExtension defaultSampleFlagsTrackExtension = new DefaultSampleFlagsTrackExtension(); @@ -83,6 +86,10 @@ abstract class AacTrack extends AbstractStreamingTrack { audioSpecificConfig.setChannelConfiguration(channelCount); decoderConfigDescriptor.setAudioSpecificInfo(audioSpecificConfig); + if (decoderSpecificInfo != null) { + 
decoderConfigDescriptor.setDecoderSpecificInfo(decoderSpecificInfo); + } + descriptor.setDecoderConfigDescriptor(decoderConfigDescriptor); esds.setEsDescriptor(descriptor); diff --git a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/StreamingMuxer.java b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/StreamingMuxer.java index 3ad40595d2..bb6c9d7f10 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/StreamingMuxer.java +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/muxer/StreamingMuxer.java @@ -4,9 +4,13 @@ import android.media.MediaCodec; import android.media.MediaFormat; import androidx.annotation.NonNull; +import androidx.annotation.Nullable; +import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderSpecificInfo; import org.mp4parser.streaming.StreamingTrack; +import org.signal.core.util.logging.Log; import org.thoughtcrime.securesms.video.interfaces.Muxer; +import org.thoughtcrime.securesms.video.videoconverter.utils.MediaCodecCompat; import java.io.IOException; import java.io.OutputStream; @@ -16,7 +20,7 @@ import java.util.ArrayList; import java.util.List; public final class StreamingMuxer implements Muxer { - + private static final String TAG = Log.tag(StreamingMuxer.class); private final OutputStream outputStream; private final List tracks = new ArrayList<>(); private Mp4Writer mp4Writer; @@ -55,7 +59,7 @@ public final class StreamingMuxer implements Muxer { tracks.add(new MediaCodecAvcTrack(format)); break; case "audio/mp4a-latm": - tracks.add(new MediaCodecAacTrack(format)); + tracks.add(MediaCodecAacTrack.create(format)); break; case "video/hevc": tracks.add(new MediaCodecHevcTrack(format)); @@ -75,6 +79,11 @@ public final class StreamingMuxer implements Muxer { public void release() { } + @Override + public boolean supportsAudioRemux() { + return true; + } + interface MediaCodecTrack { void 
writeSampleData(@NonNull ByteBuffer byteBuf, @NonNull MediaCodec.BufferInfo bufferInfo) throws IOException; @@ -123,10 +132,43 @@ public final class StreamingMuxer implements Muxer { static class MediaCodecAacTrack extends AacTrack implements MediaCodecTrack { - MediaCodecAacTrack(@NonNull MediaFormat format) { - super(format.getInteger(MediaFormat.KEY_BIT_RATE), format.getInteger(MediaFormat.KEY_BIT_RATE), - format.getInteger(MediaFormat.KEY_SAMPLE_RATE), format.getInteger(MediaFormat.KEY_CHANNEL_COUNT), - format.getInteger(MediaFormat.KEY_AAC_PROFILE)); + private MediaCodecAacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile, @Nullable DecoderSpecificInfo decoderSpecificInfo) { + super(avgBitrate, maxBitrate, sampleRate, channelCount, aacProfile, decoderSpecificInfo); + } + + public static MediaCodecAacTrack create(@NonNull MediaFormat format) { + final int bitrate = format.getInteger(MediaFormat.KEY_BIT_RATE); + final int maxBitrate; /* falls back to the average bitrate when the format carries no explicit maximum */ + if (format.containsKey(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE)) { + maxBitrate = format.getInteger(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE); + } else { + maxBitrate = bitrate; + } + + final DecoderSpecificInfo filledDecoderSpecificInfo; + if (format.containsKey(MediaCodecCompat.MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0)) { /* guard on csd-0 itself, not max-bitrate, so csd is never null when parsed and is never dropped when present */ + final ByteBuffer csd = format.getByteBuffer(MediaCodecCompat.MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0); + + DecoderSpecificInfo decoderSpecificInfo = new DecoderSpecificInfo(); + boolean parseSuccess = false; + try { + decoderSpecificInfo.parseDetail(csd); + parseSuccess = true; + } catch (IOException e) { + Log.w(TAG, "Could not parse AAC codec-specific data!", e); + } + if (parseSuccess) { + filledDecoderSpecificInfo = decoderSpecificInfo; + } else { + filledDecoderSpecificInfo = null; + } + } else { + filledDecoderSpecificInfo = null; + } + + return new MediaCodecAacTrack(bitrate, maxBitrate, + format.getInteger(MediaFormat.KEY_SAMPLE_RATE), 
format.getInteger(MediaFormat.KEY_CHANNEL_COUNT), + format.getInteger(MediaFormat.KEY_AAC_PROFILE), filledDecoderSpecificInfo); } @Override diff --git a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/utils/MediaCodecCompat.kt b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/utils/MediaCodecCompat.kt index d41c678ebd..931c2784bc 100644 --- a/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/utils/MediaCodecCompat.kt +++ b/video/lib/src/main/java/org/thoughtcrime/securesms/video/videoconverter/utils/MediaCodecCompat.kt @@ -17,6 +17,13 @@ import java.io.IOException object MediaCodecCompat { private const val TAG = "MediaDataSourceCompat" + const val MEDIA_FORMAT_KEY_MAX_BIT_RATE = "max-bitrate" + + // https://developer.android.com/reference/android/media/MediaCodec#CSD + const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0 = "csd-0" + const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_1 = "csd-1" + const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_2 = "csd-2" + @JvmStatic fun findDecoder(inputFormat: MediaFormat): Pair { val codecs = MediaCodecList(MediaCodecList.REGULAR_CODECS)