Ocean
Loading...
Searching...
No Matches
avfoundation/VideoDecoder.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_MEDIA_AVF_VIDEO_DECODER_H
9#define META_OCEAN_MEDIA_AVF_VIDEO_DECODER_H
10
12
13#include "ocean/base/Frame.h"
14#include "ocean/base/Lock.h"
15
16#include "ocean/math/Numeric.h"
17
18#include <VideoToolbox/VideoToolbox.h>
19
20#include <deque>
21
22namespace Ocean
23{
24
25namespace Media
26{
27
28namespace AVFoundation
29{
30
31/**
32 * This class implements a simple video decoder for iOS/macOS using encoded media samples from memory as input.
33 * The decoder uses Apple's VideoToolbox framework (VTDecompressionSession) for hardware-accelerated decoding.
34 *
35 * Usage:
36 * @code
37 * // a function which is e.g., running in a separate thread
38 * void threadRun()
39 * {
40 * VideoDecoder videoDecoder;
41 *
42 * // initializing the decoder with the input format of the media samples
43 * if (!videoDecoder.initialize("video/avc", 1920u, 1080u))
44 * {
45 * // handle error
46 * }
47 *
48 * if (!videoDecoder.start())
49 * {
50 * // handle error
51 * }
52 *
53 * unsigned int frameIndex = 0u;
54 * double frameRate = 30.0;
55 *
56 * while (true)
57 * {
58 * void* sampleData = nullptr;
59 * size_t sampleSize = 0;
60 *
61 * // external function: function needs to provide the new media samples from an external source - e.g., from an external webcam, a video stream, etc.
62 * if (doesNewInputSampleExist(sampleData, &sampleSize))
63 * {
64 * // presentation time in microseconds
65 * uint64_t presentationTime = uint64_t(1.0e6 * double(frameIndex) / frameRate);
66 *
67 * // we forward the media sample to the decoder, eventually it will be decoded and will be available through popFrame()
68 * if (!videoDecoder.pushSample(sampleData, sampleSize, presentationTime))
69 * {
70 * // handle error
71 * }
72 *
73 * ++frameIndex;
74 * }
75 *
76 * // we simply check whether another frame has been decoded (there may be a delay between pushing a sample and receiving the corresponding decoded frame)
77 * Frame newFrame = videoDecoder.popFrame();
78 *
79 * if (newFrame.isValid())
80 * {
81 * // external function: receives new frames and processes them
82 * sendFrameToReceiver(std::move(newFrame));
83 * }
84 * }
85 * }
86 * @endcode
87 * @ingroup mediaavf
88 */
90{
91 public:
92
93 /**
94 * Definition of the decoding mode controlling frame delivery order and latency.
95 */
97 {
98 /// Frames are decoded and delivered with minimal latency; frame ordering is not guaranteed, so decoded frames may arrive out of presentation-timestamp order, but as fast as possible.
99 /// Use this when low latency is more important than frame order.
101
102 /// Frames are decoded and delivered in presentation-timestamp order, but frame delivery may be delayed.
104 };
105
106 protected:
107
108 /**
109 * Definition of a scoped object holding a CMBlockBufferRef object.
110 * The wrapped CMBlockBufferRef object will be released automatically once the scoped object does not exist anymore.
111 */
113
114 /**
115 * Definition of a scoped object holding a CMFormatDescriptionRef object.
116 * The wrapped CMFormatDescriptionRef object will be released automatically once the scoped object does not exist anymore.
117 */
119
120 /**
121 * Release function for VTDecompressionSessionRef that invalidates and releases the session.
122 * @param session The session to release
123 */
125
126 /**
127 * Definition of a scoped object holding a VTDecompressionSessionRef object.
128 * The wrapped VTDecompressionSessionRef object will be invalidated and released automatically once the scoped object does not exist anymore.
129 */
131
132 /**
133 * Definition of a decoded frame entry.
134 */
136 {
137 /// The decoded frame.
139
140 /// The presentation time in microseconds.
141 int64_t presentationTime_ = 0;
142 };
143
144 /// Definition of a vector holding decoded frames.
145 using DecodedFrames = std::vector<DecodedFrame>;
146
147 /// Definition of a queue holding decoded frames.
148 using DecodedFrameQueue = std::deque<DecodedFrame>;
149
150 /// Definition of a queue holding presentation timestamps.
151 using TimestampQueue = std::deque<int64_t>;
152
153 public:
154
155 /**
156 * Default constructor creating an un-initialized decoder.
157 */
159
160 /**
161 * Destructs the video decoder and releases all associated resources.
162 */
164
165 /**
166 * Initializes the video decoder with codec configuration data (SPS/PPS for H.264, VPS/SPS/PPS for HEVC).
167 * @param mime The MIME type (Multipurpose Internet Mail Extensions) of the video to be decoded, e.g., "video/avc", "video/hevc", ...
168 * @param width The width of the video to be decoded, in pixel, with range [1, infinity)
169 * @param height The height of the video to be decoded, in pixel, with range [1, infinity)
170 * @param codecConfigData The codec configuration data containing parameter sets (SPS/PPS for H.264), can be nullptr if not available yet
171 * @param codecConfigSize The size of the codec configuration data in bytes, 0 if not available yet
172 * @param decodingMode The decoding mode controlling frame delivery order, default is DM_PERFORMANCE
173 * @return True, if succeeded
174 * @see isInitialized().
175 */
176 bool initialize(const std::string& mime, const unsigned int width, const unsigned int height, const void* codecConfigData = nullptr, const size_t codecConfigSize = 0, const DecodingMode decodingMode = DM_PERFORMANCE);
177
178 /**
179 * Starts the video decoder.
180 * @return True, if succeeded
181 * @see isStarted().
182 */
183 bool start();
184
185 /**
186 * Stops the video decoder.
187 * @return True, if succeeded
188 */
189 bool stop();
190
191 /**
192 * Adds a new media sample which needs to be decoded to the video decoder.
193 * The decoder needs to be initialized and started.
194 * The presentation time is mainly intended to allow associating the provided encoded media sample with the resulting decoded frame when calling popFrame().
195 * However, it's recommended to define a reasonable presentation time for each sample (e.g., let the first sample start at 0 and increment the time by 10^6/fps for each following sample).
196 * @param data The data of the encoded media sample, must be valid
197 * @param size The size of the encoded media sample, in bytes, with range [1, infinity)
198 * @param presentationTime The presentation time of the sample, in microseconds, with range [0, infinity)
199 * @return True, if succeeded
200 * @see start(), isInitialized(), isStarted().
201 */
202 bool pushSample(const void* data, const size_t size, const uint64_t presentationTime);
203
204 /**
205 * Pops the next decoded frame from the decoder.
206 * Optionally, the frame's presentation time will be returned; this is the presentation time which was used when the corresponding sample was provided in pushSample().
207 * @param presentationTime Optional resulting presentation time in microseconds, with range (-infinity, infinity)
208 * @return The resulting frame, invalid if currently no decoded frame is available
209 * @see pushSample().
210 */
211 Frame popFrame(int64_t* presentationTime = nullptr);
212
213 /**
214 * Returns whether this decoder is initialized.
215 * @return True, if so
216 * @see initialize().
217 */
218 inline bool isInitialized() const;
219
220 /**
221 * Returns whether this decoder is currently running.
222 * @return True, if so
223 * @see start().
224 */
225 inline bool isStarted() const;
226
227 /**
228 * Explicitly releases this video decoder.
229 * If the decoder is still running, the decoder will be stopped as well.
230 */
231 void release();
232
233 /**
234 * Converts Annex B formatted H.264/H.265 data to AVCC/HVCC format.
235 * For encoded samples (isCodecConfig = false): Replaces start code prefixes (00 00 00 01 or 00 00 01) with 4-byte big-endian length prefixes.
236 * For codec config (isCodecConfig = true): Extracts SPS/PPS (and VPS for HEVC) NAL units and builds an AVCC/HVCC configuration record.
237 * @param annexBData The Annex B formatted data containing NAL units with start code prefixes, must be valid
238 * @param annexBSize The size of the Annex B data in bytes, with range [4, infinity)
239 * @param avccData The resulting AVCC/HVCC formatted data
240 * @param isCodecConfig True to build an AVCC/HVCC codec configuration record from the NAL units; False to simply replace start codes with length prefixes
241 * @param mime The MIME type, used only when isCodecConfig is true to determine H.264 vs HEVC format, either "video/avc" or "video/hevc"
242 * @return True if conversion succeeded; False if the input data is invalid or conversion failed
243 */
244 static bool convertAnnexBToAvcc(const void* annexBData, const size_t annexBSize, std::vector<uint8_t>& avccData, const bool isCodecConfig = false, const std::string& mime = "video/avc");
245
246 /**
247 * Determines whether the given data is in Annex B format (start code prefixed) or AVCC format (length prefixed).
248 * Annex B format uses start codes (0x00 0x00 0x00 0x01 or 0x00 0x00 0x01) to delimit NAL units.
249 * AVCC format uses 4-byte big-endian length prefixes before each NAL unit.
250 *
251 * Note: For codec configuration data, use isCodecConfig=true as AVCC config starts with version byte 0x01.
252 * For regular NAL unit samples, use isCodecConfig=false which applies more sophisticated detection
253 * to distinguish AVCC length prefixes from Annex B start codes (especially for NAL sizes 256-511 bytes
254 * where the length prefix 0x00 0x00 0x01 XX looks like an Annex B 3-byte start code).
255 *
256 * @param data The data to check, must be valid
257 * @param size The size of the data in bytes, with range [4, infinity)
258 * @param isCodecConfig True if the data is codec configuration (SPS/PPS), false for regular NAL samples
259 * @return True if the data is in Annex B format; false if it's in AVCC format
260 */
261 static bool isAnnexB(const void* data, const size_t size, const bool isCodecConfig = false);
262
263 /**
264 * Move operator.
265 * @param videoDecoder The video decoder to be moved
266 * @return Reference to this object
267 */
268 inline VideoDecoder& operator=(VideoDecoder&& videoDecoder) noexcept;
269
270 protected:
271
272 /**
273 * Disabled copy constructor.
274 */
275 VideoDecoder(const VideoDecoder&) = delete;
276
277 /**
278 * Disabled copy operator.
279 * @return Reference to this object
280 */
282
283 /**
284 * Handles a newly decoded frame by either delivering it directly or deferring it for later delivery in DM_ORDERED mode.
285 * @param decodedFrame The decoded frame to be processed
286 */
287 void onNewDecodedFrame(DecodedFrame&& decodedFrame);
288
289 /**
290 * Removes a presentation timestamp from the pending sample timestamps queue.
291 * This is called when a frame fails to decode, so that the failed frame does not block delivery of subsequent frames.
292 * @param presentationTime The presentation time to remove, in microseconds
293 */
294 void removePendingSampleTimestamps(const int64_t presentationTime);
295
296 /**
297 * Processes deferred frames whose presentation timestamps now match the front of the pending timestamps queue.
298 */
300
301 /**
302 * Callback function for decoded frames from VideoToolbox.
303 * @param decompressionOutputRefCon Reference to this decoder
304 * @param sourceFrameRefCon Reference containing the presentation time
305 * @param status The status of the decompression operation
306 * @param infoFlags Information flags
307 * @param imageBuffer The decoded image buffer, may be nullptr on error
308 * @param presentationTimeStamp The presentation time stamp
309 * @param presentationDuration The presentation duration
310 */
311 static void decompressionOutputCallback(void* decompressionOutputRefCon, void* sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef imageBuffer, CMTime presentationTimeStamp, CMTime presentationDuration);
312
313 /**
314 * Translates a MIME type to a CMVideoCodecType.
315 * @param mime The MIME type
316 * @return The corresponding codec type, 0 if not supported
317 */
318 static CMVideoCodecType mimeToCodecType(const std::string& mime);
319
320 protected:
321
322 /// The video format description.
324
325 /// The decompression session.
327
328 /// The queue of timestamps of submitted but not yet processed samples.
330
331 /// The queue of decoded frames ready for consumption.
333
334 /// Decoded frames waiting to be delivered in presentation-timestamp order (DM_ORDERED mode only).
336
337 /// The width of the video.
338 unsigned int width_ = 0u;
339
340 /// The height of the video.
341 unsigned int height_ = 0u;
342
343 /// The decoding mode.
345
346 /// True, if the decoder is currently started.
347 bool isStarted_ = false;
348
349 /// The decoder's lock.
350 mutable Lock lock_;
351
352 /// The lock for the decoded frames queue.
354
355#ifdef OCEAN_DEBUG
356 /// The previous presentation timestamp submitted via pushSample(), in microseconds, NumericT<int64_t>::minValue() if no sample has been submitted yet.
358
359 /// The previous presentation timestamp of a decoded frame released to decodedFrames_, in microseconds, NumericT<int64_t>::minValue() if no frame has been released yet.
361#endif
362};
363
365{
366 const ScopedLock scopedLock(lock_);
367
369}
370
371inline bool VideoDecoder::isStarted() const
372{
373 const ScopedLock scopedLock(lock_); // hold the decoder's lock while reading isStarted_
374
375 ocean_assert(!isStarted_ || isInitialized()); // invariant: a started decoder must also be initialized; isInitialized() acquires lock_ again, which is fine because Lock is a recursive lock
376
377 return isStarted_;
378}
379
381{
382 if (session != nullptr)
383 {
384 VTDecompressionSessionInvalidate(session);
385 CFRelease(session);
386 }
387}
388
389}
390
391}
392
393}
394
395#endif // META_OCEAN_MEDIA_AVF_VIDEO_DECODER_H
This class implements Ocean's image class.
Definition Frame.h:1879
This class implements a recursive lock object.
Definition Lock.h:31
This class implements a simple video decoder for iOS/macOS using encoded media samples from memory as...
Definition avfoundation/VideoDecoder.h:90
VideoDecoder & operator=(const VideoDecoder &)=delete
Disabled copy operator.
int64_t debugPreviousDecodedTimestamp_
The previous presentation timestamp of a decoded frame released to decodedFrames_,...
Definition avfoundation/VideoDecoder.h:360
void removePendingSampleTimestamps(const int64_t presentationTime)
Removes a presentation timestamp from the pending sample timestamps queue.
bool start()
Starts the video decoder.
unsigned int width_
The width of the video.
Definition avfoundation/VideoDecoder.h:338
int64_t debugPreviousSubmittedTimestamp_
The previous presentation timestamp submitted via pushSample(), in microseconds, NumericT<int64_t>::m...
Definition avfoundation/VideoDecoder.h:357
Lock decodedFramesLock_
The lock for the decoded frames queue.
Definition avfoundation/VideoDecoder.h:353
static bool convertAnnexBToAvcc(const void *annexBData, const size_t annexBSize, std::vector< uint8_t > &avccData, const bool isCodecConfig=false, const std::string &mime="video/avc")
Converts Annex B formatted H.264/H.265 data to AVCC/HVCC format.
~VideoDecoder()
Destructs the video decoder and releases all associated resources.
ScopedCMFormatDescriptionRef formatDescription_
The video format description.
Definition avfoundation/VideoDecoder.h:323
VideoDecoder & operator=(VideoDecoder &&videoDecoder) noexcept
Move operator.
bool pushSample(const void *data, const size_t size, const uint64_t presentationTime)
Adds a new media sample which needs to be decoded to the video decoder.
bool isStarted_
True, if the decoder is currently started.
Definition avfoundation/VideoDecoder.h:347
Lock lock_
The decoder's lock.
Definition avfoundation/VideoDecoder.h:350
static bool isAnnexB(const void *data, const size_t size, const bool isCodecConfig=false)
Determines whether the given data is in Annex B format (start code prefixed) or AVCC format (length p...
std::deque< int64_t > TimestampQueue
Definition of a queue holding presentation timestamps.
Definition avfoundation/VideoDecoder.h:151
bool initialize(const std::string &mime, const unsigned int width, const unsigned int height, const void *codecConfigData=nullptr, const size_t codecConfigSize=0, const DecodingMode decodingMode=DM_PERFORMANCE)
Initializes the video decoder with codec configuration data (SPS/PPS for H.264, VPS/SPS/PPS for HEVC)...
void release()
Explicitly releases this video decoder.
std::deque< DecodedFrame > DecodedFrameQueue
Definition of a queue holding decoded frames.
Definition avfoundation/VideoDecoder.h:148
static CMVideoCodecType mimeToCodecType(const std::string &mime)
Translates a MIME type to a CMVideoCodecType.
bool isStarted() const
Returns whether this decoder is currently running.
Definition avfoundation/VideoDecoder.h:371
VideoDecoder(const VideoDecoder &)=delete
Disabled copy constructor.
static void decompressionOutputCallback(void *decompressionOutputRefCon, void *sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef imageBuffer, CMTime presentationTimeStamp, CMTime presentationDuration)
Callback function for decoded frames from VideoToolbox.
static void releaseVTDecompressionSession(VTDecompressionSessionRef session)
Release function for VTDecompressionSessionRef that invalidates and releases the session.
Definition avfoundation/VideoDecoder.h:380
TimestampQueue pendingSampleTimestamps_
The queue of timestamps of submitted but not yet processed samples.
Definition avfoundation/VideoDecoder.h:329
bool isInitialized() const
Returns whether this decoder is initialized.
Definition avfoundation/VideoDecoder.h:364
std::vector< DecodedFrame > DecodedFrames
Definition of a vector holding decoded frames.
Definition avfoundation/VideoDecoder.h:145
void onNewDecodedFrame(DecodedFrame &&decodedFrame)
Handles a newly decoded frame by either delivering it directly or deferring it for later delivery in ...
VideoDecoder()
Default constructor creating an un-initialized decoder.
DecodedFrames deferredFrames_
Decoded frames waiting to be delivered in presentation-timestamp order (DM_ORDERED mode only).
Definition avfoundation/VideoDecoder.h:335
bool stop()
Stops the video decoder.
unsigned int height_
The height of the video.
Definition avfoundation/VideoDecoder.h:341
DecodingMode
Definition of the decoding mode controlling frame delivery order and latency.
Definition avfoundation/VideoDecoder.h:97
@ DM_PERFORMANCE
Frames are decoded and delivered with minimal latency, frame ordering is not guaranteed,...
Definition avfoundation/VideoDecoder.h:100
@ DM_ORDERED
Frames are decoded and delivered in presentation-timestamp order, but frame delivery may be delayed.
Definition avfoundation/VideoDecoder.h:103
Frame popFrame(int64_t *presentationTime=nullptr)
Pops the next decoded frame from the decoder.
ScopedVTDecompressionSessionRef decompressionSession_
The decompression session.
Definition avfoundation/VideoDecoder.h:326
DecodedFrameQueue decodedFrames_
The queue of decoded frames ready for consumption.
Definition avfoundation/VideoDecoder.h:332
void processDeferredFrames()
Processes deferred frames whose presentation timestamps now match the front of the pending timestamps...
DecodingMode decodingMode_
The decoding mode.
Definition avfoundation/VideoDecoder.h:344
static constexpr T minValue()
Returns the min scalar value.
Definition Numeric.h:3259
This class implements a scoped lock object for recursive lock objects.
Definition Lock.h:147
This class wraps an unmanaged object (or reference) which needs to be released after usage.
Definition ScopedObject.h:166
bool isValid() const
Returns whether this scoped object holds a valid object.
Definition ScopedObject.h:460
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Definition of a decoded frame entry.
Definition avfoundation/VideoDecoder.h:136
int64_t presentationTime_
The presentation time in microseconds.
Definition avfoundation/VideoDecoder.h:141
Frame frame_
The decoded frame.
Definition avfoundation/VideoDecoder.h:138