Ocean
Loading...
Searching...
No Matches
avfoundation/VideoDecoder.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_MEDIA_AVF_VIDEO_DECODER_H
9#define META_OCEAN_MEDIA_AVF_VIDEO_DECODER_H
10
12
13#include "ocean/base/Frame.h"
14#include "ocean/base/Lock.h"
15
16#include "ocean/math/Numeric.h"
17
18#include <VideoToolbox/VideoToolbox.h>
19
20#include <deque>
21
22namespace Ocean
23{
24
25namespace Media
26{
27
28namespace AVFoundation
29{
30
31/**
32 * Definition of a scoped object holding a CMFormatDescriptionRef object.
33 * The wrapped CMFormatDescriptionRef object will be released automatically once the scoped object does not exist anymore.
34 */
36
37/**
38 * Release function for VTDecompressionSessionRef that invalidates and releases the session.
39 * @param session The session to release
40 */
42{
43 if (session != nullptr)
44 {
45 VTDecompressionSessionInvalidate(session);
46 CFRelease(session);
47 }
48}
49
50/**
51 * Definition of a scoped object holding a VTDecompressionSessionRef object.
52 * The wrapped VTDecompressionSessionRef object will be invalidated and released automatically once the scoped object does not exist anymore.
53 */
55
56/**
57 * This class implements a simple video decoder for iOS/macOS using encoded media samples from memory as input.
58 * The decoder uses Apple's VideoToolbox framework (VTDecompressionSession) for hardware-accelerated decoding.
59 *
60 * Usage:
61 * @code
62 * // a function which is e.g., running in a separate thread
63 * void threadRun()
64 * {
65 * VideoDecoder videoDecoder;
66 *
67 * // initializing the decoder with the input format of the media samples
68 * if (!videoDecoder.initialize("video/avc", 1920u, 1080u))
69 * {
70 * // handle error
71 * }
72 *
73 * if (!videoDecoder.start())
74 * {
75 * // handle error
76 * }
77 *
78 * unsigned int frameIndex = 0u;
79 * double frameRate = 30.0;
80 *
81 * while (true)
82 * {
83 * void* sampleData = nullptr;
84 * size_t sampleSize = 0;
85 *
86 * // external function: function needs to provide the new media samples from an external source - e.g., from an external webcam, a video stream, etc.
87 * if (doesNewInputSampleExist(sampleData, &sampleSize))
88 * {
89 * // presentation time in microseconds
90 * uint64_t presentationTime = uint64_t(1.0e6 * double(frameIndex) / frameRate);
91 *
92 * // we forward the media sample to the decoder, eventually it will be decoded and will be available through decodedFrame()
93 * if (!videoDecoder.pushSample(sampleData, sampleSize, presentationTime))
94 * {
95 * // handle error
96 * }
97 *
98 * ++frameIndex;
99 * }
100 *
101 * // we simply check whether another frame has been decoded (there may be a delay between pushing a sample and receiving the corresponding decoded frame)
102 * Frame newFrame = videoDecoder.popFrame();
103 *
104 * if (newFrame.isValid())
105 * {
106 * // external function: receives the new frame and processes it
107 * sendFrameToReceiver(std::move(newFrame));
108 * }
109 * }
110 * }
111 * @endcode
112 * @ingroup mediaavf
113 */
115{
116 protected:
117
118 /**
119 * Definition of a scoped object holding a CMBlockBufferRef object.
120 * The wrapped CMBlockBufferRef object will be released automatically once the scoped object does not exist anymore.
121 */
123
124 /**
125 * Definition of a decoded frame entry.
126 */
128 {
129 /// The decoded frame.
131
132 /// The presentation time in microseconds.
133 int64_t presentationTime_ = 0;
134 };
135
136 public:
137
138 /**
139 * Default constructor creating an un-initialized decoder.
140 */
142
143 /**
144 * Move constructor.
145 * @param videoDecoder The decoder to be moved
146 */
147 inline VideoDecoder(VideoDecoder&& videoDecoder) noexcept;
148
149 /**
150 * Destructs the video decoder and releases all associated resources.
151 */
153
154 /**
155 * Initializes the video decoder with codec configuration data (SPS/PPS for H.264, VPS/SPS/PPS for HEVC).
156 * @param mime The MIME type (Multipurpose Internet Mail Extensions) of the video to be decoded, e.g., "video/avc", "video/hevc", ...
157 * @param width The width of the video to be decoded, in pixel, with range [1, infinity)
158 * @param height The height of the video to be decoded, in pixel, with range [1, infinity)
159 * @param codecConfigData The codec configuration data containing parameter sets (SPS/PPS for H.264), can be nullptr if not available yet
160 * @param codecConfigSize The size of the codec configuration data in bytes, 0 if not available yet
161 * @return True, if succeeded
162 * @see isInitialized().
163 */
164 bool initialize(const std::string& mime, const unsigned int width, const unsigned int height, const void* codecConfigData = nullptr, const size_t codecConfigSize = 0);
165
166 /**
167 * Starts the video decoder.
168 * @return True, if succeeded
169 * @see isStarted().
170 */
171 bool start();
172
173 /**
174 * Stops the video decoder.
175 * @return True, if succeeded
176 */
177 bool stop();
178
179 /**
180 * Adds a new media sample which needs to be decoded to the video decoder.
181 * The decoder needs to be initialized and started.
182 * The presentation time is mainly intended to allow associating the provided encoded media sample with the resulting decoded frame when calling popFrame().
183 * However, it's recommended to define a reasonable presentation time for each sample (e.g., let the first sample start at 0 and increment the time by 10^6/fps for each following sample).
184 * @param data The data of the encoded media sample, must be valid
185 * @param size The size of the encoded media sample, in bytes, with range [1, infinity)
186 * @param presentationTime The presentation time of the sample, in microseconds, with range [0, infinity)
187 * @return True, if succeeded
188 * @see start(), isInitialized(), isStarted().
189 */
190 bool pushSample(const void* data, const size_t size, const uint64_t presentationTime);
191
192 /**
193 * Pops the next decoded frame from the decoder.
194 * Optionally, the frame's presentation time will be returned; this is the presentation time which was used when the corresponding sample was provided in pushSample().
195 * @param presentationTime Optional resulting presentation time in microseconds, with range (-infinity, infinity)
196 * @return The resulting frame, invalid if currently no decoded frame is available
197 * @see pushSample().
198 */
199 Frame popFrame(int64_t* presentationTime = nullptr);
200
201 /**
202 * Returns whether this decoder is initialized.
203 * @return True, if so
204 * @see initialize().
205 */
206 inline bool isInitialized() const;
207
208 /**
209 * Returns whether this decoder is currently running.
210 * @return True, if so
211 * @see start().
212 */
213 inline bool isStarted() const;
214
215 /**
216 * Explicitly releases this video decoder.
217 * If the decoder is still running, the decoder will be stopped as well.
218 */
219 void release();
220
221 /**
222 * Converts Annex B formatted H.264/H.265 data to AVCC/HVCC format.
223 * For encoded samples (isCodecConfig = false): Replaces start code prefixes (00 00 00 01 or 00 00 01) with 4-byte big-endian length prefixes.
224 * For codec config (isCodecConfig = true): Extracts SPS/PPS (and VPS for HEVC) NAL units and builds an AVCC/HVCC configuration record.
225 * @param annexBData The Annex B formatted data containing NAL units with start code prefixes, must be valid
226 * @param annexBSize The size of the Annex B data in bytes, with range [4, infinity)
227 * @param avccData The resulting AVCC/HVCC formatted data
228 * @param isCodecConfig True to build an AVCC/HVCC codec configuration record from the NAL units; False to simply replace start codes with length prefixes
229 * @param mime The MIME type, used only when isCodecConfig is true to determine H.264 vs HEVC format, either "video/avc" or "video/hevc"
230 * @return True if conversion succeeded; False if the input data is invalid or conversion failed
231 */
232 static bool convertAnnexBToAvcc(const void* annexBData, const size_t annexBSize, std::vector<uint8_t>& avccData, const bool isCodecConfig = false, const std::string& mime = "video/avc");
233
234 /**
235 * Determines whether the given data is in Annex B format (start code prefixed) or AVCC format (length prefixed).
236 * Annex B format uses start codes (0x00 0x00 0x00 0x01 or 0x00 0x00 0x01) to delimit NAL units.
237 * AVCC format uses 4-byte big-endian length prefixes before each NAL unit.
238 *
239 * Note: For codec configuration data, use isCodecConfig=true as AVCC config starts with version byte 0x01.
240 * For regular NAL unit samples, use isCodecConfig=false which applies more sophisticated detection
241 * to distinguish AVCC length prefixes from Annex B start codes (especially for NAL sizes 256-511 bytes
242 * where the length prefix 0x00 0x00 0x01 XX looks like an Annex B 3-byte start code).
243 *
244 * @param data The data to check, must be valid
245 * @param size The size of the data in bytes, with range [4, infinity)
246 * @param isCodecConfig True if the data is codec configuration (SPS/PPS), false for regular NAL samples
247 * @return True if the data is in Annex B format; false if it's in AVCC format
248 */
249 static bool isAnnexB(const void* data, const size_t size, const bool isCodecConfig = false);
250
251 /**
252 * Move operator.
253 * @param videoDecoder The video decoder to be moved
254 * @return Reference to this object
255 */
256 inline VideoDecoder& operator=(VideoDecoder&& videoDecoder) noexcept;
257
258 protected:
259
260 /**
261 * Disabled copy constructor.
262 */
263 VideoDecoder(const VideoDecoder&) = delete;
264
265 /**
266 * Disabled copy operator.
267 * @return Reference to this object
268 */
270
271 /**
272 * Callback function for decoded frames from VideoToolbox.
273 * @param decompressionOutputRefCon Reference to this decoder
274 * @param sourceFrameRefCon Reference containing the presentation time
275 * @param status The status of the decompression operation
276 * @param infoFlags Information flags
277 * @param imageBuffer The decoded image buffer, may be nullptr on error
278 * @param presentationTimeStamp The presentation time stamp
279 * @param presentationDuration The presentation duration
280 */
281 static void decompressionOutputCallback(void* decompressionOutputRefCon, void* sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef imageBuffer, CMTime presentationTimeStamp, CMTime presentationDuration);
282
283 /**
284 * Translates a MIME type to a CMVideoCodecType.
285 * @param mime The MIME type
286 * @return The corresponding codec type, 0 if not supported
287 */
288 static CMVideoCodecType mimeToCodecType(const std::string& mime);
289
290 protected:
291
292 /// The video format description.
294
295 /// The decompression session.
297
298 /// The queue of decoded frames.
299 std::deque<DecodedFrame> decodedFrames_;
300
301 /// The width of the video.
302 unsigned int width_ = 0u;
303
304 /// The height of the video.
305 unsigned int height_ = 0u;
306
307 /// True, if the decoder is currently started.
308 bool isStarted_ = false;
309
310 /// The decoder's lock.
311 mutable Lock lock_;
312
313 /// The lock for the decoded frames queue.
315
316#ifdef OCEAN_DEBUG
317 /// The previous presentation timestamp submitted via pushSample(), in microseconds, NumericT<int64_t>::minValue() if no sample has been submitted yet.
319
320 /// The previous presentation timestamp of a decoded frame in the decompression callback, in microseconds, NumericT<int64_t>::minValue() if no frame has been decoded yet.
322#endif
323};
324
/**
 * Move constructor.
 * The constructor does not transfer any member itself; it forwards the given decoder to the move assignment operator.
 * @param videoDecoder The decoder to be moved
 */
325inline VideoDecoder::VideoDecoder(VideoDecoder&& videoDecoder) noexcept
326{
 // the move assignment operator first calls release() on this (just constructed) object and then takes over the state of 'videoDecoder'
327 *this = std::move(videoDecoder);
328}
329
331{
332 const ScopedLock scopedLock(lock_);
333
335}
336
/**
 * Returns whether this decoder is currently started.
 * The flag is read while the decoder's lock is held.
 * @return The current value of isStarted_
 */
337inline bool VideoDecoder::isStarted() const
338{
339 const ScopedLock scopedLock(lock_);
340
 // sanity check: a started decoder is expected to be initialized as well
 // isInitialized() acquires lock_ again while it is already held here, which relies on Lock being a recursive lock
341 ocean_assert(!isStarted_ || isInitialized());
342
343 return isStarted_;
344}
345
/**
 * Move assignment operator.
 * Releases this decoder, then takes over the format description, the decompression session, the queue of decoded frames and the scalar state of the given decoder.
 * The scalar members of the given decoder are reset to their default values afterwards.
 * Self-assignment is a no-op.
 *
 * NOTE(review): this function does not acquire lock_ or decodedFramesLock_ of either object while members are transferred; presumably no other thread may access the two decoders during the move - confirm.
 * NOTE(review): the output callback's 'decompressionOutputRefCon' is documented as a reference to the decoder; if the session was created with the address of 'videoDecoder', the moved session may still deliver frames to the moved-from object - verify against the session creation code.
 *
 * @param videoDecoder The video decoder to be moved
 * @return Reference to this object
 */
346inline VideoDecoder& VideoDecoder::operator=(VideoDecoder&& videoDecoder) noexcept
347{
348 if (this != &videoDecoder)
349 {
 // drop whatever this decoder currently holds before adopting the new state
350 release();
351
 // ownership of the wrapped format description and decompression session moves to this object
352 formatDescription_ = std::move(videoDecoder.formatDescription_);
353 decompressionSession_ = std::move(videoDecoder.decompressionSession_);
354
 // the source queue is not cleared explicitly, it is left in the moved-from state of std::deque
355 decodedFrames_ = std::move(videoDecoder.decodedFrames_);
356
 // plain values are copied and the source is reset to the same defaults used in the member declarations
357 width_ = videoDecoder.width_;
358 videoDecoder.width_ = 0u;
359
360 height_ = videoDecoder.height_;
361 videoDecoder.height_ = 0u;
362
363 isStarted_ = videoDecoder.isStarted_;
364 videoDecoder.isStarted_ = false;
365
366#ifdef OCEAN_DEBUG
 // minValue() is the marker for "no sample submitted yet" / "no frame decoded yet"
367 debugPreviousSubmittedTimestamp_ = videoDecoder.debugPreviousSubmittedTimestamp_;
368 videoDecoder.debugPreviousSubmittedTimestamp_ = NumericT<int64_t>::minValue();
369
370 debugPreviousDecodedTimestamp_ = videoDecoder.debugPreviousDecodedTimestamp_;
371 videoDecoder.debugPreviousDecodedTimestamp_ = NumericT<int64_t>::minValue();
372#endif
373 }
374
375 return *this;
376}
377
378}
379
380}
381
382}
383
384#endif // META_OCEAN_MEDIA_AVF_VIDEO_DECODER_H
This class implements Ocean's image class.
Definition Frame.h:1879
This class implements a recursive lock object.
Definition Lock.h:31
This class implements a simple video decoder for iOS/macOS using encoded media samples from memory as...
Definition avfoundation/VideoDecoder.h:115
VideoDecoder & operator=(const VideoDecoder &)=delete
Disabled copy operator.
int64_t debugPreviousDecodedTimestamp_
The previous presentation timestamp of a decoded frame in the decompression callback,...
Definition avfoundation/VideoDecoder.h:321
bool start()
Starts the video decoder.
unsigned int width_
The width of the video.
Definition avfoundation/VideoDecoder.h:302
std::deque< DecodedFrame > decodedFrames_
The queue of decoded frames.
Definition avfoundation/VideoDecoder.h:299
int64_t debugPreviousSubmittedTimestamp_
The previous presentation timestamp submitted via pushSample(), in microseconds, NumericT<int64_t>::m...
Definition avfoundation/VideoDecoder.h:318
Lock decodedFramesLock_
The lock for the decoded frames queue.
Definition avfoundation/VideoDecoder.h:314
static bool convertAnnexBToAvcc(const void *annexBData, const size_t annexBSize, std::vector< uint8_t > &avccData, const bool isCodecConfig=false, const std::string &mime="video/avc")
Converts Annex B formatted H.264/H.265 data to AVCC/HVCC format.
~VideoDecoder()
Destructs the video decoder and releases all associated resources.
ScopedCMFormatDescriptionRef formatDescription_
The video format description.
Definition avfoundation/VideoDecoder.h:293
VideoDecoder & operator=(VideoDecoder &&videoDecoder) noexcept
Move operator.
Definition avfoundation/VideoDecoder.h:346
bool pushSample(const void *data, const size_t size, const uint64_t presentationTime)
Adds a new media sample which needs to be decoded to the video decoder.
bool isStarted_
True, if the decoder is currently started.
Definition avfoundation/VideoDecoder.h:308
Lock lock_
The decoder's lock.
Definition avfoundation/VideoDecoder.h:311
static bool isAnnexB(const void *data, const size_t size, const bool isCodecConfig=false)
Determines whether the given data is in Annex B format (start code prefixed) or AVCC format (length p...
void release()
Explicitly releases this video decoder.
bool initialize(const std::string &mime, const unsigned int width, const unsigned int height, const void *codecConfigData=nullptr, const size_t codecConfigSize=0)
Initializes the video decoder with codec configuration data (SPS/PPS for H.264, VPS/SPS/PPS for HEVC)...
static CMVideoCodecType mimeToCodecType(const std::string &mime)
Translates a MIME type to a CMVideoCodecType.
bool isStarted() const
Returns whether this decoder is currently running.
Definition avfoundation/VideoDecoder.h:337
VideoDecoder(const VideoDecoder &)=delete
Disabled copy constructor.
static void decompressionOutputCallback(void *decompressionOutputRefCon, void *sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef imageBuffer, CMTime presentationTimeStamp, CMTime presentationDuration)
Callback function for decoded frames from VideoToolbox.
bool isInitialized() const
Returns whether this decoder is initialized.
Definition avfoundation/VideoDecoder.h:330
VideoDecoder()
Default constructor creating an un-initialized decoder.
bool stop()
Stops the video decoder.
unsigned int height_
The height of the video.
Definition avfoundation/VideoDecoder.h:305
Frame popFrame(int64_t *presentationTime=nullptr)
Pops the next decoded frame from the decoder.
ScopedVTDecompressionSessionRef decompressionSession_
The decompression session.
Definition avfoundation/VideoDecoder.h:296
static constexpr T minValue()
Returns the min scalar value.
Definition Numeric.h:3259
This class implements a scoped lock object for recursive lock objects.
Definition Lock.h:147
bool isValid() const
Returns whether this scoped object holds a valid object.
Definition ScopedObject.h:460
void releaseVTDecompressionSession(VTDecompressionSessionRef session)
Release function for VTDecompressionSessionRef that invalidates and releases the session.
Definition avfoundation/VideoDecoder.h:41
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Definition of a decoded frame entry.
Definition avfoundation/VideoDecoder.h:128
int64_t presentationTime_
The presentation time in microseconds.
Definition avfoundation/VideoDecoder.h:133
Frame frame_
The decoded frame.
Definition avfoundation/VideoDecoder.h:130