Ocean
Loading...
Searching...
No Matches
mediafoundation/VideoDecoder.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_MEDIA_MF_VIDEO_DECODER_H
9#define META_OCEAN_MEDIA_MF_VIDEO_DECODER_H
10
12
13#include "ocean/base/Frame.h"
14#include "ocean/base/Lock.h"
15
16namespace Ocean
17{
18
19namespace Media
20{
21
22namespace MediaFoundation
23{
24
25/**
26 * This class implements a simple video decoder for Windows using encoded media samples from memory as input.
27 * The decoder uses Microsoft's Media Foundation Transform (MFT) framework for hardware-accelerated decoding.
28 *
29 * Usage:
30 * @code
31 * // a function which is e.g., running in a separate thread
32 * void threadRun()
33 * {
34 * VideoDecoder videoDecoder;
35 *
36 * // initializing the decoder with the input format of the media samples
37 * if (!videoDecoder.initialize("video/avc", 1920u, 1080u))
38 * {
39 * // handle error
40 * }
41 *
42 * if (!videoDecoder.start())
43 * {
44 * // handle error
45 * }
46 *
47 * unsigned int frameIndex = 0u;
48 * double frameRate = 30.0;
49 *
50 * while (true)
51 * {
52 * void* sampleData = nullptr;
53 * size_t sampleSize = 0;
54 *
55 * // external function: function needs to provide the new media samples from an external source - e.g., from an external webcam, a video stream, etc.
56 * if (doesNewInputSampleExist(sampleData, &sampleSize))
57 * {
58 * // presentation time in microseconds
59 * uint64_t presentationTime = uint64_t(1.0e6 * double(frameIndex) / frameRate);
60 *
61 * // we forward the media sample to the decoder, eventually it will be decoded and will be available through popFrame()
62 * if (!videoDecoder.pushSample(sampleData, sampleSize, presentationTime))
63 * {
64 * // handle error
65 * }
66 *
67 * ++frameIndex;
68 * }
69 *
70 * // we simply check whether another frame has been decoded (there may be a delay between pushing and popping)
71 * Frame newFrame = videoDecoder.popFrame();
72 *
73 * if (newFrame.isValid())
74 * {
75 * // external function: receives the new frame and processes it
76 * sendFrameToReceiver(std::move(newFrame));
77 * }
78 * }
79 * }
80 * @endcode
81 * @ingroup mediamf
82 */
84{
85 public:
86
87 /**
88 * Default constructor creating an un-initialized decoder.
89 */
91
92 /**
93 * Move constructor.
94 * @param videoDecoder The decoder to be moved
95 */
96 inline VideoDecoder(VideoDecoder&& videoDecoder) noexcept;
97
98 /**
99 * Destructs the video decoder and releases all associated resources.
100 */
102
103 /**
104 * Initializes the video decoder with codec configuration data (SPS/PPS for H.264, VPS/SPS/PPS for HEVC).
105 * @param mime The MIME type (Multipurpose Internet Mail Extensions) of the video to be decoded, e.g., "video/avc", "video/hevc", ...
106 * @param width The width of the video to be decoded, in pixel, with range [1, infinity)
107 * @param height The height of the video to be decoded, in pixel, with range [1, infinity)
108 * @param codecConfigData The codec configuration data containing parameter sets (SPS/PPS for H.264), can be nullptr if not available yet
109 * @param codecConfigSize The size of the codec configuration data in bytes, 0 if not available yet
110 * @return True, if succeeded
111 * @see isInitialized().
112 */
113 bool initialize(const std::string& mime, const unsigned int width, const unsigned int height, const void* codecConfigData = nullptr, const size_t codecConfigSize = 0);
114
115 /**
116 * Starts the video decoder.
117 * @return True, if succeeded
118 * @see isStarted().
119 */
120 bool start();
121
122 /**
123 * Stops the video decoder.
124 * @return True, if succeeded
125 */
126 bool stop();
127
128 /**
129 * Adds a new media sample which needs to be decoded to the video decoder.
130 * The decoder needs to be initialized and started.
131 * The presentation time is mainly intended to allow associating the provided encoded media sample with the resulting decoded frame when calling popFrame().
132 * However, it's recommended to define a reasonable presentation time for each sample (e.g., let the first sample start at 0 and increment the time by 10^6/fps for each following sample).
133 * @param data The data of the encoded media sample, must be valid
134 * @param size The size of the encoded media sample, in bytes, with range [1, infinity)
135 * @param presentationTime The presentation time of the sample, in microseconds, with range [0, infinity)
136 * @return True, if succeeded
137 * @see start(), isInitialized(), isStarted().
138 */
139 bool pushSample(const void* data, const size_t size, const uint64_t presentationTime);
140
141 /**
142 * Pops the next decoded frame from the decoder.
143 * Optionally, the frame's presentation time will be returned; this is the presentation time which was used when the corresponding sample was provided in pushSample().
144 * @param presentationTime Optional resulting presentation time in microseconds, with range (-infinity, infinity)
145 * @return The resulting frame, invalid if currently no decoded frame is available
146 * @see pushSample().
147 */
148 Frame popFrame(int64_t* presentationTime = nullptr);
149
150 /**
151 * Returns whether this decoder is initialized.
152 * @return True, if so
153 * @see initialize().
154 */
155 inline bool isInitialized() const;
156
157 /**
158 * Returns whether this decoder is currently running.
159 * @return True, if so
160 * @see start().
161 */
162 inline bool isStarted() const;
163
164 /**
165 * Signals the decoder to drain all buffered frames.
166 * After calling this, popFrame() should be called repeatedly until no more frames are available.
167 * @return True, if succeeded
168 */
169 bool drain();
170
171 /**
172 * Explicitly releases this video decoder.
173 * If the decoder is still running, the decoder will be stopped as well.
174 */
175 void release();
176
177 /**
178 * Converts AVCC/HVCC formatted H.264/H.265 data to Annex B format.
179 * For encoded samples (isCodecConfig = false): Replaces 4-byte big-endian length prefixes with start code prefixes (00 00 00 01).
180 * For codec config (isCodecConfig = true): Parses the AVCC/HVCC configuration record and extracts SPS/PPS (and VPS for HEVC) NAL units with start codes.
181 * @param avccData The AVCC/HVCC formatted data, must be valid
182 * @param avccSize The size of the AVCC/HVCC data in bytes, with range [4, infinity)
183 * @param annexBData The resulting Annex B formatted data with start code prefixes
184 * @param isCodecConfig True if the input is an AVCC/HVCC codec configuration record; False if it contains length-prefixed NAL units
185 * @param mime The MIME type, used only when isCodecConfig is true to determine H.264 vs HEVC format, either "video/avc" or "video/hevc"
186 * @return True if conversion succeeded; False if the input data is invalid or conversion failed
187 */
188 static bool convertAvccToAnnexB(const void* avccData, const size_t avccSize, std::vector<uint8_t>& annexBData, const bool isCodecConfig = false, const std::string& mime = "video/avc");
189
190 /**
191 * Determines whether the given data is in AVCC format (length prefixed) or Annex B format (start code prefixed).
192 * AVCC format uses 4-byte big-endian length prefixes before each NAL unit.
193 * Annex B format uses start codes (0x00 0x00 0x00 0x01 or 0x00 0x00 0x01) to delimit NAL units.
194 *
195 * Note: For codec configuration data, use isCodecConfig=true as AVCC config starts with version byte 0x01.
196 * For regular NAL unit samples, use isCodecConfig=false which applies more sophisticated detection
197 * to distinguish AVCC length prefixes from Annex B start codes (especially for NAL sizes 256-511 bytes
198 * where the length prefix 0x00 0x00 0x01 XX looks like an Annex B 3-byte start code).
199 *
200 * @param data The data to check, must be valid
201 * @param size The size of the data in bytes, with range [4, infinity)
202 * @param isCodecConfig True if the data is codec configuration (SPS/PPS), false for regular NAL samples
203 * @return True if the data is in AVCC format; false if it's in Annex B format
204 */
205 static bool isAvcc(const void* data, const size_t size, const bool isCodecConfig = false);
206
207 /**
208 * Move operator.
209 * @param videoDecoder The video decoder to be moved
210 * @return Reference to this object
211 */
212 inline VideoDecoder& operator=(VideoDecoder&& videoDecoder) noexcept;
213
214 protected:
215
216 /**
217 * Disabled copy constructor.
218 */
219 VideoDecoder(const VideoDecoder&) = delete;
220
221 /**
222 * Disabled copy operator.
223 * @return Reference to this object
224 */
226
227 /**
228 * Translates a MIME type to a Media Foundation video format GUID.
229 * @param mime The MIME type
230 * @return The corresponding video format GUID, GUID_NULL if not supported
231 */
232 static GUID mimeToVideoFormat(const std::string& mime);
233
234 protected:
235
236 /// The MFT decoder used to decode the video.
238
239 /// The width of the video.
240 unsigned int width_ = 0u;
241
242 /// The height of the video.
243 unsigned int height_ = 0u;
244
245 /// True, if the decoder is currently started.
246 bool isStarted_ = false;
247
248 /// True if MFStartup has been called by this instance.
249 bool mfStarted_ = false;
250
251 /// True if the MFT provides its own output samples.
253
254 /// The size of the output buffer in bytes, used when the MFT does not provide its own output samples.
256
257 /// The decoder's lock.
258 mutable Lock lock_;
259};
260
261inline VideoDecoder::VideoDecoder(VideoDecoder&& videoDecoder) noexcept
262{
263 *this = std::move(videoDecoder);
264}
265
267{
268 const ScopedLock scopedLock(lock_);
269
270 return decoder_.isValid();
271}
272
273inline bool VideoDecoder::isStarted() const
274{
275 const ScopedLock scopedLock(lock_);
276
277 ocean_assert(!isStarted_ || isInitialized());
278
279 return isStarted_;
280}
281
282inline VideoDecoder& VideoDecoder::operator=(VideoDecoder&& videoDecoder) noexcept
283{
284 if (this != &videoDecoder)
285 {
286 release();
287
288 decoder_ = std::move(videoDecoder.decoder_);
289
290 width_ = videoDecoder.width_;
291 videoDecoder.width_ = 0u;
292
293 height_ = videoDecoder.height_;
294 videoDecoder.height_ = 0u;
295
296 isStarted_ = videoDecoder.isStarted_;
297 videoDecoder.isStarted_ = false;
298
299 mfStarted_ = videoDecoder.mfStarted_;
300 videoDecoder.mfStarted_ = false;
301
302 mftProvidesOutputSamples_ = videoDecoder.mftProvidesOutputSamples_;
303 videoDecoder.mftProvidesOutputSamples_ = false;
304
305 outputBufferSize_ = videoDecoder.outputBufferSize_;
306 videoDecoder.outputBufferSize_ = 0u;
307 }
308
309 return *this;
310}
311
312}
313
314}
315
316}
317
318#endif // META_OCEAN_MEDIA_MF_VIDEO_DECODER_H
This class implements Ocean's image class.
Definition Frame.h:1879
This class implements a recursive lock object.
Definition Lock.h:31
This class implements a simple video decoder for Windows using encoded media samples from memory as i...
Definition mediafoundation/VideoDecoder.h:84
VideoDecoder & operator=(const VideoDecoder &)=delete
Disabled copy operator.
Lock lock_
The decoder's lock.
Definition mediafoundation/VideoDecoder.h:258
static bool convertAvccToAnnexB(const void *avccData, const size_t avccSize, std::vector< uint8_t > &annexBData, const bool isCodecConfig=false, const std::string &mime="video/avc")
Converts AVCC/HVCC formatted H.264/H.265 data to Annex B format.
bool isInitialized() const
Returns whether this decoder is initialized.
Definition mediafoundation/VideoDecoder.h:266
bool drain()
Signals the decoder to drain all buffered frames.
~VideoDecoder()
Destructs the video decoder and releases all associated resources.
VideoDecoder & operator=(VideoDecoder &&videoDecoder) noexcept
Move operator.
Definition mediafoundation/VideoDecoder.h:282
VideoDecoder(const VideoDecoder &)=delete
Disabled copy constructor.
unsigned int height_
The height of the video.
Definition mediafoundation/VideoDecoder.h:243
Frame popFrame(int64_t *presentationTime=nullptr)
Pops the next decoded frame from the decoder.
void release()
Explicitly releases this video decoder.
static GUID mimeToVideoFormat(const std::string &mime)
Translates a MIME type to a Media Foundation video format GUID.
VideoDecoder()
Default constructor creating an un-initialized decoder.
bool isStarted_
True, if the decoder is currently started.
Definition mediafoundation/VideoDecoder.h:246
bool mfStarted_
True if MFStartup has been called by this instance.
Definition mediafoundation/VideoDecoder.h:249
unsigned int width_
The width of the video.
Definition mediafoundation/VideoDecoder.h:240
DWORD outputBufferSize_
The size of the output buffer in bytes, used when the MFT does not provide its own output samples.
Definition mediafoundation/VideoDecoder.h:255
bool start()
Starts the video decoder.
bool pushSample(const void *data, const size_t size, const uint64_t presentationTime)
Adds a new media sample which needs to be decoded to the video decoder.
bool mftProvidesOutputSamples_
True if the MFT provides its own output samples.
Definition mediafoundation/VideoDecoder.h:252
bool stop()
Stops the video decoder.
bool isStarted() const
Returns whether this decoder is currently running.
Definition mediafoundation/VideoDecoder.h:273
static bool isAvcc(const void *data, const size_t size, const bool isCodecConfig=false)
Determines whether the given data is in AVCC format (length prefixed) or Annex B format (start code p...
ScopedIMFTransform decoder_
The MFT decoder used to decode the video.
Definition mediafoundation/VideoDecoder.h:237
bool initialize(const std::string &mime, const unsigned int width, const unsigned int height, const void *codecConfigData=nullptr, const size_t codecConfigSize=0)
Initializes the video decoder with codec configuration data (SPS/PPS for H.264, VPS/SPS/PPS for HEVC)...
This class implements a scoped lock object for recursive lock objects.
Definition Lock.h:147
This class wraps an unmanaged object (or reference) which needs to be released after usage.
Definition ScopedObject.h:166
bool isValid() const
Returns whether this scoped object holds a valid object.
Definition ScopedObject.h:460
void release(T *object)
This functions allows to release a media foundation object if it does exist.
Definition MediaFoundation.h:226
The namespace covering the entire Ocean framework.
Definition Accessor.h:15