Ocean
Loading...
Searching...
No Matches
mediafoundation/VideoEncoder.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_MEDIA_MF_VIDEO_ENCODER_H
9#define META_OCEAN_MEDIA_MF_VIDEO_ENCODER_H
10
12
13#include "ocean/base/Frame.h"
14#include "ocean/base/Lock.h"
15
16#include "ocean/math/Numeric.h"
17
18#include <strmif.h>
19
20#include <deque>
21
22namespace Ocean
23{
24
25namespace Media
26{
27
28namespace MediaFoundation
29{
30
31/**
32 * This class implements a simple video encoder for Windows using Ocean::Frame objects as input.
33 * The encoder uses Microsoft's Media Foundation Transform (MFT) framework for hardware-accelerated encoding.
34 *
35 * Usage:
36 * @code
37 * // a function which is e.g., running in a separate thread
38 * void threadRun()
39 * {
40 * VideoEncoder videoEncoder;
41 *
42 * if (!videoEncoder.initialize(1920u, 1080u))
43 * {
44 * // handle error
45 * }
46 *
47 * if (!videoEncoder.start())
48 * {
49 * // handle error
50 * }
51 *
52 * unsigned int frameIndex = 0u;
53 * double frameRate = 30.0;
54 *
55 * while (true)
56 * {
57 * Frame frame;
58 *
59 * // external function: function needs to provide frames from an external source - e.g., from a camera, a video stream, etc.
60 * if (doesNewFrameExist(frame))
61 * {
62 * // presentation time in microseconds
63 * uint64_t presentationTime = uint64_t(1.0e6 * double(frameIndex) / frameRate);
64 *
65 * // we forward the frame to the encoder, eventually it will be encoded and will be available through popSample()
66 * if (!videoEncoder.pushFrame(frame, presentationTime))
67 * {
68 * // handle error
69 * }
70 *
71 * ++frameIndex;
72 * }
73 *
74 * // we simply check whether another sample has been encoded
75 * VideoEncoder::Sample encodedSample = videoEncoder.popSample();
76 *
77 * if (encodedSample.isValid())
78 * {
79 * // external function: receives the encoded samples and processes them
80 * sendSampleToReceiver(std::move(encodedSample));
81 * }
82 * }
83 * }
84 * @endcode
85 * @ingroup mediamf
86 */
88{
89 public:
90
91 /// Definition of a 1 Mbps bit rate
92 static constexpr int bitrateMbps1_ = 1000 * 1000;
93
94 /// Definition of a 2 Mbps bit rate
95 static constexpr int bitrateMbps2_ = bitrateMbps1_ * 2;
96
97 /// Definition of a 5 Mbps bit rate
98 static constexpr int bitrateMbps5_ = bitrateMbps1_ * 5;
99
100 /// Definition of a 10 Mbps bit rate
101 static constexpr int bitrateMbps10_ = bitrateMbps1_ * 10;
102
103 /**
104 * Definition of individual buffer flag constants.
105 * Modeled after Android's MediaCodec.BufferInfo for API compatibility.
106 */
107 enum BufferFlags : uint32_t
108 {
109 /// The buffer has no special property.
111 /// Indicates that the (encoded) buffer marked as such contains the data for a key frame.
113 /// Indicates that the buffer marked as such contains codec initialization / codec specific data instead of media data.
115 /// Indicates that the buffer is the last buffer in the stream.
117 /// Indicates that the buffer only contains part of a frame.
119 };
120
121 /**
122 * Definition of an encoded sample.
123 */
124 class Sample
125 {
126 friend class VideoEncoder;
127
128 public:
129
130 /**
131 * Creates an invalid sample.
132 */
133 Sample() = default;
134
135 /**
136 * Move constructor.
137 * @param sample The sample to be moved
138 */
139 inline Sample(Sample&& sample) noexcept;
140
141 /**
142 * Returns whether this sample is valid.
143 * @return True, if so
144 */
145 inline bool isValid() const;
146
147 /**
148 * Returns the encoded data.
149 * @return The encoded data
150 */
151 inline const std::vector<uint8_t>& data() const;
152
153 /**
154 * Returns the presentation time in microseconds.
155 * @return The presentation time
156 */
157 inline int64_t presentationTime() const;
158
159 /**
160 * Returns whether this sample is a key frame.
161 * @return True, if so
162 */
163 inline bool isKeyFrame() const;
164
165 /**
166 * Returns whether this sample contains codec configuration data instead of media data.
167 * @return True, if so
168 */
169 inline bool isConfiguration() const;
170
171 /**
172 * Returns whether this sample marks the end of the stream.
173 * @return True, if so
174 */
175 inline bool isEndOfStream() const;
176
177 /**
178 * Returns whether this sample contains only part of a frame.
179 * @return True, if so
180 */
181 inline bool isPartialFrame() const;
182
183 /**
184 * Move operator.
185 * @param sample The sample to be moved
186 * @return Reference to this object
187 */
188 inline Sample& operator=(Sample&& sample) noexcept;
189
190 /**
191 * Returns whether this sample is valid.
192 * @return True, if so
193 */
194 inline explicit operator bool() const;
195
196 protected:
197
198 /**
199 * Creates a sample with specified data.
200 * @param data The encoded data, will be moved
201 * @param presentationTime The presentation time in microseconds, with range [0, infinity)
202 * @param bufferFlags The buffer flags of the sample
203 */
204 inline Sample(std::vector<uint8_t>&& data, const int64_t presentationTime, const BufferFlags bufferFlags);
205
206 /**
207 * Disabled copy constructor.
208 */
209 Sample(const Sample&) = delete;
210
211 /**
212 * Disabled copy operator.
213 * @return Reference to this object
214 */
215 Sample& operator=(const Sample&) = delete;
216
217 protected:
218
219 /// The encoded data.
220 std::vector<uint8_t> data_;
221
222 /// The presentation time in microseconds.
224
225 /// The buffer flags.
227 };
228
229 /**
230 * Definition of a vector holding sample objects.
231 */
232 using Samples = std::vector<Sample>;
233
234 protected:
235
236 /// Definition of the maximal image width.
237 static constexpr unsigned int maximalWidth_ = 1920u * 8u;
238
239 /// Definition of the maximal image height.
240 static constexpr unsigned int maximalHeight_ = 1080u * 8u;
241
242 /// Definition of the maximal bit rate.
243 static constexpr int maximalBitrate_ = bitrateMbps10_ * 10;
244
245 /// Scoped object for ICodecAPI, used for configuring encoder parameters.
247
248 public:
249
250 /**
251 * Default constructor creating an un-initialized encoder.
252 */
254
255 /**
256 * Move constructor.
257 * @param videoEncoder The encoder to be moved
258 */
259 inline VideoEncoder(VideoEncoder&& videoEncoder) noexcept;
260
261 /**
262 * Destructs the video encoder and releases all associated resources.
263 */
265
266 /**
267 * Initializes the video encoder with the specified configuration.
268 * @param width The width of the video to be encoded, in pixel, with range [1, infinity)
269 * @param height The height of the video to be encoded, in pixel, with range [1, infinity)
270 * @param mime The MIME type (Multipurpose Internet Mail Extensions) of the video to be encoded, e.g., "video/avc", "video/hevc", ...
271 * @param frameRate The target frame rate in frames per second, with range (0, infinity), e.g., 30.0
272 * @param bitrate The target bitrate in bits per second, with range [1, infinity), e.g., 5000000 for 5 Mbps
273 * @param iFrameInterval The interval between I-frames (key frames) in seconds: negative value = no key frames after first frame, 0 = all frames are key frames, positive value = key frames every N seconds
274 * @return True, if succeeded
275 * @see isInitialized().
276 */
277 bool initialize(const unsigned int width, const unsigned int height, const std::string& mime = "video/avc", const double frameRate = 30.0, const unsigned int bitrate = bitrateMbps2_, const int iFrameInterval = 1);
278
279 /**
280 * Starts the video encoder.
281 * @return True, if succeeded
282 * @see isStarted().
283 */
284 bool start();
285
286 /**
287 * Stops the video encoder.
288 * @return True, if succeeded
289 */
290 bool stop();
291
292 /**
293 * Adds a new frame which needs to be encoded to the video encoder.
294 * The encoder needs to be initialized and started.
295 * The presentation time is mainly intended to allow associating the provided frame with the resulting encoded sample when calling popSample().
296 * However, it's recommended to define a reasonable presentation time for each frame (e.g., let the first frame start at 0 and increment the time by 10^6/fps for each following frame).
297 * @param frame The frame to be encoded, must be valid
298 * @param presentationTime The presentation time of the frame, in microseconds, with range [0, infinity)
299 * @return True, if succeeded
300 * @see start(), isInitialized(), isStarted().
301 */
302 bool pushFrame(const Frame& frame, const uint64_t presentationTime);
303
304 /**
305 * Returns the next encoded sample if available.
306 * @return The resulting encoded sample, invalid if currently no encoded sample is available
307 * @see pushFrame().
308 */
310
311 /**
312 * Returns whether this encoder is initialized.
313 * @return True, if so
314 * @see initialize().
315 */
316 inline bool isInitialized() const;
317
318 /**
319 * Returns whether this encoder is currently running.
320 * @return True, if so
321 * @see start().
322 */
323 inline bool isStarted() const;
324
325 /**
326 * Explicitly releases this video encoder.
327 * If the encoder is still running, the encoder will be stopped as well.
328 */
329 void release();
330
331 /**
332 * Move operator.
333 * @param videoEncoder The video encoder to be moved
334 * @return Reference to this object
335 */
336 inline VideoEncoder& operator=(VideoEncoder&& videoEncoder) noexcept;
337
338 protected:
339
340 /**
341 * Disabled copy constructor.
342 */
343 VideoEncoder(const VideoEncoder&) = delete;
344
345 /**
346 * Disabled copy operator.
347 * @return Reference to this object
348 */
350
351 /**
352 * Tries to drain encoded output samples from the MFT into the internal queue.
353 * @return The number of samples drained
354 */
356
357 /**
358 * Translates a MIME type to a Media Foundation video format GUID.
359 * @param mime The MIME type
360 * @return The corresponding video format GUID, GUID_NULL if not supported
361 */
362 static GUID mimeToVideoFormat(const std::string& mime);
363
364 protected:
365
366 /// The MFT encoder used to encode the video.
368
369 /// The width of the video.
370 unsigned int width_ = 0u;
371
372 /// The height of the video.
373 unsigned int height_ = 0u;
374
375 /// True, if the encoder is currently started.
376 bool isStarted_ = false;
377
378 /// True if MFStartup has been called by this instance.
379 bool mfStarted_ = false;
380
381 /// True if the MFT provides its own output samples.
383
384 /// The size of the output buffer in bytes, used when the MFT does not provide its own output samples.
386
387 /// True if codec config data has been emitted at least once.
389
390 /// The queue of encoded samples.
391 std::deque<Sample> encodedSamples_;
392
393 /// The encoder's lock.
394 mutable Lock lock_;
395};
396
397inline VideoEncoder::Sample::Sample(std::vector<uint8_t>&& data, const int64_t presentationTime, const BufferFlags bufferFlags) :
398 data_(std::move(data)),
399 presentationTime_(presentationTime),
400 bufferFlags_(bufferFlags)
401{
402 // nothing to do here
403}
404
405inline VideoEncoder::Sample::Sample(Sample&& sample) noexcept
406{
407 *this = std::move(sample);
408}
409
411{
412 return !data_.empty();
413}
414
415inline const std::vector<uint8_t>& VideoEncoder::Sample::data() const
416{
417 return data_;
418}
419
421{
422 return presentationTime_;
423}
424
426{
427 return bufferFlags_ & BUFFER_FLAG_KEY_FRAME;
428}
429
431{
432 return bufferFlags_ & BUFFER_FLAG_CODEC_CONFIG;
433}
434
436{
437 return bufferFlags_ & BUFFER_FLAG_END_OF_STREAM;
438}
439
441{
442 return bufferFlags_ & BUFFER_FLAG_PARTIAL_FRAME;
443}
444
446{
447 if (this != &sample)
448 {
449 data_ = std::move(sample.data_);
450 presentationTime_ = sample.presentationTime_;
451 bufferFlags_ = sample.bufferFlags_;
452
453 sample.presentationTime_ = NumericT<int64_t>::minValue();
454 sample.bufferFlags_ = BUFFER_FLAG_NONE;
455 }
456
457 return *this;
458}
459
460inline VideoEncoder::Sample::operator bool() const
461{
462 return isValid();
463}
464
465inline VideoEncoder::VideoEncoder(VideoEncoder&& videoEncoder) noexcept
466{
467 *this = std::move(videoEncoder);
468}
469
471{
472 const ScopedLock scopedLock(lock_);
473
474 return encoder_.isValid();
475}
476
477inline bool VideoEncoder::isStarted() const
478{
479 const ScopedLock scopedLock(lock_);
480
481 ocean_assert(!isStarted_ || isInitialized());
482
483 return isStarted_;
484}
485
486inline VideoEncoder& VideoEncoder::operator=(VideoEncoder&& videoEncoder) noexcept
487{
488 if (this != &videoEncoder)
489 {
490 release();
491
492 encoder_ = std::move(videoEncoder.encoder_);
493
494 encodedSamples_ = std::move(videoEncoder.encodedSamples_);
495
496 width_ = videoEncoder.width_;
497 videoEncoder.width_ = 0u;
498
499 height_ = videoEncoder.height_;
500 videoEncoder.height_ = 0u;
501
502 isStarted_ = videoEncoder.isStarted_;
503 videoEncoder.isStarted_ = false;
504
505 mfStarted_ = videoEncoder.mfStarted_;
506 videoEncoder.mfStarted_ = false;
507
508 mftProvidesOutputSamples_ = videoEncoder.mftProvidesOutputSamples_;
509 videoEncoder.mftProvidesOutputSamples_ = false;
510
511 outputBufferSize_ = videoEncoder.outputBufferSize_;
512 videoEncoder.outputBufferSize_ = 0u;
513
514 codecConfigEmitted_ = videoEncoder.codecConfigEmitted_;
515 videoEncoder.codecConfigEmitted_ = false;
516 }
517
518 return *this;
519}
520
521}
522
523}
524
525}
526
527#endif // META_OCEAN_MEDIA_MF_VIDEO_ENCODER_H
This class implements Ocean's image class.
Definition Frame.h:1879
This class implements a recursive lock object.
Definition Lock.h:31
Definition of an encoded sample.
Definition mediafoundation/VideoEncoder.h:125
std::vector< uint8_t > data_
The encoded data.
Definition mediafoundation/VideoEncoder.h:220
bool isEndOfStream() const
Returns whether this sample marks the end of the stream.
Definition mediafoundation/VideoEncoder.h:435
BufferFlags bufferFlags_
The buffer flags.
Definition mediafoundation/VideoEncoder.h:226
Sample(const Sample &)=delete
Disabled copy constructor.
bool isValid() const
Returns whether this sample is valid.
Definition mediafoundation/VideoEncoder.h:410
bool isKeyFrame() const
Returns whether this sample is a key frame.
Definition mediafoundation/VideoEncoder.h:425
Sample()=default
Creates an invalid sample.
bool isConfiguration() const
Returns whether this sample contains codec configuration data instead of media data.
Definition mediafoundation/VideoEncoder.h:430
Sample & operator=(const Sample &)=delete
Disabled copy operator.
Sample & operator=(Sample &&sample) noexcept
Move operator.
Definition mediafoundation/VideoEncoder.h:445
int64_t presentationTime() const
Returns the presentation time in microseconds.
Definition mediafoundation/VideoEncoder.h:420
const std::vector< uint8_t > & data() const
Returns the encoded data.
Definition mediafoundation/VideoEncoder.h:415
int64_t presentationTime_
The presentation time in microseconds.
Definition mediafoundation/VideoEncoder.h:223
bool isPartialFrame() const
Returns whether this sample contains only part of a frame.
Definition mediafoundation/VideoEncoder.h:440
This class implements a simple video encoder for Windows using Ocean::Frame objects as input.
Definition mediafoundation/VideoEncoder.h:88
VideoEncoder(const VideoEncoder &)=delete
Disabled copy constructor.
bool pushFrame(const Frame &frame, const uint64_t presentationTime)
Adds a new frame which needs to be encoded to the video encoder.
unsigned int height_
The height of the video.
Definition mediafoundation/VideoEncoder.h:373
std::deque< Sample > encodedSamples_
The queue of encoded samples.
Definition mediafoundation/VideoEncoder.h:391
ScopedIMFTransform encoder_
The MFT encoder used to encode the video.
Definition mediafoundation/VideoEncoder.h:367
DWORD outputBufferSize_
The size of the output buffer in bytes, used when the MFT does not provide its own output samples.
Definition mediafoundation/VideoEncoder.h:385
bool isStarted_
True, if the encoder is currently started.
Definition mediafoundation/VideoEncoder.h:376
bool isStarted() const
Returns whether this encoder is currently running.
Definition mediafoundation/VideoEncoder.h:477
static constexpr int bitrateMbps5_
Definition of a 5 Mbps bit rate.
Definition mediafoundation/VideoEncoder.h:98
bool initialize(const unsigned int width, const unsigned int height, const std::string &mime="video/avc", const double frameRate=30.0, const unsigned int bitrate=bitrateMbps2_, const int iFrameInterval=1)
Initializes the video encoder with the specified configuration.
VideoEncoder & operator=(const VideoEncoder &)=delete
Disabled copy operator.
size_t drainOutputSamples()
Tries to drain encoded output samples from the MFT into the internal queue.
static constexpr int bitrateMbps2_
Definition of a 2 Mbps bit rate.
Definition mediafoundation/VideoEncoder.h:95
static constexpr unsigned int maximalWidth_
Definition of the maximal image width.
Definition mediafoundation/VideoEncoder.h:237
BufferFlags
Definition of individual buffer flag constants.
Definition mediafoundation/VideoEncoder.h:108
@ BUFFER_FLAG_CODEC_CONFIG
Indicates that the buffer marked as such contains codec initialization / codec specific data instead ...
Definition mediafoundation/VideoEncoder.h:114
@ BUFFER_FLAG_END_OF_STREAM
Indicates that the buffer is the last buffer in the stream.
Definition mediafoundation/VideoEncoder.h:116
@ BUFFER_FLAG_KEY_FRAME
Indicates that the (encoded) buffer marked as such contains the data for a key frame.
Definition mediafoundation/VideoEncoder.h:112
@ BUFFER_FLAG_PARTIAL_FRAME
Indicates that the buffer only contains part of a frame.
Definition mediafoundation/VideoEncoder.h:118
@ BUFFER_FLAG_NONE
The buffer has no special property.
Definition mediafoundation/VideoEncoder.h:110
VideoEncoder()
Default constructor creating an un-initialized encoder.
static constexpr int bitrateMbps10_
Definition of a 10 Mbps bit rate.
Definition mediafoundation/VideoEncoder.h:101
static constexpr int maximalBitrate_
Definition of the maximal bit rate.
Definition mediafoundation/VideoEncoder.h:243
static GUID mimeToVideoFormat(const std::string &mime)
Translates a MIME type to a Media Foundation video format GUID.
bool start()
Starts the video encoder.
bool mfStarted_
True if MFStartup has been called by this instance.
Definition mediafoundation/VideoEncoder.h:379
bool stop()
Stops the video encoder.
~VideoEncoder()
Destructs the video encoder and releases all associated resources.
bool isInitialized() const
Returns whether this encoder is initialized.
Definition mediafoundation/VideoEncoder.h:470
static constexpr int bitrateMbps1_
Definition of a 1 Mbps bit rate.
Definition mediafoundation/VideoEncoder.h:92
void release()
Explicitly releases this video encoder.
bool mftProvidesOutputSamples_
True if the MFT provides its own output samples.
Definition mediafoundation/VideoEncoder.h:382
unsigned int width_
The width of the video.
Definition mediafoundation/VideoEncoder.h:370
VideoEncoder & operator=(VideoEncoder &&videoEncoder) noexcept
Move operator.
Definition mediafoundation/VideoEncoder.h:486
Sample popSample()
Returns the next encoded sample if available.
std::vector< Sample > Samples
Definition of a vector holding sample objects.
Definition mediafoundation/VideoEncoder.h:232
static constexpr unsigned int maximalHeight_
Definition of the maximal image height.
Definition mediafoundation/VideoEncoder.h:240
Lock lock_
The encoder's lock.
Definition mediafoundation/VideoEncoder.h:394
bool codecConfigEmitted_
True if codec config data has been emitted at least once.
Definition mediafoundation/VideoEncoder.h:388
static constexpr T minValue()
Returns the min scalar value.
Definition Numeric.h:3259
This class implements a scoped lock object for recursive lock objects.
Definition Lock.h:147
This class wraps an unmanaged object (or reference) which needs to be released after usage.
Definition ScopedObject.h:166
bool isValid() const
Returns whether this scoped object holds a valid object.
Definition ScopedObject.h:460
The namespace covering the entire Ocean framework.
Definition Accessor.h:15