streaming_nvdec_decoding¶
This example shows how to decode a video with GPU in streaming fashion.
Source¶
Source
Click here to see the source.
1# Copyright (c) Meta Platforms, Inc. and affiliates.
2# All rights reserved.
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7"""This example shows how to decode a video with GPU in streaming fashion."""
8
# Public names of this example module: the names exported by a star-import.
__all__ = [
    "main",
    "parse_args",
    "run",
    "decode",
    "torch_cuda_warmup",
]
16
17import argparse
18import contextlib
19import logging
20import pathlib
21import time
22
23import spdl.io
24import torch
25from PIL import Image
26from spdl.io import CUDAConfig
27from torch.profiler import profile
28
29# pyre-strict
30
31
32def parse_args(args: list[str] | None = None) -> tuple[argparse.Namespace, list[str]]:
33 """Parse command line arguments.
34
35 Args:
36 args: The command line arguments. By default it reads ``sys.argv``.
37
38 Returns:
39 Tuple of parsed arguments and unused arguments, as returned by
40 :py:meth:`argparse.ArgumentParser.parse_known_args`.
41 """
42
43 parser = argparse.ArgumentParser(
44 description=__doc__,
45 )
46 parser.add_argument(
47 "--input-file", required=True, help="The input video to process."
48 )
49 parser.add_argument(
50 "--plot-dir",
51 type=pathlib.Path,
52 help="If provided, plot the result to the given dirctory.",
53 )
54 parser.add_argument(
55 "--trace-path",
56 help="If provided, trace the execution. e.g. 'trace.json.gz'",
57 )
58 parser.add_argument(
59 "--device-index",
60 type=int,
61 help="The CUDA device index. By default it use the last one.",
62 )
63 parser.add_argument(
64 "--width",
65 type=int,
66 default=320,
67 help="Rescale the video to this width. Provide -1 to disable.",
68 )
69 parser.add_argument(
70 "--height",
71 type=int,
72 default=240,
73 help="Rescale the video to this height. Provide -1 to disable.",
74 )
75 return parser.parse_known_args(args)
76
77
def decode(
    src: str,
    device_config: CUDAConfig,
    post_processing_params: dict[str, int],
    profiler: torch.profiler.profile | None,
    plot_dir: pathlib.Path | None,
) -> None:
    """Decode video in streaming fashion with optional resizing, profiling and exporting.

    Decoding stops when the stream is exhausted or once at least 500 frames
    have been processed, whichever comes first. A one-line summary (frame
    count, elapsed time, frames per second) is printed at the end.

    Args:
        src: The path or URL to the source video.
        device_config: The GPU configuration.
        post_processing_params: Post processing argument.
            See :py:func:`spdl.io.streaming_load_video_nvdec`.
        profiler: PyTorch Profiler or ``None``. When given, ``step()`` is
            called once per decoded batch.
        plot_dir: If provided, the decoded frames are exported as images to the directory.
            The directory is created if it does not exist. File names restart
            from ``00000.png`` on every call, so existing files are overwritten.
    """
    if plot_dir is not None:
        # ``Image.save`` does not create missing parent directories.
        plot_dir.mkdir(parents=True, exist_ok=True)

    streamer = spdl.io.streaming_load_video_nvdec(
        src,
        device_config,
        num_frames=32,
        post_processing_params=post_processing_params,
    )

    frame_index, num_frames = 0, 0
    t0 = time.monotonic()
    for buffers in streamer:
        # ``sync=True`` is passed through as in the original example;
        # presumably it blocks until the color conversion is done -- confirm
        # against the spdl documentation.
        buffer = spdl.io.nv12_to_rgb(buffers, device_config=device_config, sync=True)
        tensor = spdl.io.to_torch(buffer)
        num_frames += len(tensor)

        if plot_dir is not None:
            # Move the whole batch to the host once instead of once per frame.
            # The permute presumably turns NCHW into NHWC for PIL -- confirm.
            frames = tensor.permute(0, 2, 3, 1).cpu().numpy()
            for frame in frames:
                Image.fromarray(frame).save(plot_dir / f"{frame_index:05d}.png")
                frame_index += 1

        if profiler is not None:
            profiler.step()
        if num_frames >= 500:
            break

    elapsed = time.monotonic() - t0
    # Guard against a zero interval (empty stream and/or coarse clock).
    qps = num_frames / elapsed if elapsed > 0 else 0.0
    print(f"Processed {num_frames} frames in {elapsed:.1f} sec. QPS: {qps:.1f}")
123
124
125def torch_cuda_warmup(device_index: int | None) -> tuple[int, torch.cuda.Stream]:
126 """Initialize the CUDA context perform dry-run.
127
128 Args:
129 device_index: The CUDA device to use. If ``None``, the last available device is used.
130 """
131 assert torch.cuda.is_available()
132
133 cuda_index: int = device_index or (torch.cuda.device_count() - 1)
134 stream = torch.cuda.Stream(device=cuda_index)
135 with torch.cuda.stream(stream):
136 a = torch.empty([32, 3, 1080, 1920])
137 a.pin_memory().to(f"cuda:{cuda_index}", non_blocking=True)
138 stream.synchronize()
139 return cuda_index, stream
140
141
def run(
    src: str,
    device_index: int | None,
    post_processing_params: dict[str, int],
    profiler: torch.profiler.profile | None,
    plot_dir: pathlib.Path | None,
) -> None:
    """Run the benchmark.

    Warms up CUDA, builds the device configuration, then decodes the source
    three times. Each pass is wrapped in a ``decode_{i}`` profiler record.

    Args:
        src: The path or URL to the source video.
        device_index: The CUDA device to use, or ``None`` to let
            :py:func:`torch_cuda_warmup` pick one.
        post_processing_params: Post processing argument forwarded to
            :py:func:`decode`.
        profiler: PyTorch Profiler or ``None``. Forwarded to :py:func:`decode`.
        plot_dir: Directory to export decoded frames to, or ``None``.
            Forwarded to :py:func:`decode`.
    """
    cuda_index, stream = torch_cuda_warmup(device_index)

    # Configure spdl with PyTorch's caching allocator and the warmed-up stream.
    device_config = spdl.io.cuda_config(
        device_index=cuda_index,
        allocator=(
            torch.cuda.caching_allocator_alloc,
            torch.cuda.caching_allocator_delete,
        ),
        stream=stream.cuda_stream,
    )

    for i in range(3):
        with torch.autograd.profiler.record_function(f"decode_{i}"):
            decode(src, device_config, post_processing_params, profiler, plot_dir)
164
165
def main(args: list[str] | None = None) -> None:
    """The main entrypoint for the CLI.

    Parses the arguments, configures logging, optionally enables the PyTorch
    profiler (when ``--trace-path`` is given) and runs the benchmark.

    Args:
        args: The command line arguments. Passed to :py:func:`parse_args`.
    """
    ns, _ = parse_args(args)

    logging.basicConfig(level=logging.INFO)

    # Non-positive sizes are passed on as ``None``.
    scale_width = ns.width if ns.width > 0 else None
    scale_height = ns.height if ns.height > 0 else None
    post_process = {
        "scale_width": scale_width,
        "scale_height": scale_height,
    }

    def _export_trace(prof: profile) -> None:
        # Write the collected trace to the path given on the command line.
        prof.export_chrome_trace(ns.trace_path)

    if ns.trace_path:
        profiler_ctx = profile(with_stack=True, on_trace_ready=_export_trace)
    else:
        # No tracing requested: ``nullcontext`` yields ``None`` as the profiler.
        profiler_ctx = contextlib.nullcontext()

    with profiler_ctx as prof:
        run(ns.input_file, ns.device_index, post_process, prof, ns.plot_dir)
187
188
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Functions¶
Functions
- parse_args(args: list[str] | None = None) tuple[Namespace, list[str]] [source]¶
Parse command line arguments.
- Parameters:
args – The command line arguments. By default it reads sys.argv.
- Returns:
Tuple of parsed arguments and unused arguments, as returned by argparse.ArgumentParser.parse_known_args().
- run(src: str, device_index: int | None, post_processing_params: dict[str, int], profiler: profile, plot_dir: Path) None [source]¶
Run the benchmark.
- decode(src: str, device_config: CUDAConfig, post_processing_params: dict[str, int], profiler: profile | None, plot_dir: Path | None) None [source]¶
Decode video in streaming fashion with optional resizing, profiling and exporting.
- Parameters:
src – The path or URL to the source video.
device_config – The GPU configuration.
post_processing_params – Post processing argument. See spdl.io.streaming_load_video_nvdec().
profiler – PyTorch Profiler or None.
plot_dir – If provided, the decoded frames are exported as images to the directory.