eb67a45ca8
This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com>
139 lines
3.3 KiB
C++
139 lines
3.3 KiB
C++
// Copyright 2020 yuzu Emulator Project
|
|
// Licensed under GPLv2 or any later version
|
|
// Refer to the license.txt file included.
|
|
|
|
#pragma once
|
|
|
|
#include <memory>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
#include <queue>
|
|
|
|
#include "common/bit_field.h"
|
|
#include "common/common_types.h"
|
|
#include "video_core/command_classes/sync_manager.h"
|
|
|
|
namespace Tegra {
|
|
|
|
class GPU;
|
|
class Nvdec;
|
|
class Vic;
|
|
class Host1x;
|
|
|
|
enum class ChSubmissionMode : u32 {
|
|
SetClass = 0,
|
|
Incrementing = 1,
|
|
NonIncrementing = 2,
|
|
Mask = 3,
|
|
Immediate = 4,
|
|
Restart = 5,
|
|
Gather = 6,
|
|
};
|
|
|
|
enum class ChClassId : u32 {
|
|
NoClass = 0x0,
|
|
Host1x = 0x1,
|
|
VideoEncodeMpeg = 0x20,
|
|
VideoEncodeNvEnc = 0x21,
|
|
VideoStreamingVi = 0x30,
|
|
VideoStreamingIsp = 0x32,
|
|
VideoStreamingIspB = 0x34,
|
|
VideoStreamingViI2c = 0x36,
|
|
GraphicsVic = 0x5d,
|
|
Graphics3D = 0x60,
|
|
GraphicsGpu = 0x61,
|
|
Tsec = 0xe0,
|
|
TsecB = 0xe1,
|
|
NvJpg = 0xc0,
|
|
NvDec = 0xf0
|
|
};
|
|
|
|
enum class ChMethod : u32 {
|
|
Empty = 0,
|
|
SetMethod = 0x10,
|
|
SetData = 0x11,
|
|
};
|
|
|
|
union ChCommandHeader {
|
|
u32 raw;
|
|
BitField<0, 16, u32> value;
|
|
BitField<16, 12, ChMethod> method_offset;
|
|
BitField<28, 4, ChSubmissionMode> submission_mode;
|
|
};
|
|
static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");
|
|
|
|
struct ChCommand {
|
|
ChClassId class_id{};
|
|
int method_offset{};
|
|
std::vector<u32> arguments;
|
|
};
|
|
|
|
using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>;
|
|
using ChCommandList = std::vector<Tegra::ChCommand>;
|
|
|
|
struct ThiRegisters {
|
|
u32_le increment_syncpt{};
|
|
INSERT_PADDING_WORDS(1);
|
|
u32_le increment_syncpt_error{};
|
|
u32_le ctx_switch_incremement_syncpt{};
|
|
INSERT_PADDING_WORDS(4);
|
|
u32_le ctx_switch{};
|
|
INSERT_PADDING_WORDS(1);
|
|
u32_le ctx_syncpt_eof{};
|
|
INSERT_PADDING_WORDS(5);
|
|
u32_le method_0{};
|
|
u32_le method_1{};
|
|
INSERT_PADDING_WORDS(12);
|
|
u32_le int_status{};
|
|
u32_le int_mask{};
|
|
};
|
|
|
|
enum class ThiMethod : u32 {
|
|
IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32),
|
|
SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32),
|
|
SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32),
|
|
};
|
|
|
|
class CDmaPusher {
|
|
public:
|
|
explicit CDmaPusher(GPU& gpu);
|
|
~CDmaPusher();
|
|
|
|
/// Push NVDEC command buffer entries into queue
|
|
void Push(ChCommandHeaderList&& entries);
|
|
|
|
/// Process queued command buffer entries
|
|
void DispatchCalls();
|
|
|
|
/// Process one queue element
|
|
void Step();
|
|
|
|
/// Invoke command class devices to execute the command based on the current state
|
|
void ExecuteCommand(u32 offset, u32 data);
|
|
|
|
private:
|
|
/// Write arguments value to the ThiRegisters member at the specified offset
|
|
void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments);
|
|
|
|
GPU& gpu;
|
|
|
|
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
|
|
std::unique_ptr<Tegra::Vic> vic_processor;
|
|
std::unique_ptr<Tegra::Host1x> host1x_processor;
|
|
std::unique_ptr<SyncptIncrManager> nvdec_sync;
|
|
std::unique_ptr<SyncptIncrManager> vic_sync;
|
|
ChClassId current_class{};
|
|
ThiRegisters vic_thi_state{};
|
|
ThiRegisters nvdec_thi_state{};
|
|
|
|
s32 count{};
|
|
s32 offset{};
|
|
s32 mask{};
|
|
bool incrementing{};
|
|
|
|
// Queue of command lists to be processed
|
|
std::queue<ChCommandHeaderList> cdma_queue;
|
|
};
|
|
|
|
} // namespace Tegra
|