vulkan dims specialization
This commit is contained in:
parent
7a0d7e7668
commit
5a78b35b1a
|
@ -13,6 +13,11 @@ set(GLSL_INCLUDES
|
||||||
${FIDELITYFX_FILES}
|
${FIDELITYFX_FILES}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(ASTC_INCLUDES
|
||||||
|
# astc_decoder_glsl_includes.h
|
||||||
|
astc_decoder_spv_includes.h
|
||||||
|
)
|
||||||
|
|
||||||
set(SHADER_FILES
|
set(SHADER_FILES
|
||||||
astc_decoder.comp
|
astc_decoder.comp
|
||||||
blit_color_float.frag
|
blit_color_float.frag
|
||||||
|
@ -95,9 +100,60 @@ if (NOT GLSLANG_ERROR STREQUAL "")
|
||||||
set(QUIET_FLAG "")
|
set(QUIET_FLAG "")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# ASTC_GEN()
#
# Adds one glslangValidator build rule per 2D ASTC block footprint. Each rule
# compiles ${SOURCE_FILE} with BLOCK_WIDTH / BLOCK_HEIGHT predefined on the
# command line, producing a SPIR-V header specialized for that block size.
#
# This is a macro (not a function) on purpose: it reads SHADER_NAME,
# SOURCE_FILE, SHADER_DIR, GLSLANGVALIDATOR, QUIET_FLAG and GLSL_FLAGS from the
# calling scope and appends every generated header to the caller's
# SHADER_HEADERS list.
#
# Generated files: ${SHADER_DIR}/${SHADER_NAME}_<W>x<H>_spv.h
# Array names passed to --variable-name: upper-cased ${SHADER_NAME}_<W>x<H>_SPV
macro(ASTC_GEN)
    # Paired lists of the valid 2D ASTC block footprints (index i -> WxH).
    # There are exactly 14; 6x4 is not an ASTC footprint and no consumer
    # includes a 6x4 header, so it must not be generated.
    set(ASTC_WIDTHS  4 5 5 6 6 8 8 8 10 10 10 10 12 12)
    set(ASTC_HEIGHTS 4 4 5 5 6 5 6 8  5  6  8 10 10 12)

    list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS)
    list(LENGTH ASTC_HEIGHTS NUM_ASTC_HEIGHTS)
    if (NOT NUM_ASTC_FORMATS EQUAL NUM_ASTC_HEIGHTS)
        message(FATAL_ERROR
            "ASTC_GEN: ASTC_WIDTHS (${NUM_ASTC_FORMATS}) and ASTC_HEIGHTS (${NUM_ASTC_HEIGHTS}) must have the same length")
    endif()

    # foreach(RANGE n) iterates 0..n inclusive, so convert the count to the last index.
    math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1")
    foreach(i RANGE ${NUM_ASTC_FORMATS})
        list(GET ASTC_WIDTHS ${i} ASTC_WIDTH)
        list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT)

        # Vulkan SPIR-V Specialization
        string(TOUPPER "${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV" SPIRV_VARIABLE_NAME)
        set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h)
        add_custom_command(
            OUTPUT
                ${SPIRV_HEADER_FILE}
            COMMAND
                ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
            MAIN_DEPENDENCY
                ${SOURCE_FILE}
        )
        list(APPEND SHADER_HEADERS ${SPIRV_HEADER_FILE})

        # GLSL Specialization
        # Disabled as there was no noticeable performance uplift specializing the shaders for OGL

        # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h)
        # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}")
        # set(DEFINES_LINE_NUMBER 14)
        # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME)
        # add_custom_command(
        #     OUTPUT
        #         ${SOURCE_HEADER_FILE}
        #     COMMAND
        #         ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME}
        #     MAIN_DEPENDENCY
        #         ${SOURCE_FILE}
        #     DEPENDS
        #         ${INPUT_FILE}
        #         ${SOURCE_FILE}
        # )
        # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
    endforeach()
endmacro()
|
||||||
|
|
||||||
foreach(FILENAME IN ITEMS ${SHADER_FILES})
|
foreach(FILENAME IN ITEMS ${SHADER_FILES})
|
||||||
string(REPLACE "." "_" SHADER_NAME ${FILENAME})
|
string(REPLACE "." "_" SHADER_NAME ${FILENAME})
|
||||||
set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
|
set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
|
||||||
|
|
||||||
|
if (${FILENAME} MATCHES "astc_decoder.comp")
|
||||||
|
ASTC_GEN()
|
||||||
|
endif()
|
||||||
|
|
||||||
# Skip generating source headers on Vulkan exclusive files
|
# Skip generating source headers on Vulkan exclusive files
|
||||||
if (NOT ${FILENAME} MATCHES "vulkan.*")
|
if (NOT ${FILENAME} MATCHES "vulkan.*")
|
||||||
set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
|
set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
|
||||||
|
@ -151,6 +207,7 @@ endforeach()
|
||||||
|
|
||||||
set(SHADER_SOURCES ${SHADER_FILES})
|
set(SHADER_SOURCES ${SHADER_FILES})
|
||||||
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
|
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
|
||||||
|
list(APPEND SHADER_SOURCES ${ASTC_INCLUDES})
|
||||||
|
|
||||||
add_custom_target(host_shaders
|
add_custom_target(host_shaders
|
||||||
DEPENDS
|
DEPENDS
|
||||||
|
|
|
@ -24,7 +24,9 @@
|
||||||
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
|
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
|
||||||
|
|
||||||
BEGIN_PUSH_CONSTANTS
|
BEGIN_PUSH_CONSTANTS
|
||||||
|
#ifndef BLOCK_WIDTH
|
||||||
UNIFORM(1) uvec2 block_dims;
|
UNIFORM(1) uvec2 block_dims;
|
||||||
|
#endif
|
||||||
UNIFORM(2) uint layer_stride;
|
UNIFORM(2) uint layer_stride;
|
||||||
UNIFORM(3) uint block_size;
|
UNIFORM(3) uint block_size;
|
||||||
UNIFORM(4) uint x_shift;
|
UNIFORM(4) uint x_shift;
|
||||||
|
@ -75,7 +77,15 @@ int color_bitsread = 0;
|
||||||
// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode
|
// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode
|
||||||
// So the maximum would be 144 (12 x 12) elements, x 2 for two planes
|
// So the maximum would be 144 (12 x 12) elements, x 2 for two planes
|
||||||
#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor
|
#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor
|
||||||
|
|
||||||
|
#ifndef BLOCK_WIDTH
|
||||||
|
#define BLOCK_WIDTH block_dims.x
|
||||||
|
#define BLOCK_HEIGHT block_dims.y
|
||||||
#define ARRAY_NUM_ELEMENTS 144
|
#define ARRAY_NUM_ELEMENTS 144
|
||||||
|
#else
|
||||||
|
// Parenthesized so the product expands as a single operand next to any operator.
#define ARRAY_NUM_ELEMENTS (BLOCK_WIDTH * BLOCK_HEIGHT)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4)
|
#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4)
|
||||||
uvec4 result_vector[VECTOR_ARRAY_SIZE];
|
uvec4 result_vector[VECTOR_ARRAY_SIZE];
|
||||||
|
|
||||||
|
@ -265,7 +275,7 @@ uint Hash52(uint p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) {
|
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) {
|
||||||
if ((block_dims.y * block_dims.x) < 32) {
|
if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) {
|
||||||
x <<= 1;
|
x <<= 1;
|
||||||
y <<= 1;
|
y <<= 1;
|
||||||
}
|
}
|
||||||
|
@ -878,8 +888,8 @@ uint UnquantizeTexelWeight(EncodingData val) {
|
||||||
uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
|
uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
|
||||||
|
|
||||||
void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
|
void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
|
||||||
const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
|
const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1));
|
||||||
const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
|
const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1));
|
||||||
const uint num_planes = is_dual_plane ? 2 : 1;
|
const uint num_planes = is_dual_plane ? 2 : 1;
|
||||||
const uint area = size.x * size.y;
|
const uint area = size.x * size.y;
|
||||||
const uint loop_count = min(result_index, area * num_planes);
|
const uint loop_count = min(result_index, area * num_planes);
|
||||||
|
@ -890,8 +900,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
|
||||||
UnquantizeTexelWeight(GetEncodingFromVector(itr));
|
UnquantizeTexelWeight(GetEncodingFromVector(itr));
|
||||||
}
|
}
|
||||||
for (uint plane = 0; plane < num_planes; ++plane) {
|
for (uint plane = 0; plane < num_planes; ++plane) {
|
||||||
for (uint t = 0; t < block_dims.y; t++) {
|
for (uint t = 0; t < BLOCK_HEIGHT; t++) {
|
||||||
for (uint s = 0; s < block_dims.x; s++) {
|
for (uint s = 0; s < BLOCK_WIDTH; s++) {
|
||||||
const uint cs = Ds * s;
|
const uint cs = Ds * s;
|
||||||
const uint ct = Dt * t;
|
const uint ct = Dt * t;
|
||||||
const uint gs = (cs * (size.x - 1) + 32) >> 6;
|
const uint gs = (cs * (size.x - 1) + 32) >> 6;
|
||||||
|
@ -934,7 +944,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
|
||||||
VectorIndicesFromBase(offset_base);
|
VectorIndicesFromBase(offset_base);
|
||||||
p.w = result_vector[array_index][vector_index];
|
p.w = result_vector[array_index][vector_index];
|
||||||
}
|
}
|
||||||
const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane;
|
const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane;
|
||||||
const uint array_index = offset / 4;
|
const uint array_index = offset / 4;
|
||||||
const uint vector_index = offset % 4;
|
const uint vector_index = offset % 4;
|
||||||
unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
|
unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
|
||||||
|
@ -976,8 +986,8 @@ int FindLayout(uint mode) {
|
||||||
|
|
||||||
|
|
||||||
void FillError(ivec3 coord) {
|
void FillError(ivec3 coord) {
|
||||||
for (uint j = 0; j < block_dims.y; j++) {
|
for (uint j = 0; j < BLOCK_HEIGHT; j++) {
|
||||||
for (uint i = 0; i < block_dims.x; i++) {
|
for (uint i = 0; i < BLOCK_WIDTH; i++) {
|
||||||
imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
|
imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -993,8 +1003,8 @@ void FillVoidExtentLDR(ivec3 coord) {
|
||||||
const float r = float(r_u) / 65535.0f;
|
const float r = float(r_u) / 65535.0f;
|
||||||
const float g = float(g_u) / 65535.0f;
|
const float g = float(g_u) / 65535.0f;
|
||||||
const float b = float(b_u) / 65535.0f;
|
const float b = float(b_u) / 65535.0f;
|
||||||
for (uint j = 0; j < block_dims.y; j++) {
|
for (uint j = 0; j < BLOCK_HEIGHT; j++) {
|
||||||
for (uint i = 0; i < block_dims.x; i++) {
|
for (uint i = 0; i < BLOCK_WIDTH; i++) {
|
||||||
imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a));
|
imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1089,7 +1099,7 @@ void DecompressBlock(ivec3 coord) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const uvec2 size_params = DecodeBlockSize(mode);
|
const uvec2 size_params = DecodeBlockSize(mode);
|
||||||
if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) {
|
if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) {
|
||||||
FillError(coord);
|
FillError(coord);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1218,21 +1228,21 @@ void DecompressBlock(ivec3 coord) {
|
||||||
DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
|
DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
|
||||||
|
|
||||||
UnquantizeTexelWeights(size_params, dual_plane);
|
UnquantizeTexelWeights(size_params, dual_plane);
|
||||||
for (uint j = 0; j < block_dims.y; j++) {
|
for (uint j = 0; j < BLOCK_HEIGHT; j++) {
|
||||||
for (uint i = 0; i < block_dims.x; i++) {
|
for (uint i = 0; i < BLOCK_WIDTH; i++) {
|
||||||
uint local_partition = 0;
|
uint local_partition = 0;
|
||||||
if (num_partitions > 1) {
|
if (num_partitions > 1) {
|
||||||
local_partition = Select2DPartition(partition_index, i, j, num_partitions);
|
local_partition = Select2DPartition(partition_index, i, j, num_partitions);
|
||||||
}
|
}
|
||||||
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
|
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
|
||||||
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
|
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
|
||||||
const uint weight_offset = (j * block_dims.x + i);
|
const uint weight_offset = (j * BLOCK_WIDTH + i);
|
||||||
const uint array_index = weight_offset / 4;
|
const uint array_index = weight_offset / 4;
|
||||||
const uint vector_index = weight_offset % 4;
|
const uint vector_index = weight_offset % 4;
|
||||||
const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
|
const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
|
||||||
uvec4 weight_vec = uvec4(primary_weight);
|
uvec4 weight_vec = uvec4(primary_weight);
|
||||||
if (dual_plane) {
|
if (dual_plane) {
|
||||||
const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS;
|
const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS;
|
||||||
const uint secondary_array_index = secondary_weight_offset / 4;
|
const uint secondary_array_index = secondary_weight_offset / 4;
|
||||||
const uint secondary_vector_index = secondary_weight_offset % 4;
|
const uint secondary_vector_index = secondary_weight_offset % 4;
|
||||||
const uint secondary_weight =
|
const uint secondary_weight =
|
||||||
|
@ -1270,7 +1280,7 @@ void main() {
|
||||||
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
|
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
|
||||||
offset += swizzle;
|
offset += swizzle;
|
||||||
|
|
||||||
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
|
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1));
|
||||||
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
|
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
20
src/video_core/host_shaders/astc_decoder_spv_includes.h
Normal file
20
src/video_core/host_shaders/astc_decoder_spv_includes.h
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h"
|
||||||
|
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
|
|
@ -11,7 +11,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/div_ceil.h"
|
#include "common/div_ceil.h"
|
||||||
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
|
#include "video_core/host_shaders/astc_decoder_spv_includes.h"
|
||||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||||
|
@ -124,13 +124,62 @@ constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS>
|
||||||
}};
|
}};
|
||||||
|
|
||||||
struct AstcPushConstants {
|
struct AstcPushConstants {
|
||||||
std::array<u32, 2> blocks_dims;
|
|
||||||
u32 layer_stride;
|
u32 layer_stride;
|
||||||
u32 block_size;
|
u32 block_size;
|
||||||
u32 x_shift;
|
u32 x_shift;
|
||||||
u32 block_height;
|
u32 block_height;
|
||||||
u32 block_height_mask;
|
u32 block_height_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the slot (0..13) of the block-size-specialized ASTC decoder for `format`.
// The SRGB and UNORM variants of a block size share one slot. The numbering must stay
// in step with the ASTC_SHADERS table built in the ASTCDecoderPass constructor, since
// the result is used to index astc_pipelines.
// Any non-ASTC format hits UNREACHABLE() and falls back to slot 0.
size_t AstcFormatIndex(VideoCore::Surface::PixelFormat format) {
    using VideoCore::Surface::PixelFormat;

    switch (format) {
    case PixelFormat::ASTC_2D_4X4_SRGB:
    case PixelFormat::ASTC_2D_4X4_UNORM:
        return 0;
    case PixelFormat::ASTC_2D_5X4_SRGB:
    case PixelFormat::ASTC_2D_5X4_UNORM:
        return 1;
    case PixelFormat::ASTC_2D_5X5_SRGB:
    case PixelFormat::ASTC_2D_5X5_UNORM:
        return 2;
    case PixelFormat::ASTC_2D_6X5_SRGB:
    case PixelFormat::ASTC_2D_6X5_UNORM:
        return 3;
    case PixelFormat::ASTC_2D_6X6_SRGB:
    case PixelFormat::ASTC_2D_6X6_UNORM:
        return 4;
    case PixelFormat::ASTC_2D_8X5_SRGB:
    case PixelFormat::ASTC_2D_8X5_UNORM:
        return 5;
    case PixelFormat::ASTC_2D_8X6_SRGB:
    case PixelFormat::ASTC_2D_8X6_UNORM:
        return 6;
    case PixelFormat::ASTC_2D_8X8_SRGB:
    case PixelFormat::ASTC_2D_8X8_UNORM:
        return 7;
    case PixelFormat::ASTC_2D_10X5_SRGB:
    case PixelFormat::ASTC_2D_10X5_UNORM:
        return 8;
    case PixelFormat::ASTC_2D_10X6_SRGB:
    case PixelFormat::ASTC_2D_10X6_UNORM:
        return 9;
    case PixelFormat::ASTC_2D_10X8_SRGB:
    case PixelFormat::ASTC_2D_10X8_UNORM:
        return 10;
    case PixelFormat::ASTC_2D_10X10_SRGB:
    case PixelFormat::ASTC_2D_10X10_UNORM:
        return 11;
    case PixelFormat::ASTC_2D_12X10_SRGB:
    case PixelFormat::ASTC_2D_12X10_UNORM:
        return 12;
    case PixelFormat::ASTC_2D_12X12_SRGB:
    case PixelFormat::ASTC_2D_12X12_UNORM:
        return 13;
    default:
        break;
    }

    // Only reached for formats that are not 2D ASTC.
    UNREACHABLE();
    return 0;
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
||||||
|
@ -312,19 +361,53 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
|
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
|
||||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||||
compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{
|
compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{
|
||||||
memory_allocator_} {}
|
memory_allocator_} {
|
||||||
|
// These must match the order found in AstcFormatIndex
|
||||||
|
static constexpr std::array<std::span<const u32>, 14> ASTC_SHADERS{
|
||||||
|
ASTC_DECODER_COMP_4X4_SPV, ASTC_DECODER_COMP_5X4_SPV, ASTC_DECODER_COMP_5X5_SPV,
|
||||||
|
ASTC_DECODER_COMP_6X5_SPV, ASTC_DECODER_COMP_6X6_SPV, ASTC_DECODER_COMP_8X5_SPV,
|
||||||
|
ASTC_DECODER_COMP_8X6_SPV, ASTC_DECODER_COMP_8X8_SPV, ASTC_DECODER_COMP_10X5_SPV,
|
||||||
|
ASTC_DECODER_COMP_10X6_SPV, ASTC_DECODER_COMP_10X8_SPV, ASTC_DECODER_COMP_10X10_SPV,
|
||||||
|
ASTC_DECODER_COMP_12X10_SPV, ASTC_DECODER_COMP_12X12_SPV,
|
||||||
|
};
|
||||||
|
for (size_t index = 0; index < ASTC_SHADERS.size(); ++index) {
|
||||||
|
const auto& code = ASTC_SHADERS[index];
|
||||||
|
const auto module_ = device.GetLogical().CreateShaderModule({
|
||||||
|
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||||
|
.pCode = code.data(),
|
||||||
|
});
|
||||||
|
device.SaveShader(code);
|
||||||
|
astc_pipelines[index] = device.GetLogical().CreateComputePipeline({
|
||||||
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
.module = *module_,
|
||||||
|
.pName = "main",
|
||||||
|
.pSpecializationInfo = nullptr,
|
||||||
|
},
|
||||||
|
.layout = *layout,
|
||||||
|
.basePipelineHandle = nullptr,
|
||||||
|
.basePipelineIndex = 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||||
|
|
||||||
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
||||||
using namespace VideoCommon::Accelerated;
|
using namespace VideoCommon::Accelerated;
|
||||||
const std::array<u32, 2> block_dims{
|
|
||||||
VideoCore::Surface::DefaultBlockWidth(image.info.format),
|
|
||||||
VideoCore::Surface::DefaultBlockHeight(image.info.format),
|
|
||||||
};
|
|
||||||
scheduler.RequestOutsideRenderPassOperationContext();
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
const VkPipeline vk_pipeline = *pipeline;
|
const VkPipeline vk_pipeline = *astc_pipelines[AstcFormatIndex(image.info.format)];
|
||||||
const VkImageAspectFlags aspect_mask = image.AspectMask();
|
const VkImageAspectFlags aspect_mask = image.AspectMask();
|
||||||
const VkImage vk_image = image.Handle();
|
const VkImage vk_image = image.Handle();
|
||||||
const bool is_initialized = image.ExchangeInitialization();
|
const bool is_initialized = image.ExchangeInitialization();
|
||||||
|
@ -371,10 +454,9 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
||||||
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
|
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
|
||||||
ASSERT(params.bytes_per_block_log2 == 4);
|
ASSERT(params.bytes_per_block_log2 == 4);
|
||||||
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
|
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, params,
|
||||||
params, descriptor_data](vk::CommandBuffer cmdbuf) {
|
descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||||
const AstcPushConstants uniforms{
|
const AstcPushConstants uniforms{
|
||||||
.blocks_dims = block_dims,
|
|
||||||
.layer_stride = params.layer_stride,
|
.layer_stride = params.layer_stride,
|
||||||
.block_size = params.block_size,
|
.block_size = params.block_size,
|
||||||
.x_shift = params.x_shift,
|
.x_shift = params.x_shift,
|
||||||
|
|
|
@ -95,6 +95,8 @@ public:
|
||||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
std::array<vk::Pipeline, 14> astc_pipelines;
|
||||||
|
|
||||||
Scheduler& scheduler;
|
Scheduler& scheduler;
|
||||||
StagingBufferPool& staging_buffer_pool;
|
StagingBufferPool& staging_buffer_pool;
|
||||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue;
|
ComputePassDescriptorQueue& compute_pass_descriptor_queue;
|
||||||
|
|
Loading…
Reference in a new issue