Skip to content

Commit

Permalink
reduce gpu pressure via atest/srgb RCE PSO variation
Browse files Browse the repository at this point in the history
basically old atest profiles, but in realtime
  • Loading branch information
megai2 committed Jan 31, 2021
1 parent b88a265 commit ab1ee2a
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 2 deletions.
14 changes: 13 additions & 1 deletion d912pxy/d912pxy_dx9_pipeline_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ void d912pxy_dx9_pipeline_state::Init()
psoDesc.val.rt[0].format = DXGI_FORMAT_B8G8R8A8_UNORM;
psoDesc.val.ds.format = DXGI_FORMAT_D24_UNORM_S8_UINT;
psoDesc.val.compareSamplerStage = d912pxy_trimmed_pso_desc::NO_COMPARE_SAMPLERS;
psoDesc.val.dx9emulFlags = 0;
compareSamplerStage = d912pxy_trimmed_pso_desc::NO_COMPARE_SAMPLERS;

for (int i = 1; i < PXY_INNER_MAX_RENDER_TARGETS; ++i)
Expand Down Expand Up @@ -596,6 +597,10 @@ void d912pxy_dx9_pipeline_state::ProcessDX9RSChange(D3DRENDERSTATETYPE State, DW

case D3DRS_SRGBWRITEENABLE:
DX9RSvalues[State] = Value;
if (Value)
psoDesc.val.dx9emulFlags |= d912pxy_trimmed_pso_desc::DX9_EMUL_SRGB;
else
psoDesc.val.dx9emulFlags &= ~d912pxy_trimmed_pso_desc::DX9_EMUL_SRGB;
d912pxy_s.render.state.tex.ModStageBit(31, 13, Value);
//d912pxy_s.render.iframe.TST()->SetTexStage(29, Value);
break;
Expand Down Expand Up @@ -654,7 +659,14 @@ void d912pxy_dx9_pipeline_state::ProcessDX9RSChange(D3DRENDERSTATETYPE State, DW
case D3DRS_ALPHAREF:
case D3DRS_ALPHAFUNC:
DX9RSvalues[State] = Value;
//TODO: remove hardcoded 31, everywhere, !
if (State == D3DRS_ALPHATESTENABLE)
{
if (Value)
psoDesc.val.dx9emulFlags |= d912pxy_trimmed_pso_desc::DX9_EMUL_ATEST;
else
psoDesc.val.dx9emulFlags &= ~d912pxy_trimmed_pso_desc::DX9_EMUL_ATEST;
}
//TODO: remove hardcoded 31, everywhere, !
d912pxy_s.render.state.tex.ModStageByMask(31, (DX9RSvalues[D3DRS_ALPHATESTENABLE] & 1) | (DX9RSvalues[D3DRS_ALPHAFUNC] << 1) | (DX9RSvalues[D3DRS_ALPHAREF] << 5), 0xFFFFE000);
break;
case D3DRS_CLIPPLANEENABLE:
Expand Down
2 changes: 1 addition & 1 deletion d912pxy/d912pxy_hlsl_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ void d912pxy_hlsl_generator::WriteShaderTailData()
HLSL_GEN_WRITE_PROC("dx9_ps_nan_cull_emulation(dx9_ret_color_reg_ac);");
}

HLSL_GEN_WRITE_PROC("dx9_ps_write_emulation_at_srgb(dx9_ret_color_reg_ac);");
HLSL_GEN_WRITE_PROC("dx9_ps_write_emulation(dx9_ret_color_reg_ac);//RCE MARK");
}
else {
HLSL_GEN_WRITE_PROC("dx9_halfpixel_pos_reg_ac = dx9_fix_halfpixel_offset(dx9_halfpixel_pos_reg_ac);");
Expand Down
29 changes: 29 additions & 0 deletions d912pxy/d912pxy_pso_item.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,9 @@ bool d912pxy_pso_item::PerformRCE(char* alias, D3D12_GRAPHICS_PIPELINE_STATE_DES
if (desc->val.compareSamplerStage != d912pxy_trimmed_pso_desc::NO_COMPARE_SAMPLERS)
RCEApplyPCFSampler(HLSLsource[1].c_arr<char>(), desc->val.compareSamplerStage);

if (desc->val.dx9emulFlags)
RCEApplyDX9EmulFlags(HLSLsource[1].c_arr<char>(), desc->val.dx9emulFlags);

return true;
}

Expand Down Expand Up @@ -484,6 +487,32 @@ void d912pxy_pso_item::RCEApplyPCFSampler(char* source, UINT stage)
}
}

// Replacement texts for the pixel shader colour-write call tagged "//RCE MARK",
// indexed by the dx9emulFlags value handed to RCEApplyDX9EmulFlags.
// RCEApplyDX9EmulFlags overwrites the marker in place with a fixed 55 bytes, so
// entries 0..3 are padded with a trailing comment to exactly 55 characters,
// the same length as the marker itself.
const char* psWriteEmulReplacements[] =
{
// marker being replaced: "dx9_ps_write_emulation(dx9_ret_color_reg_ac);//RCE MARK"
"dx9_ps_write_emulation(dx9_ret_color_reg_ac);//default ",// flags 0: no extra emulation
"dx9_ps_write_emulation_at(dx9_ret_color_reg_ac);//flg01",// flags 1: DX9_EMUL_ATEST
"dx9_ps_write_emulation_srgb(dx9_ret_color_reg_ac);//f02",// flags 2: DX9_EMUL_SRGB
"dx9_ps_write_emulation_at_srgb(dx9_ret_color_reg_ac);//",// flags 3: DX9_EMUL_ATEST | DX9_EMUL_SRGB
// index 4: sentinel for an out-of-range flags value. It is shorter than 55
// characters, so copying 55 bytes from it would read past the end of the literal.
"out of range bullshit marker"
};

// Patches the pixel shader HLSL text in place so that its final colour-write
// call matches the DX9 emulation flags of the PSO being compiled.
//
// source - mutable, NUL-terminated HLSL source of the pixel shader; it is
//          expected to contain the "//RCE MARK" write call exactly as emitted
//          by the HLSL generator.
// flags  - dx9emulFlags value of the PSO; used as an index into
//          psWriteEmulReplacements.
//
// The marker is overwritten byte-for-byte, so a replacement is only applied when
// it has exactly the same length as the marker. On any inconsistency an error is
// logged and the source is left untouched (the marker line itself is a valid
// call to the non-emulating write function).
void d912pxy_pso_item::RCEApplyDX9EmulFlags(char* source, UINT8 flags)
{
	static const char marker[] = "dx9_ps_write_emulation(dx9_ret_color_reg_ac);//RCE MARK";
	// Derived from the literal instead of a hand-counted constant.
	const size_t markerSize = sizeof(marker) - 1;
	const size_t replacementCount = sizeof(psWriteEmulReplacements) / sizeof(psWriteEmulReplacements[0]);

	// Guard the table lookup: flags is a raw 8-bit value and the table is small.
	if (flags >= replacementCount)
	{
		LOG_ERR_DTDM("PS dx9 emul RCE flags are out of replacement table range");
		return;
	}

	const char* replacement = psWriteEmulReplacements[flags];

	// A shorter entry (such as the out-of-range sentinel) would make the copy read
	// past the end of the literal; a longer one would be silently truncated.
	if (strlen(replacement) != markerSize)
	{
		LOG_ERR_DTDM("PS dx9 emul RCE replacement length does not match marker length");
		return;
	}

	char* writeTarget = strstr(source, marker);

	if (!writeTarget)
	{
		LOG_ERR_DTDM("No write marker found for PS dx9 emul RCE, this is totally wrong!");
		return;
	}

	// Same length on both sides, so the surrounding text and terminator stay intact.
	memcpy(writeTarget, replacement, markerSize);
}

void d912pxy_pso_item::AfterCompileRelease()
{
delete dx12Desc;
Expand Down
1 change: 1 addition & 0 deletions d912pxy/d912pxy_pso_item.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class d912pxy_pso_item : public d912pxy_vtable, public d912pxy_comhandler
void RCEFilterUnusedRegs(char** ioBlock, UINT elements);
void RCEFixIOBlocksOrdering(char** vsOut, char** psIn, UINT vsOutCnt, UINT psInCnt);
void RCEApplyPCFSampler(char* source, UINT stage);
void RCEApplyDX9EmulFlags(char* source, UINT8 flags);

void AfterCompileRelease();

Expand Down
3 changes: 3 additions & 0 deletions d912pxy/d912pxy_trimmed_pso.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class d912pxy_trimmed_pso_desc
};

enum { NO_COMPARE_SAMPLERS = 0xFF };

// Bit flags combined into ValuePart::dx9emulFlags.
// DX9_EMUL_ATEST follows D3DRS_ALPHATESTENABLE and DX9_EMUL_SRGB follows
// D3DRS_SRGBWRITEENABLE (set when the render state value is non-zero, cleared
// otherwise). The combined value selects which pixel shader write-emulation
// call is patched into the shader source when the PSO is compiled.
enum { DX9_EMUL_ATEST = 0x1, DX9_EMUL_SRGB = 0x2 };

struct ValuePart {
UINT vdeclHash;
Expand All @@ -92,6 +94,7 @@ class d912pxy_trimmed_pso_desc
rt_desc rt[PXY_INNER_MAX_RENDER_TARGETS];
UINT8 NumRenderTargets;
UINT8 compareSamplerStage;
UINT8 dx9emulFlags;
};

struct ref_part {
Expand Down

0 comments on commit ab1ee2a

Please sign in to comment.