Merge pull request #2671 from wwylele/dot3-rgba

rasterizer: implement combiner operation 7 (Dot3_RGBA)
This commit is contained in:
bunnei 2017-04-21 17:03:22 -04:00 committed by GitHub
commit ea53d6085a
4 changed files with 38 additions and 21 deletions

View file

@ -199,7 +199,7 @@ struct TexturingRegs {
Lerp = 4,
Subtract = 5,
Dot3_RGB = 6,
Dot3_RGBA = 7,
MultiplyThenAdd = 8,
AddThenMultiply = 9,
};

View file

@ -306,8 +306,6 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";
break;
case Operation::Lerp:
// TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use
// builtin lerp
out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
"[1] * (vec3(1.0) - " + variable_name + "[2])";
break;
@ -322,6 +320,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
variable_name + "[2]";
break;
case Operation::Dot3_RGB:
case Operation::Dot3_RGBA:
out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +
"[1] - vec3(0.5)) * 4.0)";
break;
@ -421,17 +420,25 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
out += ";\n";
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
// The result of the Dot3_RGBA operation is also written to the alpha component
out += "float alpha_output_" + index_name + " = color_output_" + index_name + "[0];\n";
} else {
out += "float alpha_results_" + index_name + "[3] = float[3](";
AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
index_name);
out += ", ";
AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
index_name);
out += ", ";
AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
index_name);
out += ");\n";
out += "float alpha_output_" + index_name + " = ";
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
out += ";\n";
}
out += "last_tex_env_out = vec4("
"clamp(color_output_" +

View file

@ -403,13 +403,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
};
auto color_output = ColorCombine(tev_stage.color_op, color_result);
u8 alpha_output;
if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
// The result of the Dot3_RGBA operation is also written to the alpha component
alpha_output = color_output.x;
} else {
// alpha combiner
std::array<u8, 3> alpha_result = {{
GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)),
GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)),
GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)),
GetAlphaModifier(tev_stage.alpha_modifier1,
GetSource(tev_stage.alpha_source1)),
GetAlphaModifier(tev_stage.alpha_modifier2,
GetSource(tev_stage.alpha_source2)),
GetAlphaModifier(tev_stage.alpha_modifier3,
GetSource(tev_stage.alpha_source3)),
}};
auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
}
combiner_output[0] =
std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());

View file

@ -169,7 +169,8 @@ Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> i
result = (result * input[2].Cast<int>()) / 255;
return result.Cast<u8>();
}
case Operation::Dot3_RGB: {
case Operation::Dot3_RGB:
case Operation::Dot3_RGBA: {
// Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results
// indicate that the per-component computation can't have a higher precision than 1/256,
// while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give