mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2025-01-23 23:06:18 +01:00
68e15c1a74
* Begin work on A32 SIMD Intrinsics
* More instructions, some cleanup.
* Intrinsics for Move instructions (zip etc.). These pass the existing tests.
* Intrinsics for some of Cvt. While doing this I noticed that the conversion for int/fp was incorrect in the slow path. I'll fix this in the original repo.
* Intrinsics for more Arithmetic instructions.
* Intrinsics for Vext
* Fix VEXT Intrinsic for double words.
* Use InsertPs to move scalar values.
* Cleanup, fix VPADD.f32 and VMIN signed integer.
* Cleanup, add SSE2 support for scalar insert. Works similarly to the IR scalar insert, but obviously this one works directly on V128.
* Minor cleanup.
* Enable intrinsic for FP64 to integer conversion.
* Address feedback apart from splitting out intrinsic float abs. Also: treat bad VREV encodings as undefined rather than throwing in translation.
* Move float abs to helper, fix bug with cvt
* Rename opc2 & 3 to match A32 docs, use ArgumentOutOfRangeException appropriately.
* Get name of variable at compilation rather than string literal.
* Use correct double sign mask.
95 lines
3 KiB
C#
95 lines
3 KiB
C#
using ARMeilleure.Decoders;
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
using ARMeilleure.Translation;
|
|
|
|
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
|
|
|
|
namespace ARMeilleure.Instructions
|
|
{
|
|
/// <summary>
/// Emitters for the bitwise SIMD operations found in this part of <c>InstEmit32</c>:
/// AND, OR, bitwise select and the two bitwise-insert variants.
/// Each emitter chooses between an x86 intrinsic path, taken when
/// <c>Optimizations.UseSse2</c> is set, and a fallback composed from the generic
/// bitwise operations exposed by <see cref="ArmEmitterContext"/>.
/// </summary>
static partial class InstEmit32
{
    /// <summary>
    /// Emits a vector bitwise AND for the current instruction.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    public static void Vand_I(ArmEmitterContext context)
    {
        if (!Optimizations.UseSse2)
        {
            // Fallback: element-wise AND through the context's generic operation.
            EmitVectorBinaryOpZx32(context, (lhs, rhs) => context.BitwiseAnd(lhs, rhs));

            return;
        }

        // The same intrinsic is supplied for both intrinsic arguments of the helper.
        EmitVectorBinaryOpF32(context, Intrinsic.X86Pand, Intrinsic.X86Pand);
    }

    /// <summary>
    /// Emits a bitwise insert through <see cref="EmitBifBit"/> with <c>notRm</c> set,
    /// i.e. the third operand is inverted before it is used as the insert mask.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    public static void Vbif(ArmEmitterContext context) => EmitBifBit(context, true);

    /// <summary>
    /// Emits a bitwise insert through <see cref="EmitBifBit"/> with <c>notRm</c> cleared,
    /// i.e. the third operand is used as the insert mask unchanged.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    public static void Vbsl(ArmEmitterContext context)
    {
        if (!Optimizations.UseSse2)
        {
            // Fallback: (mask AND (onSet XOR onClear)) XOR onClear.
            // Each result bit is onSet's bit where mask is 1 and onClear's bit where mask is 0.
            EmitVectorTernaryOpZx32(context, (mask, onSet, onClear) =>
            {
                Operand differing = context.BitwiseExclusiveOr(onSet, onClear);
                Operand chosen = context.BitwiseAnd(mask, differing);

                return context.BitwiseExclusiveOr(chosen, onClear);
            });

            return;
        }

        // Intrinsic path, same formula: ((n XOR m) AND d) XOR m.
        EmitVectorTernaryOpSimd32(context, (d, n, m) =>
        {
            Operand differing = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
            Operand chosen = context.AddIntrinsic(Intrinsic.X86Pand, differing, d);

            return context.AddIntrinsic(Intrinsic.X86Pxor, chosen, m);
        });
    }

    /// <summary>
    /// Emits a bitwise insert through <see cref="EmitBifBit"/> with <c>notRm</c> cleared,
    /// i.e. the third operand is used as the insert mask unchanged.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    public static void Vbit(ArmEmitterContext context) => EmitBifBit(context, false);

    /// <summary>
    /// Emits a vector bitwise OR for the current instruction.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    public static void Vorr_I(ArmEmitterContext context)
    {
        if (!Optimizations.UseSse2)
        {
            // Fallback: element-wise OR through the context's generic operation.
            EmitVectorBinaryOpZx32(context, (lhs, rhs) => context.BitwiseOr(lhs, rhs));

            return;
        }

        // The same intrinsic is supplied for both intrinsic arguments of the helper.
        EmitVectorBinaryOpF32(context, Intrinsic.X86Por, Intrinsic.X86Por);
    }

    /// <summary>
    /// Shared emitter behind <see cref="Vbif"/> and <see cref="Vbit"/>.
    /// Produces <c>d XOR (mask AND (d XOR n))</c>, where <c>mask</c> is <c>m</c>, or the
    /// complement of <c>m</c> when <paramref name="notRm"/> is set. Each result bit is
    /// therefore <c>n</c>'s bit where the mask is 1 and <c>d</c>'s bit where it is 0.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    /// <param name="notRm">When <c>true</c>, the third operand is inverted before masking.</param>
    private static void EmitBifBit(ArmEmitterContext context, bool notRm)
    {
        // The decoded opcode is never read in this method; the cast is the only effect of this line.
        // NOTE(review): looks removable if the helpers below perform the same cast - confirm.
        OpCode32SimdReg simdOp = (OpCode32SimdReg)context.CurrOp;

        if (!Optimizations.UseSse2)
        {
            EmitVectorTernaryOpZx32(context, (d, n, m) =>
            {
                // The inversion (if any) is emitted first, ahead of the XOR/AND/XOR chain.
                Operand mask = notRm ? context.BitwiseNot(m) : m;
                Operand differing = context.BitwiseExclusiveOr(d, n);
                Operand inserted = context.BitwiseAnd(mask, differing);

                return context.BitwiseExclusiveOr(inserted, d);
            });

            return;
        }

        // X86Pandn is used for the inverted case; per the x86 PANDN definition it
        // complements its first source, which is why m is passed first below.
        Intrinsic maskInst = notRm ? Intrinsic.X86Pandn : Intrinsic.X86Pand;

        EmitVectorTernaryOpSimd32(context, (d, n, m) =>
        {
            Operand differing = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
            Operand inserted = context.AddIntrinsic(maskInst, m, differing);

            return context.AddIntrinsic(Intrinsic.X86Pxor, d, inserted);
        });
    }
}
|
|
}
|