diff options
author | Ash Wolf <ninji@wuffs.org> | 2023-01-26 11:30:47 +0000 |
---|---|---|
committer | Ash Wolf <ninji@wuffs.org> | 2023-01-26 11:30:47 +0000 |
commit | 094b96ca1df4a035b5f93c351f773306c0241f3f (patch) | |
tree | 95ce05e3ebe816c7ee7996206bb37ea17d8ca33c /compiler_and_linker/BackEnd/PowerPC | |
parent | fc0c4c0df7b583b55a08317cf1ef6a71d27c0440 (diff) | |
download | MWCC-main.tar.gz MWCC-main.zip |
move lots of source files around to match their actual placement in the original treemain
Diffstat (limited to 'compiler_and_linker/BackEnd/PowerPC')
42 files changed, 46093 insertions, 0 deletions
diff --git a/compiler_and_linker/BackEnd/PowerPC/CMachine.c b/compiler_and_linker/BackEnd/PowerPC/CMachine.c new file mode 100644 index 0000000..bfa7023 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CMachine.c @@ -0,0 +1,1488 @@ +#include "compiler/CMachine.h" +#include "compiler/CClass.h" +#include "compiler/CError.h" +#include "compiler/CInt64.h" +#include "compiler/CParser.h" +#include "compiler/CPrep.h" +#include "compiler/CPrepTokenizer.h" +#include "compiler/CompilerTools.h" +#include "compiler/ScanFloat.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +TypeIntegral stbool = {TYPEINT, 1, IT_BOOL}; +TypeIntegral stchar = {TYPEINT, 1, IT_CHAR}; +TypeIntegral stsignedchar = {TYPEINT, 1, IT_SCHAR}; +TypeIntegral stunsignedchar = {TYPEINT, 1, IT_UCHAR}; +TypeIntegral stwchar = {TYPEINT, 4, IT_WCHAR_T}; +TypeIntegral stsignedshort = {TYPEINT, 2, IT_SHORT}; +TypeIntegral stunsignedshort = {TYPEINT, 2, IT_USHORT}; +TypeIntegral stsignedint = {TYPEINT, 4, IT_INT}; +TypeIntegral stunsignedint = {TYPEINT, 4, IT_UINT}; +TypeIntegral stsignedlong = {TYPEINT, 4, IT_LONG}; +TypeIntegral stunsignedlong = {TYPEINT, 4, IT_ULONG}; +TypeIntegral stsignedlonglong = {TYPEINT, 8, IT_LONGLONG}; +TypeIntegral stunsignedlonglong = {TYPEINT, 8, IT_ULONGLONG}; +TypeIntegral stfloat = {TYPEFLOAT, 4, IT_FLOAT}; +TypeIntegral stshortdouble = {TYPEFLOAT, 8, IT_SHORTDOUBLE}; +TypeIntegral stdouble = {TYPEFLOAT, 8, IT_DOUBLE}; +TypeIntegral stlongdouble = {TYPEFLOAT, 8, IT_LONGDOUBLE}; + +static StructMember stVUC_unsignedchar15 = {NULL, (Type *) &stunsignedchar, NULL, 15, 0}; +static StructMember stVUC_unsignedchar14 = {&stVUC_unsignedchar15, (Type *) &stunsignedchar, NULL, 14, 0}; +static StructMember stVUC_unsignedchar13 = {&stVUC_unsignedchar14, (Type *) &stunsignedchar, NULL, 13, 0}; +static StructMember stVUC_unsignedchar12 = {&stVUC_unsignedchar13, (Type *) &stunsignedchar, NULL, 12, 0}; +static StructMember stVUC_unsignedchar11 = {&stVUC_unsignedchar12, (Type *) &stunsignedchar, NULL, 11, 0}; +static StructMember stVUC_unsignedchar10 = {&stVUC_unsignedchar11, (Type *) &stunsignedchar, NULL, 10, 0}; +static StructMember stVUC_unsignedchar9 = {&stVUC_unsignedchar10, (Type *) &stunsignedchar, NULL, 9, 0}; +static StructMember stVUC_unsignedchar8 = {&stVUC_unsignedchar9, (Type *) &stunsignedchar, NULL, 8, 0}; +static StructMember stVUC_unsignedchar7 = {&stVUC_unsignedchar7, (Type *) &stunsignedchar, NULL, 7, 0}; +static StructMember stVUC_unsignedchar6 = {&stVUC_unsignedchar7, (Type *) &stunsignedchar, NULL, 6, 0}; +static StructMember stVUC_unsignedchar5 = {&stVUC_unsignedchar6, (Type *) &stunsignedchar, NULL, 5, 0}; +static StructMember stVUC_unsignedchar4 = {&stVUC_unsignedchar5, (Type *) &stunsignedchar, NULL, 4, 0}; +static StructMember stVUC_unsignedchar3 = {&stVUC_unsignedchar4, (Type *) &stunsignedchar, NULL, 3, 0}; +static StructMember stVUC_unsignedchar2 = {&stVUC_unsignedchar3, (Type *) &stunsignedchar, NULL, 2, 0}; +static StructMember stVUC_unsignedchar1 = {&stVUC_unsignedchar2, (Type *) &stunsignedchar, NULL, 1, 0}; +static StructMember stVUC_unsignedchar0 = {&stVUC_unsignedchar1, (Type *) &stunsignedchar, NULL, 0, 0}; + +static StructMember stVSC_signedchar15 = {NULL, (Type *) &stsignedchar, NULL, 15, 0}; +static StructMember stVSC_signedchar14 = {&stVSC_signedchar15, (Type *) &stsignedchar, NULL, 14, 0}; +static StructMember stVSC_signedchar13 = {&stVSC_signedchar14, (Type *) &stsignedchar, NULL, 13, 0}; +static StructMember stVSC_signedchar12 = {&stVSC_signedchar13, (Type *) &stsignedchar, NULL, 12, 0}; +static StructMember stVSC_signedchar11 = {&stVSC_signedchar12, (Type *) &stsignedchar, NULL, 11, 0}; +static StructMember stVSC_signedchar10 = {&stVSC_signedchar11, (Type *) &stsignedchar, NULL, 10, 0}; +static StructMember stVSC_signedchar9 = {&stVSC_signedchar10, (Type *) &stsignedchar, NULL, 9, 0}; +static StructMember stVSC_signedchar8 = {&stVSC_signedchar9, (Type *) &stsignedchar, NULL, 8, 0}; +static StructMember stVSC_signedchar7 = {&stVSC_signedchar7, (Type *) &stsignedchar, NULL, 7, 0}; +static StructMember stVSC_signedchar6 = {&stVSC_signedchar7, (Type *) &stsignedchar, NULL, 6, 0}; +static StructMember stVSC_signedchar5 = {&stVSC_signedchar6, (Type *) &stsignedchar, NULL, 5, 0}; +static StructMember stVSC_signedchar4 = {&stVSC_signedchar5, (Type *) &stsignedchar, NULL, 4, 0}; +static StructMember stVSC_signedchar3 = {&stVSC_signedchar4, (Type *) &stsignedchar, NULL, 3, 0}; +static StructMember stVSC_signedchar2 = {&stVSC_signedchar3, (Type *) &stsignedchar, NULL, 2, 0}; +static StructMember stVSC_signedchar1 = {&stVSC_signedchar2, (Type *) &stsignedchar, NULL, 1, 0}; +static StructMember stVSC_signedchar0 = {&stVSC_signedchar1, (Type *) &stsignedchar, NULL, 0, 0}; + +static StructMember stVUS_unsignedshort7 = {NULL, (Type *) &stunsignedshort, NULL, 14, 0}; +static StructMember stVUS_unsignedshort6 = {&stVUS_unsignedshort7, (Type *) &stunsignedshort, NULL, 12, 0}; +static StructMember stVUS_unsignedshort5 = {&stVUS_unsignedshort7, (Type *) &stunsignedshort, NULL, 10, 0}; +static StructMember stVUS_unsignedshort4 = {&stVUS_unsignedshort5, (Type *) &stunsignedshort, NULL, 8, 0}; +static StructMember stVUS_unsignedshort3 = {&stVUS_unsignedshort4, (Type *) &stunsignedshort, NULL, 6, 0}; +static StructMember stVUS_unsignedshort2 = {&stVUS_unsignedshort3, (Type *) &stunsignedshort, NULL, 4, 0}; +static StructMember stVUS_unsignedshort1 = {&stVUS_unsignedshort2, (Type *) &stunsignedshort, NULL, 2, 0}; +static StructMember stVUS_unsignedshort0 = {&stVUS_unsignedshort1, (Type *) &stunsignedshort, NULL, 0, 0}; + +static StructMember stVSS_signedshort7 = {NULL, (Type *) &stsignedshort, NULL, 14, 0}; +static StructMember stVSS_signedshort6 = {&stVSS_signedshort7, (Type *) &stsignedshort, NULL, 12, 0}; +static StructMember stVSS_signedshort5 = {&stVSS_signedshort7, (Type *) &stsignedshort, NULL, 10, 0}; +static StructMember stVSS_signedshort4 = {&stVSS_signedshort5, (Type *) &stsignedshort, NULL, 8, 0}; +static StructMember stVSS_signedshort3 = {&stVSS_signedshort4, (Type *) &stsignedshort, NULL, 6, 0}; +static StructMember stVSS_signedshort2 = {&stVSS_signedshort3, (Type *) &stsignedshort, NULL, 4, 0}; +static StructMember stVSS_signedshort1 = {&stVSS_signedshort2, (Type *) &stsignedshort, NULL, 2, 0}; +static StructMember stVSS_signedshort0 = {&stVSS_signedshort1, (Type *) &stsignedshort, NULL, 0, 0}; + +static StructMember stVUL_unsignedlong3 = {NULL, (Type *) &stunsignedlong, NULL, 12, 0}; +static StructMember stVUL_unsignedlong2 = {&stVUL_unsignedlong3, (Type *) &stunsignedlong, NULL, 8, 0}; +static StructMember stVUL_unsignedlong1 = {&stVUL_unsignedlong2, (Type *) &stunsignedlong, NULL, 4, 0}; +static StructMember stVUL_unsignedlong0 = {&stVUL_unsignedlong1, (Type *) &stunsignedlong, NULL, 0, 0}; + +static StructMember stVSL_signedlong3 = {NULL, (Type *) &stsignedlong, NULL, 12, 0}; +static StructMember stVSL_signedlong2 = {&stVSL_signedlong3, (Type *) &stsignedlong, NULL, 8, 0}; +static StructMember stVSL_signedlong1 = {&stVSL_signedlong2, (Type *) &stsignedlong, NULL, 4, 0}; +static StructMember stVSL_signedlong0 = {&stVSL_signedlong1, (Type *) &stsignedlong, NULL, 0, 0}; + +static StructMember stVF_float3 = {NULL, (Type *) &stfloat, NULL, 12, 0}; +static StructMember stVF_float2 = {&stVF_float3, (Type *) &stfloat, NULL, 8, 0}; +static StructMember stVF_float1 = {&stVF_float2, (Type *) &stfloat, NULL, 4, 0}; +static StructMember stVF_float0 = {&stVF_float1, (Type *) &stfloat, NULL, 0, 0}; + +TypeStruct stvectorunsignedchar = {TYPESTRUCT, 16, NULL, &stVUC_unsignedchar0, STRUCT_VECTOR_UCHAR, 16}; +TypeStruct stvectorsignedchar = {TYPESTRUCT, 16, NULL, &stVSC_signedchar0, STRUCT_VECTOR_SCHAR, 16}; +TypeStruct stvectorboolchar = {TYPESTRUCT, 16, NULL, &stVSC_signedchar0, STRUCT_VECTOR_BCHAR, 16}; + +TypeStruct stvectorunsignedshort = {TYPESTRUCT, 16, NULL, &stVUS_unsignedshort0, STRUCT_VECTOR_USHORT, 16}; +TypeStruct stvectorsignedshort = {TYPESTRUCT, 16, NULL, &stVSS_signedshort0, STRUCT_VECTOR_SSHORT, 16}; +TypeStruct stvectorboolshort = {TYPESTRUCT, 16, NULL, &stVSS_signedshort0, STRUCT_VECTOR_BSHORT, 16}; + +TypeStruct stvectorunsignedlong = {TYPESTRUCT, 16, NULL, &stVUL_unsignedlong0, STRUCT_VECTOR_UINT, 16}; +TypeStruct stvectorsignedlong = {TYPESTRUCT, 16, NULL, &stVSL_signedlong0, STRUCT_VECTOR_SINT, 16}; +TypeStruct stvectorboollong = {TYPESTRUCT, 16, NULL, &stVSL_signedlong0, STRUCT_VECTOR_BINT, 16}; + +TypeStruct stvectorfloat = {TYPESTRUCT, 16, NULL, &stVF_float0, STRUCT_VECTOR_FLOAT, 16}; + +TypeStruct stvectorpixel = {TYPESTRUCT, 16, NULL, &stVUS_unsignedshort0, STRUCT_VECTOR_PIXEL, 16}; + +TypeStruct stvector = {TYPESTRUCT, 16, NULL, NULL, STRUCT_VECTOR_UINT, 16}; + +static SInt32 cmach_structoffset; +static UInt8 cmach_structalign; +static short cmach_curbfsize; +static short cmach_curbfbasesize; +static int cmach_curbfoffset; + +static short cmach_packsize[] = { + 1, 2, 4, 8, 16 +}; + +// forward declarations +static SInt16 CMach_GetQualifiedTypeAlign(Type *type, Boolean flag); +static SInt16 CMach_GetMemberAlignment(Type *type, SInt32 align, Boolean flag); + +void CMach_Configure(void) { +} + +SInt32 CMach_GetQUALalign(UInt32 qual) { + SInt32 result = 0; + UInt32 chk; + + if ((chk = (qual & Q_ALIGNED_MASK))) { + if (chk == Q_ALIGNED_1) + result = 1; + else if (chk == Q_ALIGNED_2) + result = 2; + else if (chk == Q_ALIGNED_4) + result = 4; + else if (chk == Q_ALIGNED_8) + result = 8; + else if (chk == Q_ALIGNED_16) + result = 16; + else if (chk == Q_ALIGNED_32) + result = 32; + else if (chk == Q_ALIGNED_64) + result = 64; + else if (chk == Q_ALIGNED_128) + result = 128; + else if (chk == Q_ALIGNED_256) + result = 256; + else if (chk == Q_ALIGNED_512) + result = 512; + else if (chk == Q_ALIGNED_1024) + result = 1024; + else if (chk == Q_ALIGNED_2048) + result = 2048; + else if (chk == Q_ALIGNED_4096) + result = 4096; + else if (chk == Q_ALIGNED_8192) + result = 8192; + else + CError_FATAL(226); + } + + return result; +} + +SInt32 CMach_ArgumentAlignment(Type *type) { + char save_align_mode; + UInt8 save_oldalignment; + SInt32 align; + + save_align_mode = copts.structalignment; + save_oldalignment = copts.oldalignment; + copts.structalignment = AlignMode2_PPC; + copts.oldalignment = 0; + align = CMach_GetQualifiedTypeAlign(type, 0); + copts.structalignment = save_align_mode; + copts.oldalignment = save_oldalignment; + + if (type->type == TYPESTRUCT && !TYPE_STRUCT(type)->members) { + if (TYPE_STRUCT(type)->align > align) + return TYPE_STRUCT(type)->align; + else + return align; + } else { + return align; + } +} + +// TODO: investigate if this returns SInt16 actually +SInt32 CMach_AllocationAlignment(Type *type, UInt32 qual) { + SInt32 align; + SInt32 qualalign; + SInt32 argalign; + SInt32 anotheralign; + + qualalign = CMach_GetQUALalign(qual); + align = CMach_GetQualifiedTypeAlign(type, 1); + if (qualalign > align) + align = qualalign; + argalign = CMach_ArgumentAlignment(type); + if (argalign > align) + align = argalign; + + switch (type->type) { + case TYPEVOID: + case TYPEMEMBERPOINTER: + case TYPEPOINTER: + anotheralign = 4; + break; + default: + anotheralign = 1; + } + if (anotheralign > align) + align = anotheralign; + + if (copts.optimizationlevel > 0) { + if (type->type == TYPEARRAY || (type->type == TYPESTRUCT && (TYPE_STRUCT(type)->stype < STRUCT_VECTOR_UCHAR || TYPE_STRUCT(type)->stype > STRUCT_VECTOR_PIXEL)) || type->type == TYPECLASS || (type->type == TYPEMEMBERPOINTER && (UInt32) type->size == 12)) { + return (copts.min_struct_alignment > align) ? copts.min_struct_alignment : align; + } + } + + return align; +} + +CInt64 CMach_CalcIntDiadic(Type *type, CInt64 left, short op, CInt64 right) { + if (is_unsigned(type)) { + switch (type->size) { + case 1: + CInt64_ConvertUInt8(&left); + CInt64_ConvertUInt8(&right); + break; + case 2: + CInt64_ConvertUInt16(&left); + CInt64_ConvertUInt16(&right); + break; + case 4: + CInt64_ConvertUInt32(&left); + CInt64_ConvertUInt32(&right); + break; + case 8: + break; + default: + CError_FATAL(327); + } + + switch (op) { + case '*': + left = CInt64_MulU(left, right); + break; + case '/': + if (CInt64_IsZero(&right)) + CError_Warning(CErrorStr139); + else + left = CInt64_DivU(left, right); + break; + case '%': + if (CInt64_IsZero(&right)) + CError_Warning(CErrorStr139); + else + left = CInt64_ModU(left, right); + break; + case '+': + left = CInt64_Add(left, right); + break; + case '-': + left = CInt64_Sub(left, right); + break; + case TK_SHL: + left = CInt64_Shl(left, right); + break; + case TK_SHR: + left = CInt64_ShrU(left, right); + break; + case '<': + CInt64_SetLong(&left, CInt64_LessU(left, right)); + break; + case '>': + CInt64_SetLong(&left, CInt64_GreaterU(left, right)); + break; + case TK_LESS_EQUAL: + CInt64_SetLong(&left, CInt64_LessEqualU(left, right)); + break; + case TK_GREATER_EQUAL: + CInt64_SetLong(&left, CInt64_GreaterEqualU(left, right)); + break; + case TK_LOGICAL_EQ: + CInt64_SetLong(&left, CInt64_Equal(left, right)); + break; + case TK_LOGICAL_NE: + CInt64_SetLong(&left, CInt64_NotEqual(left, right)); + break; + case '&': + left = CInt64_And(left, right); + break; + case '^': + left = CInt64_Xor(left, right); + break; + case '|': + left = CInt64_Or(left, right); + break; + case TK_LOGICAL_AND: + CInt64_SetLong(&left, (!CInt64_IsZero(&left) && !CInt64_IsZero(&right))); + break; + case TK_LOGICAL_OR: + CInt64_SetLong(&left, (!CInt64_IsZero(&left) || !CInt64_IsZero(&right))); + break; + default: + CError_Error(CErrorStr120); + } + + switch (type->size) { + case 1: + CInt64_ConvertUInt8(&left); + break; + case 2: + CInt64_ConvertUInt16(&left); + break; + case 4: + CInt64_ConvertUInt32(&left); + break; + case 8: + break; + } + } else { + switch (type->size) { + case 1: + CInt64_ConvertInt8(&left); + CInt64_ConvertInt8(&right); + break; + case 2: + CInt64_ConvertInt16(&left); + CInt64_ConvertInt16(&right); + break; + case 4: + CInt64_ConvertInt32(&left); + CInt64_ConvertInt32(&right); + break; + case 8: + break; + default: + CError_FATAL(389); + } + + switch (op) { + case '*': + left = CInt64_Mul(left, right); + break; + case '/': + if (CInt64_IsZero(&right)) + CError_Warning(CErrorStr139); + else + left = CInt64_Div(left, right); + break; + case '%': + if (CInt64_IsZero(&right)) + CError_Warning(CErrorStr139); + else + left = CInt64_Mod(left, right); + break; + case '+': + left = CInt64_Add(left, right); + break; + case '-': + left = CInt64_Sub(left, right); + break; + case TK_SHL: + left = CInt64_Shl(left, right); + break; + case TK_SHR: + left = CInt64_Shr(left, right); + break; + case '<': + CInt64_SetLong(&left, CInt64_Less(left, right)); + break; + case '>': + CInt64_SetLong(&left, CInt64_Greater(left, right)); + break; + case TK_LESS_EQUAL: + CInt64_SetLong(&left, CInt64_LessEqual(left, right)); + break; + case TK_GREATER_EQUAL: + CInt64_SetLong(&left, CInt64_GreaterEqual(left, right)); + break; + case TK_LOGICAL_EQ: + CInt64_SetLong(&left, CInt64_Equal(left, right)); + break; + case TK_LOGICAL_NE: + CInt64_SetLong(&left, CInt64_NotEqual(left, right)); + break; + case '&': + left = CInt64_And(left, right); + break; + case '^': + left = CInt64_Xor(left, right); + break; + case '|': + left = CInt64_Or(left, right); + break; + case TK_LOGICAL_AND: + CInt64_SetLong(&left, (!CInt64_IsZero(&left) && !CInt64_IsZero(&right))); + break; + case TK_LOGICAL_OR: + CInt64_SetLong(&left, (!CInt64_IsZero(&left) || !CInt64_IsZero(&right))); + break; + default: + CError_Error(CErrorStr120); + } + + switch (type->size) { + case 1: + CInt64_ConvertInt8(&left); + break; + case 2: + CInt64_ConvertInt16(&left); + break; + case 4: + CInt64_ConvertInt32(&left); + break; + case 8: + break; + } + } + + return left; +} + +CInt64 CMach_CalcIntMonadic(Type *type, short op, CInt64 val) { + if (is_unsigned(type)) { + switch (type->size) { + case 1: + CInt64_ConvertUInt8(&val); + break; + case 2: + CInt64_ConvertUInt16(&val); + break; + case 4: + CInt64_ConvertUInt32(&val); + break; + case 8: + break; + default: + CError_FATAL(448); + } + + switch (op) { + case '-': + val = CInt64_Neg(val); + break; + case '~': + val = CInt64_Inv(val); + break; + case '!': + val = CInt64_Not(val); + break; + default: + CError_Error(CErrorStr120); + } + + switch (type->size) { + case 1: + CInt64_ConvertUInt8(&val); + break; + case 2: + CInt64_ConvertUInt16(&val); + break; + case 4: + CInt64_ConvertUInt32(&val); + break; + case 8: + break; + } + } else { + switch (type->size) { + case 1: + CInt64_ConvertInt8(&val); + break; + case 2: + CInt64_ConvertInt16(&val); + break; + case 4: + CInt64_ConvertInt32(&val); + break; + case 8: + break; + default: + CError_FATAL(478); + } + + switch (op) { + case '-': + val = CInt64_Neg(val); + break; + case '~': + val = CInt64_Inv(val); + break; + case '!': + val = CInt64_Not(val); + break; + default: + CError_Error(CErrorStr120); + } + + switch (type->size) { + case 1: + CInt64_ConvertInt8(&val); + break; + case 2: + CInt64_ConvertInt16(&val); + break; + case 4: + CInt64_ConvertInt32(&val); + break; + case 8: + break; + } + } + + return val; +} + +CInt64 CMach_CalcIntConvertFromFloat(Type *type, Float fval) { + CInt64 result; + + if ((type->type == TYPEINT || type->type == TYPEENUM) && (type->size == 8)) { + if (is_unsigned(type)) + CInt64_ConvertUFromLongDouble(&result, fval.value); + else + CInt64_ConvertFromLongDouble(&result, fval.value); + } else { + if (is_unsigned(type)) + CInt64_SetULong(&result, fval.value); + else + CInt64_SetLong(&result, fval.value); + } + + return result; +} + +void CMach_InitIntMem(Type *type, CInt64 val, void *mem) { + SInt32 lg; + SInt16 sh; + SInt8 ch; + + switch (type->type) { + case TYPEINT: + switch (type->size) { + case 1: + ch = CInt64_GetULong(&val); + memcpy(mem, &ch, 1); + break; + case 2: + sh = CTool_EndianConvertWord16(CInt64_GetULong(&val)); + memcpy(mem, &sh, 2); + break; + case 4: + lg = CTool_EndianConvertWord32(CInt64_GetULong(&val)); + memcpy(mem, &lg, 4); + break; + case 8: + CTool_EndianConvertWord64(val, mem); + break; + default: + CError_FATAL(566); + } + break; + default: + CError_FATAL(570); + } +} + +void CMach_InitVectorMem(Type *type, MWVector128 val, void *mem, Boolean flag) { + unsigned char uc[16]; + unsigned short us[8]; + unsigned int ul[4]; + float f[4]; + int i; + + switch (type->type) { + case TYPESTRUCT: + switch (TYPE_STRUCT(type)->stype) { + case STRUCT_VECTOR_UCHAR: + case STRUCT_VECTOR_SCHAR: + case STRUCT_VECTOR_BCHAR: + for (i = 0; i < 16; i++) + uc[i] = val.uc[i]; + memcpy(mem, uc, 16); + break; + case STRUCT_VECTOR_USHORT: + case STRUCT_VECTOR_SSHORT: + case STRUCT_VECTOR_BSHORT: + case STRUCT_VECTOR_PIXEL: + for (i = 0; i < 8; i++) + us[i] = val.us[i]; + memcpy(mem, us, 16); + break; + case STRUCT_VECTOR_UINT: + case STRUCT_VECTOR_SINT: + case STRUCT_VECTOR_BINT: + for (i = 0; i < 4; i++) + ul[i] = val.ul[i]; + memcpy(mem, ul, 16); + break; + case STRUCT_VECTOR_FLOAT: + for (i = 0; i < 4; i++) + f[i] = val.f[i]; + memcpy(mem, f, 16); + break; + default: + CError_FATAL(655); + } + break; + default: + CError_FATAL(659); + } +} + +Float CMach_CalcFloatDiadic(Type *type, Float left, short op, Float right) { + switch (op) { + case '+': + left.value += right.value; + break; + case '-': + left.value -= right.value; + break; + case '*': + left.value *= right.value; + break; + case '/': + left.value /= right.value; + break; + default: + CError_FATAL(679); + } + + return CMach_CalcFloatConvert(type, left); +} + +Float CMach_CalcFloatMonadic(Type *type, short op, Float fval) { + if (op != '-') + CError_FATAL(692); + + fval.value = -fval.value; + return CMach_CalcFloatConvert(type, fval); +} + +Boolean CMach_CalcFloatDiadicBool(Type *type, Float left, short op, Float right) { + switch (op) { + case TK_LOGICAL_EQ: + return left.value == right.value; + case TK_LOGICAL_NE: + return left.value != right.value; + case TK_LESS_EQUAL: + return left.value <= right.value; + case TK_GREATER_EQUAL: + return left.value >= right.value; + case '>': + return left.value > right.value; + case '<': + return left.value < right.value; + default: + CError_FATAL(714); + return 0; + } +} + +Boolean CMach_CalcVectorDiadicBool(Type *type, MWVector128 *left, short op, MWVector128 *right) { + switch (op) { + case TK_LOGICAL_EQ: + return (left->ul[0] == right->ul[0]) && (left->ul[1] == right->ul[1]) && (left->ul[2] == right->ul[2]) && (left->ul[3] == right->ul[3]); + case TK_LOGICAL_NE: + return (left->ul[0] != right->ul[0]) && (left->ul[1] != right->ul[1]) && (left->ul[2] != right->ul[2]) && (left->ul[3] != right->ul[3]); + default: + CError_FATAL(740); + return 0; + } +} + +char *CMach_FloatScan(char *input, Float *result, Boolean *fail) { + double resultval; + char *outpos; + + if (!(outpos = ScanFloat(input, &resultval, fail))) + CError_ErrorTerm(CErrorStr154); + + if (*fail) + result->value = 0.0; + else + result->value = resultval; + + return outpos; +} + +Float CMach_CalcFloatConvertFromInt(Type *type, CInt64 val) { + Float result; + + if ((type->type == TYPEINT || type->type == TYPEENUM) && (type->size == 8)) { + if (is_unsigned(type)) + result.value = CInt64_ConvertUToLongDouble(&val); + else + result.value = CInt64_ConvertToLongDouble(&val); + } else { + if (is_unsigned(type)) + result.value = val.lo; + else + result.value = (SInt32) val.lo; + } + + return result; +} + +Float CMach_CalcFloatConvert(Type *type, Float fval) { + switch (type->size) { + case 4: + fval.value = (float) fval.value; + break; + case 8: + fval.value = (double) fval.value; + break; + case 10: + case 12: + break; + default: + CError_FATAL(801); + } + return fval; +} + +Boolean CMach_FloatIsZero(Float fval) { + return fval.value == 0.0; +} + +Boolean CMach_FloatIsOne(Float fval) { + return fval.value == 1.0; +} + +Boolean CMach_FloatIsNegOne(Float fval) { + return fval.value == -1.0; +} + +void CMach_InitFloatMem(Type *type, Float val, void *mem) { + float f; + double d; + + if (type->type == TYPEFLOAT) { + switch (type->size) { + case 4: + f = val.value; + memcpy(mem, &f, 4); + CTool_EndianConvertMem(mem, 4); + return; + case 8: + d = val.value; + memcpy(mem, &d, 8); + CTool_EndianConvertMem(mem, 8); + return; + } + } + + CError_FATAL(866); +} + +void CMach_PrintFloat(char *buf, Float val) { + double f; + CMach_InitFloatMem((Type *) &stshortdouble, val, &f); + sprintf(buf, "%g", f); +} + +void CMach_PragmaParams(void) { + if (copts.warn_illpragma) + CError_Warning(CErrorStr186, 0); + + while (notendofline()) + lex(); +} + +void CMach_AdjustFuntionArgs() { + // not called so we will never know what the args should have been :( +} + +static SInt32 CMach_GetPPCTypeAlign(Type *type, Boolean flag1, Boolean flag2) { + ClassList *base; + ObjMemberVar *ivar; + StructMember *member; + SInt32 best; + SInt32 ivarAlign; + SInt32 qualAlign; + + SInt32 align = CMach_GetQualifiedTypeAlign(type, flag2); + if (align <= 8) { + while (type->type == TYPEARRAY) + type = TYPE_POINTER(type)->target; + + if (flag1) { + if (type->type == TYPEFLOAT && type->size > 4 && align < 8) + return 8; + } else if (align == 8) { + if (type->type == TYPECLASS) { + best = 4; + for (base = TYPE_CLASS(type)->bases; base; base = base->next) { + if (base->base->align > best) + best = base->base->align; + } + for (ivar = TYPE_CLASS(type)->ivars; ivar; ivar = ivar->next) { + ivarAlign = CMach_GetPPCTypeAlign(ivar->type, 0, flag2); + if (ivarAlign > best) + best = ivarAlign; + if (flag2) { + qualAlign = CMach_GetQUALalign(ivar->qual); + if (qualAlign > best) + best = qualAlign; + } + } + return best; + } + if (type->type == TYPESTRUCT) { + best = 4; + for (member = TYPE_STRUCT(type)->members; member; member = member->next) { + ivarAlign = CMach_GetPPCTypeAlign(member->type, 0, flag2); + if (ivarAlign > best) + best = ivarAlign; + if (flag2) { + qualAlign = CMach_GetQUALalign(member->qual); + if (qualAlign > best) + best = qualAlign; + } + } + return best; + } + } + } + + return align; +} + +static SInt16 CMach_GetQualifiedStructAlign(const TypeStruct *tstruct, Boolean flag) { + StructMember *member; + SInt32 best; + SInt32 align; + Boolean isFirst; + + if (tstruct->stype >= STRUCT_VECTOR_UCHAR && tstruct->stype <= STRUCT_VECTOR_PIXEL) + return 16; + + switch (copts.structalignment) { + case AlignMode3_1Byte: + case AlignMode8_Packed: + return 1; + case AlignMode0_Mac68k: + return 2; + case AlignMode1_Mac68k4byte: + return (tstruct->size <= 2) ? 2 : 4; + } + + if (tstruct->size <= 1) + return 1; + + best = 1; + switch (copts.structalignment) { + default: + CError_FATAL(1026); + case AlignMode4_2Byte: + case AlignMode5_4Byte: + case AlignMode6_8Byte: + case AlignMode7_16Byte: + for (member = tstruct->members; member; member = member->next) { + align = CMach_GetQualifiedTypeAlign(member->type, flag); + if (align > best) + best = align; + if (flag) { + align = CMach_GetQUALalign(member->qual); + if (align > best) + best = align; + } + } + return best; + case AlignMode2_PPC: + if (copts.oldalignment) { + for (member = tstruct->members; member; member = member->next) { + align = CMach_GetQualifiedTypeAlign(member->type, flag); + if (align > best) + best = align; + if (flag) { + align = CMach_GetQUALalign(member->qual); + if (align > best) + best = align; + } + } + } else if (TYPE_STRUCT(tstruct)->stype == STRUCT_TYPE_UNION) { + for (member = tstruct->members; member; member = member->next) { + align = CMach_GetPPCTypeAlign(member->type, 1, flag); + if (align > best) + best = align; + if (flag) { + align = CMach_GetQUALalign(member->qual); + if (align > best) + best = align; + } + } + } else { + isFirst = 1; + for (member = tstruct->members; member; member = member->next) { + align = CMach_GetPPCTypeAlign(member->type, isFirst || (best >= 8), flag); + if (align > best) + best = align; + if (flag) { + align = CMach_GetQUALalign(member->qual); + if (align > best) + best = align; + } + isFirst = 0; + } + } + return best; + } +} + +SInt16 CMach_GetStructAlign(TypeStruct *tstruct) { + return CMach_GetQualifiedStructAlign(tstruct, 1); +} + +static SInt16 CMach_GetQualifiedClassAlign(TypeClass *tclass, Boolean flag) { + ClassList *base; + ObjMemberVar *ivar; + SInt32 best; + SInt32 align; + Boolean isFirst; + + switch (copts.structalignment) { + case AlignMode3_1Byte: + case AlignMode8_Packed: + return 1; + case AlignMode0_Mac68k: + return 2; + case AlignMode1_Mac68k4byte: + return (tclass->size <= 2) ? 2 : 4; + } + if (tclass->size <= 1) + return 1; + best = 1; + switch (copts.structalignment) { + default: + CError_FATAL(1149); + case AlignMode4_2Byte: + case AlignMode5_4Byte: + case AlignMode6_8Byte: + case AlignMode7_16Byte: + for (base = tclass->bases; base; base = base->next) { + if ((align = base->base->align) > best) + best = align; + } + for (ivar = tclass->ivars; ivar; ivar = ivar->next) { + align = CMach_GetMemberAlignment(ivar->type, flag ? CMach_GetQUALalign(ivar->qual) : 0, flag); + if (align > best) + best = align; + } + return best; + case AlignMode2_PPC: + best = 1; + if (copts.oldalignment) { + for (base = tclass->bases; base; base = base->next) { + if ((align = base->base->align) > best) + best = align; + } + for (ivar = tclass->ivars; ivar; ivar = ivar->next) { + align = CMach_GetMemberAlignment(ivar->type, flag ? CMach_GetQUALalign(ivar->qual) : 0, flag); + if (align > best) + best = align; + } + } else { + isFirst = 1; + for (base = tclass->bases; base; base = base->next) { + if ((align = base->base->align) > best) + best = align; + isFirst = 0; + } + if (tclass->mode == 1) { + for (ivar = tclass->ivars; ivar; ivar = ivar->next) { + align = CMach_GetPPCTypeAlign(ivar->type, 1, flag); + if (align > best) + best = align; + if (flag) { + align = CMach_GetQUALalign(ivar->qual); + if (align > best) + best = align; + } + } + } else { + for (ivar = tclass->ivars; ivar; ivar = ivar->next) { + if (ivar->offset && align != 8) + isFirst = 0; + align = CMach_GetPPCTypeAlign(ivar->type, isFirst || (best >= 8), flag); + if (align > best) + best = align; + if (flag) { + align = CMach_GetQUALalign(ivar->qual); + if (align > best) + best = align; + } + } + } + } + return best; + } +} + +SInt16 CMach_GetClassAlign(TypeClass *tclass) { + return CMach_GetQualifiedClassAlign(tclass, 1); +} + +static SInt16 CMach_GetWinTypeAlign(Type *type) { + int packoffs = copts.structalignment - 3; + SInt32 align = cmach_packsize[packoffs]; + if (type->size < align) + align = type->size; + return align; +} + +static SInt16 CMach_GetWinMinimizeAlign(SInt16 align) { + int packoffs = copts.structalignment - 3; + SInt16 minimum = cmach_packsize[packoffs]; + if (minimum < align) + align = minimum; + return align; +} + +static SInt16 CMach_GetQualifiedTypeAlign(Type *type, Boolean flag) { + Boolean isWin; + SInt16 align; + + if (type->type == TYPESTRUCT && TYPE_STRUCT(type)->stype >= STRUCT_VECTOR_UCHAR && TYPE_STRUCT(type)->stype <= STRUCT_VECTOR_PIXEL) + return 16; + + switch (copts.structalignment) { + case AlignMode3_1Byte: + case AlignMode8_Packed: + return 1; + case AlignMode4_2Byte: + case AlignMode5_4Byte: + case AlignMode6_8Byte: + case AlignMode7_16Byte: + isWin = 1; + break; + default: + isWin = 0; + break; + } + +restart: + switch (type->type) { + case TYPEVOID: + return 0; + case TYPEFUNC: + return 0; + case TYPEENUM: + type = TYPE_ENUM(type)->enumtype; + case TYPEINT: + if (isWin) + return CMach_GetWinTypeAlign(type); + if (type->size == 1) + return 1; + if (copts.oldalignment && type->size == 8) + return 8; + if (copts.structalignment != AlignMode0_Mac68k && type->size >= 4) + return 4; + return 2; + case TYPEFLOAT: + if (isWin) + return CMach_GetWinTypeAlign(type); + switch (copts.structalignment) { + case AlignMode0_Mac68k: + return 2; + case AlignMode1_Mac68k4byte: + return 4; + case AlignMode2_PPC: + if (copts.oldalignment && type->size > 4) + return 8; + return 4; + default: + CError_FATAL(1346); + } + case TYPEMEMBERPOINTER: + case TYPEPOINTER: + if (isWin) + return CMach_GetWinTypeAlign(type); + if (copts.structalignment == AlignMode0_Mac68k) + return 2; + else + return 4; + case TYPEARRAY: + if (copts.alignarraymembers) { + if (isWin) + return CMach_GetWinTypeAlign(type); + if (type->size == 1) + return 1; + if (copts.structalignment == AlignMode0_Mac68k || type->size <= 2) + return 2; + if (copts.structalignment == AlignMode1_Mac68k4byte || type->size < 8) + return 4; + align = CMach_GetQualifiedTypeAlign(TYPE_POINTER(type)->target, flag); + if (align > 4) + return align; + else + return 4; + } else { + type = TYPE_POINTER(type)->target; + goto restart; + } + case TYPESTRUCT: + if (flag) + align = TYPE_STRUCT(type)->align; + else + align = CMach_GetQualifiedStructAlign(TYPE_STRUCT(type), flag); + if (isWin) + return CMach_GetWinMinimizeAlign(align); + else + return align; + case TYPECLASS: + if (flag) + align = TYPE_CLASS(type)->align; + else + align = CMach_GetQualifiedClassAlign(TYPE_CLASS(type), flag); + if (isWin) + return CMach_GetWinMinimizeAlign(align); + else + return align; + case TYPEBITFIELD: + type = TYPE_BITFIELD(type)->bitfieldtype; + goto restart; + case TYPETEMPLATE: + return 1; + default: + CError_FATAL(1392); + return 0; + } +} + +SInt16 CMach_GetTypeAlign(Type *type) { + return CMach_GetQualifiedTypeAlign(type, 1); +} + +static SInt16 CMach_GetMemberAlignment(Type *type, SInt32 var, Boolean flag) { + SInt32 align; + + align = CMach_GetQualifiedTypeAlign(type, flag); + if (align < 1) + align = 1; + + if (IS_TYPE_VECTOR(type) && align < 16) + align = 16; + + switch (copts.structalignment) { + case AlignMode8_Packed: + align = 1; + break; + case AlignMode0_Mac68k: + if (align > 2) + align = 2; + break; + case AlignMode1_Mac68k4byte: + if (align > 4) + align = 4; + break; + case AlignMode2_PPC: + if (!copts.oldalignment) + align = CMach_GetPPCTypeAlign(type, !cmach_structoffset || (cmach_structalign >= 8), flag); + if (var > align) + align = var; + break; + } + + if (align > cmach_structalign) + cmach_structalign = align; + return align; +} + +SInt16 CMach_MemberAlignValue(Type *type, SInt32 var) { + SInt16 align = CMach_GetMemberAlignment(type, 0, 1); + if (align <= 1) + return 0; + + return (align - 1) & (align - (var & (align - 1))); +} + +static SInt16 MemberAlignValue(Type *type, SInt32 var1, SInt32 var2) { + SInt16 align = CMach_GetMemberAlignment(type, var2, 1); + return (align - 1) & (align - (var1 & (align - 1))); +} + +void CMach_StructLayoutInitOffset(SInt32 offset) { + cmach_structoffset = offset; + cmach_structalign = 4; + cmach_curbfsize = 0; +} + +SInt32 CMach_StructLayoutGetCurSize(void) { + return cmach_structoffset; +} + +SInt32 CMach_StructLayoutGetOffset(Type *type, UInt32 qual) { + SInt32 align; + SInt32 offset; + + qual = CParser_GetTypeQualifiers(type, qual); + + cmach_curbfsize = 0; + align = MemberAlignValue(type, cmach_structoffset, CMach_GetQUALalign(qual)); + offset = cmach_structoffset + align; + cmach_structoffset = offset + type->size; + return offset; +} + +SInt32 CMach_StructLayoutBitfield(TypeBitfield *tbitfield, UInt32 qual) { + SInt16 align; + SInt16 padding_at_start; + SInt16 basesize; + SInt16 basesize_bits; + SInt16 required_alignment; + SInt16 r4; + + padding_at_start = 0; + required_alignment = 0; + align = CMach_GetQUALalign(qual); + if (align <= tbitfield->bitfieldtype->size) + align = 0; + + switch (tbitfield->bitfieldtype->size) { + case 1: + basesize = 1; + basesize_bits = 8; + required_alignment = 0; + break; + case 2: + basesize = 2; + basesize_bits = 16; + required_alignment = 2; + break; + case 4: + if (copts.structalignment != AlignMode0_Mac68k && copts.structalignment != AlignMode4_2Byte) + required_alignment = 4; + else + required_alignment = 2; + basesize = 4; + basesize_bits = 32; + break; + default: + CError_FATAL(1620); + } + + switch (copts.structalignment) { + case AlignMode3_1Byte: + case AlignMode8_Packed: + required_alignment = 0; + break; + } + + r4 = required_alignment; + if (align > required_alignment) + r4 = align; + if (r4 && (cmach_structoffset & (r4 - 1))) + padding_at_start = r4 - (cmach_structoffset & (r4 - 1)); + + if (!cmach_curbfsize) { + cmach_structoffset += padding_at_start; + if (!tbitfield->bitlength) + return cmach_structoffset; + cmach_curbfsize = tbitfield->bitlength; + cmach_curbfbasesize = basesize; + cmach_curbfoffset = cmach_structoffset; + cmach_structoffset += basesize; + tbitfield->offset = 0; + return cmach_curbfoffset; + } else { + if (!tbitfield->bitlength || (cmach_curbfsize + tbitfield->bitlength) > basesize_bits || cmach_curbfbasesize != basesize) { + cmach_structoffset += padding_at_start; + cmach_curbfsize = 0; + cmach_curbfbasesize = basesize; + if (!tbitfield->bitlength) + return cmach_structoffset; + cmach_curbfoffset = cmach_structoffset; + cmach_structoffset += basesize; + } + tbitfield->offset = cmach_curbfsize; + cmach_curbfsize += tbitfield->bitlength; + return cmach_curbfoffset; + } +} + +static Boolean CMach_IsTrivialClass(TypeClass *tclass) { + return !CClass_Constructor(tclass); +} + +UInt8 CMach_GetFunctionResultClass(TypeFunc *tfunc) { + switch (tfunc->functype->type) { + case TYPESTRUCT: + if (tfunc->functype->type == TYPESTRUCT && TYPE_STRUCT(tfunc->functype)->stype >= STRUCT_VECTOR_UCHAR && TYPE_STRUCT(tfunc->functype)->stype <= STRUCT_VECTOR_PIXEL) + return 0; + case TYPECLASS: + case TYPEMEMBERPOINTER: + return CMach_PassResultInHiddenArg(tfunc->functype) != 0; + default: + return 0; + } +} + +Boolean CMach_PassResultInHiddenArg(Type *type) { + switch (type->type) { + case TYPESTRUCT: + if (TYPE_STRUCT(type)->stype >= STRUCT_VECTOR_UCHAR && TYPE_STRUCT(type)->stype <= STRUCT_VECTOR_PIXEL) + return 0; + else + return 1; + case TYPECLASS: + return 1; + case TYPEMEMBERPOINTER: + return (type->size != 4); + default: + return 0; + } +} + +char *CMach_GetCPU(void) { + switch (copts.processor) { + case CPU_PPC401: return "__PPC401__"; + case CPU_PPC403: return "__PPC403__"; + case CPU_PPC505: return "__PPC505__"; + case CPU_PPC509: return "__PPC509__"; + case CPU_PPC555: return "__PPC555__"; + case CPU_PPC556: return "__PPC556__"; + case CPU_PPC565: return "__PPC565__"; + case CPU_PPC601: return "__PPC601__"; + case CPU_PPC602: return "__PPC602__"; + case CPU_PPC603: return "__PPC603__"; + case CPU_PPC603e: return "__PPC603e__"; + case CPU_PPC604: return "__PPC604__"; + case CPU_PPC604e: return "__PPC604e__"; + case CPU_PPC740: return "__PPC740__"; + case CPU_PPC750: return "__PPC750__"; + case CPU_PPC801: return "__PPC801__"; + case CPU_PPC821: return "__PPC821__"; + case CPU_PPC823: return "__PPC823__"; + case CPU_PPC850: return "__PPC850__"; + case CPU_PPC860: return "__PPC860__"; + case CPU_PPC7400: return "__PPC7400__"; + case CPU_PPC7450: return "__PPC7450__"; + case CPU_PPC8240: return "__PPC8240__"; + case CPU_PPC8260: return "__PPC8260__"; + case CPU_PPCGEKKO: return "__PPCGEKKO__"; + case CPU_PPCELF: return "__PPCELF__"; + default: return NULL; + } +} + +Boolean CMach_FloatIsPowerOf2(Float flt) { + return (flt.value == 2.0) || + (flt.value == 4.0) || + (flt.value == 8.0) || + (flt.value == 16.0) || + (flt.value == 32.0) || + (flt.value == 64.0) || + (flt.value == 128.0) || + (flt.value == 256.0) || + (flt.value == 512.0) || + (flt.value == 1024.0); +} + +Float CMach_FloatReciprocal(Float flt) { + flt.value = 1.0 / flt.value; + return flt; +} + +SInt32 CMach_RoundedSizeOf(Object *object) { + SInt32 size = object->type->size; + SInt32 align = CMach_GetTypeAlign(object->type) - 1; + return (size + align) & ~align; +} + +void CMach_ReInitRuntimeObjects(void) { + HashNameNode *e0 = GetHashNameNodeExport("[0]"); + HashNameNode *e1 = GetHashNameNodeExport("[1]"); + HashNameNode *e2 = GetHashNameNodeExport("[2]"); + HashNameNode *e3 = GetHashNameNodeExport("[3]"); + HashNameNode *e4 = GetHashNameNodeExport("[4]"); + HashNameNode *e5 = GetHashNameNodeExport("[5]"); + HashNameNode *e6 = GetHashNameNodeExport("[6]"); + HashNameNode *e7 = GetHashNameNodeExport("[7]"); + HashNameNode *e8 = GetHashNameNodeExport("[8]"); + HashNameNode *e9 = GetHashNameNodeExport("[9]"); + HashNameNode *e10 = GetHashNameNodeExport("[10]"); + HashNameNode *e11 = GetHashNameNodeExport("[11]"); + HashNameNode *e12 = GetHashNameNodeExport("[12]"); + HashNameNode *e13 = GetHashNameNodeExport("[13]"); + HashNameNode *e14 = GetHashNameNodeExport("[14]"); + HashNameNode *e15 = GetHashNameNodeExport("[15]"); + HashNameNode *vuc = GetHashNameNodeExport("vector unsigned char"); + HashNameNode *vus = GetHashNameNodeExport("vector unsigned short"); + HashNameNode *vui = GetHashNameNodeExport("vector unsigned int"); + HashNameNode *vsc = GetHashNameNodeExport("vector signed char"); + HashNameNode *vss = GetHashNameNodeExport("vector signed short"); + HashNameNode *vsi = GetHashNameNodeExport("vector signed int"); + HashNameNode *vbc = GetHashNameNodeExport("vector bool char"); + HashNameNode *vbs = GetHashNameNodeExport("vector bool short"); + HashNameNode *vbi = GetHashNameNodeExport("vector bool int"); + HashNameNode *vf = GetHashNameNodeExport("vector float"); + HashNameNode *vp = GetHashNameNodeExport("vector pixel"); + + stvectorunsignedchar.name = vuc; + stvectorunsignedshort.name = vus; + stvectorunsignedlong.name = vui; + stvectorsignedchar.name = vsc; + stvectorsignedshort.name = vss; + stvectorsignedlong.name = vsi; + stvectorboolchar.name = vbc; + stvectorboolshort.name = vbs; + stvectorboollong.name = vbi; + stvectorfloat.name = vf; + stvectorpixel.name = vp; + + stVUC_unsignedchar0.name = e0; + stVUC_unsignedchar1.name = e1; + stVUC_unsignedchar2.name = e2; + stVUC_unsignedchar3.name = e3; + stVUC_unsignedchar4.name = e4; + stVUC_unsignedchar5.name = e5; + stVUC_unsignedchar6.name = e6; + stVUC_unsignedchar7.name = e7; + stVUC_unsignedchar8.name = e8; + stVUC_unsignedchar9.name = e9; + stVUC_unsignedchar10.name = e10; + stVUC_unsignedchar11.name = e11; + stVUC_unsignedchar12.name = e12; + stVUC_unsignedchar13.name = e13; + stVUC_unsignedchar14.name = e14; + stVUC_unsignedchar15.name = e15; + stVSC_signedchar0.name = e0; + stVSC_signedchar1.name = e1; + stVSC_signedchar2.name = e2; + stVSC_signedchar3.name = e3; + stVSC_signedchar4.name = e4; + stVSC_signedchar5.name = e5; + stVSC_signedchar6.name = e6; + stVSC_signedchar7.name = e7; + stVSC_signedchar8.name = e8; + stVSC_signedchar9.name = e9; + stVSC_signedchar10.name = e10; + stVSC_signedchar11.name = e11; + stVSC_signedchar12.name = e12; + stVSC_signedchar13.name = e13; + stVSC_signedchar14.name = e14; + stVSC_signedchar15.name = e15; + + stVUS_unsignedshort0.name = e0; + stVUS_unsignedshort1.name = e1; + stVUS_unsignedshort2.name = e2; + stVUS_unsignedshort3.name = e3; + stVUS_unsignedshort4.name = e4; + stVUS_unsignedshort5.name = e5; + stVUS_unsignedshort6.name = e6; + stVUS_unsignedshort7.name = e7; + stVSS_signedshort0.name = e0; + stVSS_signedshort1.name = e1; + stVSS_signedshort2.name = e2; + stVSS_signedshort3.name = e3; + stVSS_signedshort4.name = e4; + stVSS_signedshort5.name = e5; + stVSS_signedshort6.name = e6; + stVSS_signedshort7.name = e7; + + stVUL_unsignedlong0.name = e0; + stVUL_unsignedlong1.name = e1; + stVUL_unsignedlong2.name = e2; + stVUL_unsignedlong3.name = e3; + stVSL_signedlong0.name = e0; + stVSL_signedlong1.name = e1; + stVSL_signedlong2.name = e2; + stVSL_signedlong3.name = e3; + + stVF_float0.name = e0; + stVF_float1.name = e1; + stVF_float2.name = e2; + stVF_float3.name = e3; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/CodeGen.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/CodeGen.c new file mode 100644 index 0000000..d3bac77 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/CodeGen.c @@ -0,0 +1,2437 @@ +#include "compiler/CodeGen.h" +#include "compiler/CCompiler.h" +#include "compiler/CDecl.h" +#include "compiler/CError.h" +#include "compiler/CExpr.h" +#include "compiler/CFunc.h" +#include "compiler/CInt64.h" +#include "compiler/CMachine.h" +#include "compiler/CMangler.h" +#include "compiler/COptimizer.h" +#include "compiler/CParser.h" +#include "compiler/CPrep.h" +#include "compiler/CPrepTokenizer.h" +#include "compiler/Coloring.h" +#include "compiler/CompilerTools.h" +#include "compiler/DumpIR.h" +#include "compiler/Exceptions.h" +#include "compiler/InlineAsmPPC.h" +#include "compiler/Intrinsics.h" +#include "compiler/InstrSelection.h" +#include "compiler/GlobalOptimizer.h" +#include "compiler/ObjGenMachO.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeAssembly.h" +#include "compiler/PCodeInfo.h" +#include "compiler/PCodeListing.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/Peephole.h" +#include "compiler/PPCError.h" +#include "compiler/RegisterInfo.h" +#include "compiler/Scheduler.h" +#include "compiler/StackFrame.h" +#include "compiler/Switch.h" +#include "compiler/TOC.h" +#include "compiler/ValueNumbering.h" +#include "compiler/enode.h" +#include "compiler/objc.h" +#include "compiler/objects.h" +#include "compiler/scopes.h" +#include "compiler/tokens.h" +#include "compiler/types.h" + +static Macro powcM; +static Macro __powcM; +static Macro ppc_cpu; +static Macro profM; +static Macro hostM; +static Macro bendM; +static Macro _ppc_M; +static Macro longI; +static Macro IEEED; +Macro vecM; +Macro altivecM; +static Macro macM2; +static Macro appleM; +static Macro optM; +static Macro alignM; +static Macro _machM; +static Macro archM; +static Macro dynM; +static Macro ppcM; +Object *gFunction; +static ObjectList *temps; +CLabel *returnlabel; +CLabel *cleanreturnlabel; +Boolean needs_cleanup; +Statement *current_statement; +int has_catch_blocks; +int disable_optimizer; +SInt32 current_linenumber; +Boolean has_altivec_arrays; +short high_reg; +short low_reg; +short high_offset; +short low_offset; +short low_reg2; +short high_reg2; +PCodeLabel *pic_base_pcodelabel; +Object *dyld_stub_binding_helper; +Object *rt_cvt_fp2unsigned; +Object *rt_profile_entry; +Object *rt_profile_exit; +Object *rt_div2i; +Object *rt_div2u; +Object *rt_mod2i; +Object *rt_mod2u; +Object *rt_shr2i; +Object *rt_shr2u; +Object *rt_shl2i; +Object *rt_cvt_ull_dbl; +Object *rt_cvt_sll_dbl; +Object *rt_cvt_ull_flt; +Object *rt_cvt_sll_flt; +Object *rt_cvt_dbl_usll; +static heaperror_t saveheaperror; + +enum { + GPRLimit = 10, + FPRLimit = 13, + VRLimit = 13 +}; + +// forward decls +static void CodeGen_heaperror(void); + +VarInfo *CodeGen_GetNewVarInfo(void) { + VarInfo *vi; + + vi = lalloc(sizeof(VarInfo)); + memclrw(vi, sizeof(VarInfo)); + + vi->deftoken = *CPrep_CurStreamElement(); + vi->varnumber = localcount++; + + return vi; +} + +Object *maketemporary(Type *type) { + ObjectList *list; + Object *obj; + + for (list = temps; list; list = list->next) { + obj = list->object; + if (obj->u.var.uid == 0 && obj->type == type) { + obj->u.var.uid = 1; + return obj; + } + } + + obj = lalloc(sizeof(Object)); + memclrw(obj, sizeof(Object)); + obj->otype = OT_OBJECT; + obj->access = ACCESSPUBLIC; + obj->datatype = DLOCAL; + obj->type = type; + obj->name = CParser_GetUniqueName(); + obj->u.var.info = CodeGen_GetNewVarInfo(); + obj->u.var.uid = 1; + + list = lalloc(sizeof(ObjectList)); + memclrw(list, sizeof(ObjectList)); + list->next = temps; + list->object = obj; + temps = list; + + return obj; +} + +static void free_temporaries(void) { + ObjectList *list; + + for (list = temps; list; list = list->next) + list->object->u.var.uid = 0; +} + +static void allocate_temporaries(void) { + ObjectList *list; + + for (list = temps; list; list = list->next) + assign_local_memory(list->object); +} + +void process_arguments(ArgumentProcessor func, Boolean flag) { + short gpr = 3; + short fpr = 1; + short vr = 2; + Type *type; + ObjectList *list; + + for (list = arguments; list; list = list->next) { + type = list->object->type; + if (IS_TYPE_FLOAT(type)) { + func(list->object, (fpr <= FPRLimit) ? fpr : 0); + fpr++; + if (type->size == 4) + gpr++; + else + gpr += 2; + } else if (IS_TYPE_VECTOR(type)) { + func(list->object, (vr <= VRLimit) ? vr : 0); + vr++; + if (flag) { + if ((vr - 1) == 2) + gpr = 9; + else if ((vr - 1) > 2) + gpr = 11; + } + } else { + func(list->object, (gpr <= GPRLimit) ? gpr : 0); + if (TYPE_FITS_IN_REGISTER(type)) { + if (type->size <= 4) + gpr += 1; + else + gpr += 2; + } else { + gpr += (type->size >> 2); + if (type->size & 3) + gpr++; + } + } + } + + last_argument_register[RegClass_GPR] = gpr - 1; + last_argument_register[RegClass_FPR] = fpr - 1; + last_argument_register[RegClass_VR] = vr - 1; + if (flag) + move_varargs_to_memory(); +} + +static void retain_argument_register(Object *obj, short reg) { + VarInfo *vi = Registers_GetVarInfo(obj); + Type *type = obj->type; + + if (reg && !vi->noregister && vi->used) { + if (TYPE_FITS_IN_REGISTER(type)) { + if (type->size <= 4) { + retain_register(obj, RegClass_GPR, reg); + } else if (reg < GPRLimit) { + if (copts.littleendian) + retain_GPR_pair(obj, reg, reg + 1); + else + retain_GPR_pair(obj, reg + 1, reg); + } + } else if (IS_TYPE_FLOAT(type)) { + retain_register(obj, RegClass_FPR, reg); + } else if (IS_TYPE_VECTOR(type)) { + retain_register(obj, RegClass_VR, reg); + } + } +} + +static void allocate_local_vregs(void) { + ObjectList *list; + Object *obj; + VarInfo *vi; + + if (copts.codegen_pic && uses_globals && assignable_registers[RegClass_GPR]) { + if (assignable_registers[RegClass_GPR]) { + vi = pic_base.u.var.info; + vi->reg = 0; + assign_register_by_type(&pic_base); + pic_base_reg = vi->reg; + CError_ASSERT(497, pic_base_reg); + } else { + CError_FATAL(500); + } + } else { + pic_base_reg = 0; + } + + for (list = exceptionlist; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + + if (vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && !vi->reg) + assign_register_by_type(obj); + } + } + + set_last_exception_registers(); + + for (list = arguments; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + + if (vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && !vi->reg) + assign_register_by_type(obj); + } + } + + for (list = locals; list; list = list->next) { + obj = list->object; + if (!IsTempName(obj->name)) { + vi = Registers_GetVarInfo(obj); + + if (vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && !vi->reg) + assign_register_by_type(obj); + } + } + } + + open_fe_temp_registers(); + + for (list = locals; list; list = list->next) { + obj = list->object; + if (IsTempName(obj->name)) { + vi = Registers_GetVarInfo(obj); + + if (vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && !vi->reg) + assign_register_by_type(obj); + } + } + } + + for (list = toclist; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + + if (!vi->reg && vi->used && vi->usage > 1) + assign_register_by_type(obj); + } +} + +static void allocate_local_GPRs(void) { + ObjectList *list; + Object *obj; + Object *winning_obj; + SInt32 winning_usage; + VarInfo *vi; + Type *type; + + if (copts.codegen_pic && uses_globals && assignable_registers[RegClass_GPR]) { + vi = pic_base.u.var.info; + vi->reg = 0; + assign_register_by_type(&pic_base); + pic_base_reg = vi->reg; + CError_ASSERT(605, pic_base_reg); + } else { + pic_base_reg = 0; + } + + while (assignable_registers[RegClass_GPR]) { + winning_obj = NULL; + winning_usage = -1; + if (!(disable_optimizer & 2)) { + for (list = arguments; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + type = obj->type; + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && vi->usage >= winning_usage && vi->usage >= 2) { + if (TYPE_FITS_IN_REGISTER(type) && (!TYPE_IS_8BYTES(type) || assignable_registers[RegClass_GPR] >= 2)) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + } + } + if (!(disable_optimizer & 2)) { + for (list = locals; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + type = obj->type; + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && vi->usage >= winning_usage && vi->usage >= 2) { + if (TYPE_FITS_IN_REGISTER(type) && (!TYPE_IS_8BYTES(type) || assignable_registers[RegClass_GPR] >= 2)) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + } + } + for (list = toclist; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used) { + if (vi->usage >= winning_usage && vi->usage >= 3) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + + if (!winning_obj) + break; + + assign_register_by_type(winning_obj); + CError_ASSERT(698, Registers_GetVarInfo(winning_obj)->flags & VarInfoFlag2); + } +} + +static void allocate_local_FPRs(void) { + ObjectList *list; + Object *obj; + Object *winning_obj; + SInt32 winning_usage; + VarInfo *vi; + Type *type; + + while (assignable_registers[RegClass_FPR]) { + winning_obj = NULL; + winning_usage = -1; + if (!(disable_optimizer & 2)) { + for (list = arguments; list; list = list->next) { + obj = list->object; + type = obj->type; + vi = Registers_GetVarInfo(obj); + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && vi->usage >= winning_usage && vi->usage >= 2) { + if (IS_TYPE_FLOAT(type)) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + } + } + if (!(disable_optimizer & 2)) { + for (list = locals; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && vi->usage >= winning_usage && vi->usage >= 2) { + if (IS_TYPE_FLOAT(obj->type)) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + } + } + + if (!winning_obj) + break; + + assign_register_by_type(winning_obj); + CError_ASSERT(782, Registers_GetVarInfo(winning_obj)->flags & VarInfoFlag2); + } +} + +static void allocate_local_VRs(void) { + ObjectList *list; + Object *obj; + Object *winning_obj; + SInt32 winning_usage; + VarInfo *vi; + + while (assignable_registers[RegClass_VR]) { + winning_obj = NULL; + winning_usage = -1; + if (!(disable_optimizer & 2)) { + for (list = arguments; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && vi->usage >= winning_usage && vi->usage >= 2) { + if (IS_TYPE_VECTOR(obj->type)) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + } + } + if (!(disable_optimizer & 2)) { + for (list = locals; list; list = list->next) { + obj = list->object; + vi = Registers_GetVarInfo(obj); + if (vi->flags & VarInfoFlag40) + vi->usage = 100000; + if (!vi->reg && vi->used && !vi->noregister) { + if (!OBJ_GET_TARGET_VOLATILE(obj) && vi->usage >= winning_usage && vi->usage >= 2) { + if (IS_TYPE_VECTOR(obj->type)) { + winning_obj = obj; + winning_usage = vi->usage; + } + } + } + } + } + + if (!winning_obj) + break; + + assign_register_by_type(winning_obj); + CError_ASSERT(846, Registers_GetVarInfo(winning_obj)->flags & VarInfoFlag2); + } +} + +static void allocate_locals(void) { + has_altivec_arrays = 0; + + if (!requires_frame && !optimizing) + process_arguments(retain_argument_register, 0); + + if (optimizing) { + allocate_local_vregs(); + } else { + allocate_local_GPRs(); + allocate_local_FPRs(); + allocate_local_VRs(); + } + + assign_locals_to_memory(locals); +} + +void move_assigned_argument(Object *obj, short reg) { + VarInfo *vi; + Type *type; + SInt32 bytesLeft; + SInt32 offset; + + vi = Registers_GetVarInfo(obj); + type = obj->type; + CError_ASSERT(901, obj->datatype == DLOCAL); + + if (!vi->used) + return; + + if (reg) { + if (vi->reg) { + if (TYPE_IS_8BYTES(type)) { + if (copts.littleendian) { + if (vi->reg != reg) + emitpcode(PC_MR, vi->reg, reg); + if (reg < GPRLimit) { + CError_FAIL(916, (vi->regHi == reg) || (vi->reg == (reg + 1))); + if (vi->regHi != (reg + 1)) + emitpcode(PC_MR, vi->regHi, reg + 1); + } else { + load_store_register(PC_LWZ, vi->regHi, local_base_register(obj), obj, high_offset); + } + } else { + if (vi->regHi != reg) + emitpcode(PC_MR, vi->regHi, reg); + if (reg < GPRLimit) { + CError_FAIL(931, (vi->reg == reg) || (vi->regHi == (reg + 1))); + if (vi->reg != (reg + 1)) + emitpcode(PC_MR, vi->reg, reg + 1); + } else { + load_store_register(PC_LWZ, vi->reg, local_base_register(obj), obj, low_offset); + } + } + } else if (vi->reg != reg) { + if (IS_TYPE_FLOAT(type)) { + emitpcode(PC_FMR, vi->reg, reg); + } else if (IS_TYPE_VECTOR(type)) { + emitpcode(PC_VMR, vi->reg, reg); + } else { + emitpcode(PC_MR, vi->reg, reg); + } + } + } else { + if (IS_TYPE_POINTER(type) || IS_TYPE_4BYTES_MEMBERPOINTER(type)) { + load_store_register(PC_STW, reg, local_base_register(obj), obj, 0); + } else if (IS_TYPE_INT(type) || IS_TYPE_ENUM(type)) { + switch (type->size) { + case 1: + load_store_register(PC_STB, reg, local_base_register(obj), obj, 0); + break; + case 2: + load_store_register(PC_STH, reg, local_base_register(obj), obj, 0); + break; + case 4: + load_store_register(PC_STW, reg, local_base_register(obj), obj, 0); + break; + case 8: + load_store_register(PC_STW, reg, local_base_register(obj), obj, 0); + if (reg < GPRLimit) + load_store_register(PC_STW, reg + 1, local_base_register(obj), obj, 4); + break; + default: + CError_FATAL(993); + } + } else if (IS_TYPE_FLOAT(type)) { + load_store_register((type->size == 4) ? PC_STFS : PC_STFD, reg, local_base_register(obj), obj, 0); + } else if (IS_TYPE_VECTOR(type)) { + load_store_register(PC_STVX, reg, local_base_register(obj), obj, 0); + } else { + bytesLeft = (11 - reg) * 4; + if (bytesLeft > obj->type->size) + bytesLeft = obj->type->size; + offset = 0; + while (bytesLeft > 0) { + load_store_register(PC_STW, reg, local_base_register(obj), obj, offset); + reg++; + offset += 4; + bytesLeft -= 4; + } + } + } + } else { + if (vi->reg) { + if (IS_TYPE_POINTER(type) || IS_TYPE_4BYTES_MEMBERPOINTER(type)) { + load_store_register(PC_LWZ, vi->reg, local_base_register(obj), obj, 0); + } else if (IS_TYPE_FLOAT(type)) { + load_store_register((type->size == 4) ? PC_LFS : PC_LFD, vi->reg, local_base_register(obj), obj, 0); + } else if (IS_TYPE_VECTOR(type)) { + load_store_register(PC_LVX, vi->reg, local_base_register(obj), obj, 0); + } else { + switch (type->size) { + case 1: + load_store_register(PC_LBZ, vi->reg, local_base_register(obj), obj, 0); + break; + case 2: + load_store_register(is_unsigned(type) ? PC_LHZ : PC_LHA, vi->reg, local_base_register(obj), obj, 0); + break; + case 4: + load_store_register(PC_LWZ, vi->reg, local_base_register(obj), obj, 0); + break; + case 8: + load_store_register(PC_LWZ, vi->regHi, local_base_register(obj), obj, high_offset); + load_store_register(PC_LWZ, vi->reg, local_base_register(obj), obj, low_offset); + break; + default: + CError_FATAL(1095); + } + } + } else if (!optimizing) { + local_base_register(obj); + } + } +} + +static void load_TOC_pointers(void) { + VarInfo *vi; + Object *obj; + ObjectList *list; + PCode *pc; + Operand opnd; + + if (uses_globals && pic_base_reg) { + pic_base_pcodelabel = makepclabel(); + pc = makepcode(PC_BC, 20, 7, 3, pic_base_pcodelabel); + pcsetlinkbit(pc); + pcsetsideeffects(pc); + appendpcode(pclastblock, pc); + pcbranch(pclastblock, pic_base_pcodelabel); + makepcblock(); + pclabel(pclastblock, pic_base_pcodelabel); + emitpcode(PC_MFLR, pic_base_reg); + } + + memclrw(&opnd, sizeof(Operand)); + for (list = toclist; list; list = list->next) { + obj = list->object; + if ((vi = Registers_GetVarInfo(obj)) && (vi->flags & VarInfoFlag2)) { + switch (obj->datatype) { + case DNONLAZYPTR: + symbol_operand(&opnd, obj); + opnd.optype = OpndType_IndirectSymbol; + if (opnd.optype) + Coerce_to_register(&opnd, TYPE(&void_ptr), vi->reg); + if (opnd.reg != vi->reg) + emitpcode(PC_MR, vi->reg, opnd.reg); + break; + default: + CError_FATAL(1206); + } + } + } +} + +static int has_vararglist(Object *funcobj) { + FuncArg *arg; + + arg = TYPE_FUNC(funcobj->type)->args; + while (arg && arg != &elipsis) + arg = arg->next; + + return (arg == &elipsis); +} + +void assign_labels(Statement *stmt) { + Statement *last; + + last = NULL; + while (stmt) { + if (stmt->type == ST_LABEL && !stmt->label->pclabel) { + if (last && last->type == ST_LABEL) + stmt->label->pclabel = last->label->pclabel; + else + stmt->label->pclabel = makepclabel(); + } + + last = stmt; + stmt = stmt->next; + } +} + +static Boolean islaststatement(Statement *stmt) { + for (stmt = stmt->next; stmt; stmt = stmt->next) { + if (stmt->type > ST_LABEL) + return 0; + } + return 1; +} + +static void newstatement(SInt32 sourceoffset, SInt32 value, int flag) { + PCodeBlock *block = pclastblock; + + pcloopweight = value; + if (!block->pcodeCount) + block->loopWeight = value; + + if (block->pcodeCount > 100) + branch_label(makepclabel()); + + if (flag) + block->flags |= fPCBlockFlag4000; +} + +static void expressionstatement(ENode *expr) { + Operand opnd; + + memclrw(&opnd, sizeof(Operand)); + cgdispatch[expr->type](expr, 0, 0, &opnd); + + if (ENODE_IS(expr, EINDIRECT) && (opnd.flags & OpndFlags_Volatile)) { + if (TYPE_FITS_IN_REGISTER_2(expr->rtype)) { + if (opnd.optype != OpndType_GPR) + Coerce_to_register(&opnd, expr->rtype, 0); + } else if (IS_TYPE_FLOAT(expr->rtype)) { + if (opnd.optype != OpndType_FPR) + Coerce_to_fp_register(&opnd, expr->rtype, 0); + } else if (IS_TYPE_VECTOR(expr->rtype)) { + if (opnd.optype != OpndType_VR) + Coerce_to_v_register(&opnd, expr->rtype, 0); + } + } +} + +static void labelstatement(CLabel *label) { + if (!label->pclabel) + label->pclabel = makepclabel(); + if (!label->pclabel->resolved) + branch_label(label->pclabel); + free_temporaries(); +} + +static void gotostatement(CLabel *label) { + if (!label->pclabel) + label->pclabel = makepclabel(); + branch_always(label->pclabel); + free_temporaries(); +} + +static void gotoexpression(ENode *expr) { + Operand opnd; + CodeLabelList *list; + + memclrw(&opnd, sizeof(Operand)); + cgdispatch[expr->type](expr, 0, 0, &opnd); + + if (opnd.optype != OpndType_GPR) + Coerce_to_register(&opnd, TYPE(&void_ptr), 0); + CError_ASSERT(1397, opnd.optype == OpndType_GPR); + + for (list = codelabellist; list; list = list->next) + pcbranch(pclastblock, list->label->pclabel); + + emitpcode(PC_MTCTR, opnd.reg); + branch_indirect(NULL); +} + +static void conditionalstatement(ENode *cond, CLabel *label, short truthy) { + Operand opnd; + memclrw(&opnd, sizeof(Operand)); + + cond = evaluate_and_skip_comma(cond); + if (!label->pclabel) + label->pclabel = makepclabel(); + + if (TYPE_IS_8BYTES(cond->data.diadic.right->rtype) || TYPE_IS_8BYTES(cond->data.diadic.left->rtype)) + I8_gen_condition(cond, &opnd, 0); + else + gen_condition(cond, &opnd); + + branch_conditional(opnd.reg, opnd.regOffset, truthy, label->pclabel); + free_temporaries(); +} + +static void returnstatement(ENode *expr, Boolean dont_branch) { + Operand opnd; + Type *type; + memclrw(&opnd, sizeof(Operand)); + + if (expr) { + type = expr->rtype; + if (TYPE_FITS_IN_REGISTER(type)) { + if (TYPE_IS_8BYTES(type)) { + cgdispatch[expr->type](expr, low_reg, high_reg, &opnd); + coerce_to_register_pair(&opnd, type, low_reg, high_reg); + } else { + cgdispatch[expr->type](expr, 3, 0, &opnd); + if (opnd.optype != OpndType_GPR) + Coerce_to_register(&opnd, type, 3); + if (opnd.reg != 3) + emitpcode(PC_MR, 3, opnd.reg); + } + } else if (IS_TYPE_FLOAT(type)) { + cgdispatch[expr->type](expr, 1, 0, &opnd); + if (opnd.optype != OpndType_FPR) + Coerce_to_fp_register(&opnd, type, 1); + if (opnd.reg != 1) + emitpcode(PC_FMR, 1, opnd.reg); + } else if (IS_TYPE_VECTOR(type)) { + cgdispatch[expr->type](expr, 2, 0, &opnd); + if (opnd.optype != OpndType_VR) + Coerce_to_v_register(&opnd, type, 2); + if (opnd.reg != 2) + emitpcode(PC_VMR, 2, opnd.reg); + } else { + cgdispatch[expr->type](expr, 0, 0, &opnd); + } + } + + if (!dont_branch) { + if (!returnlabel->pclabel) + returnlabel->pclabel = makepclabel(); + branch_always(returnlabel->pclabel); + free_temporaries(); + } +} + +static void capturestackpointer(Object *obj) { + branch_label(makepclabel()); + CError_ASSERT(1568, obj->datatype == DLOCAL); + + load_store_register(PC_STW, 1, local_base_register(obj), obj, 20); + branch_label(makepclabel()); +} + +static void resetstackpointer(Object *obj) { + PCode *pc; + + CError_ASSERT(1595, obj->datatype == DLOCAL); + + branch_label(makepclabel()); + + load_store_register(PC_LWZ, 0, 1, NULL, 0); + + pc = makepcode(PC_LWZ, 1, local_base_register(obj), obj, 20); + pcsetsideeffects(pc); + appendpcode(pclastblock, pc); + + load_store_register(PC_STW, 0, 1, NULL, 0); + + branch_label(makepclabel()); +} + +static void callprofiler(char *name) { + UInt32 masks[RegClassMax] = {0, 0, 0, 0, 0}; + load_store_register(PC_LWZ, 3, 1, NULL, 0); + load_store_register(PC_LWZ, 3, 3, NULL, 8); + masks[RegClass_GPR] |= (1 << 3); + branch_subroutine(rt_profile_entry, 1, masks); +} + +static void exitprofiler(void) { +} + +void CodeGen_Generator(Statement *statements, Object *func, UInt8 mysteryFlag, Boolean callOnModuleBind) { + Statement *stmt; + Boolean has_varargs; + PCodeBlock *tmp; + SInt32 size; + + CodeGen_InitialSanityCheck(); + if (!saveheaperror) { + saveheaperror = getheaperror(); + setheaperror(CodeGen_heaperror); + } + + if (cparamblkptr->precompile == 1) + CError_Error(CErrorStr180); + + if (!func) { + func = createstaticinitobject(); + } else { +#ifdef CW_CLT + if (func && func->name) + PrintProgressFunction(func->name->name); +#endif + } + + gFunction = func; + has_varargs = has_vararglist(func); + init_endian(); + init_stack_globals(func); + assign_arguments_to_memory(func, mysteryFlag, has_varargs); + + needs_cleanup = 0; + disable_optimizer = 0; + has_catch_blocks = 0; + current_statement = NULL; + current_linenumber = 0; + precomputedoperands = NULL; + switchtables = NULL; + temps = NULL; + initializeexceptiontables(); + returnlabel = cleanreturnlabel = newlabel(); + + if (copts.debuglisting) + DumpIR(statements, func); + + statements = COpt_Optimizer(func, statements); + if (copts.debuglisting) + DumpIR(statements, func); + + resetTOCvarinfo(); + init_registers(); + expandTOCreferences(&statements->next); + if (copts.debuglisting) + DumpIR(statements, func); + + if (copts.profile) { + makes_call = 1; + requires_frame = 1; + } + + initpcode(); + + pclabel(prologue = makepcblock(), makepclabel()); + prologue->flags |= fIsProlog; + + pclabel(tmp = makepcblock(), makepclabel()); + pcbranch(prologue, tmp->labels); + + init_frame_sizes(has_varargs); + allocate_locals(); + process_arguments(move_assigned_argument, has_varargs); + + if (copts.schedule_factor || copts.altivec_model) + branch_label(makepclabel()); + + load_TOC_pointers(); + + if (copts.profile) + callprofiler(CMangler_GetLinkName(func)->name); + + assign_labels(statements->next); + open_temp_registers(); + + for (stmt = statements->next; stmt; stmt = stmt->next) { + current_statement = stmt; + current_linenumber = stmt->sourceoffset; + switch (stmt->type) { + case ST_NOP: + break; + case ST_EXPRESSION: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + expressionstatement(stmt->expr); + break; + case ST_LABEL: + labelstatement(stmt->label); + break; + case ST_IFGOTO: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + conditionalstatement(stmt->expr, stmt->label, 1); + break; + case ST_IFNGOTO: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + conditionalstatement(stmt->expr, stmt->label, 0); + break; + case ST_GOTOEXPR: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + gotoexpression(stmt->expr); + break; + case ST_GOTO: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + gotostatement(stmt->label); + break; + case ST_RETURN: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + returnstatement(stmt->expr, islaststatement(stmt)); + break; + case ST_SWITCH: + newstatement(stmt->sourceoffset, stmt->value, (stmt->flags & StmtFlag_10) != 0); + switchstatement(stmt->expr, (SwitchInfo *) stmt->label); + break; + case ST_BEGINCATCH: + capturestackpointer(stmt->expr->data.objref); + break; + case ST_ENDCATCHDTOR: + CError_ASSERT(2056, stmt->expr->data.objref->datatype == DLOCAL); + add_immediate(3, local_base_register(stmt->expr->data.objref), stmt->expr->data.objref, 0); + { + UInt32 masks[RegClassMax] = {0, 0, 0, 0, 0}; + masks[RegClass_GPR] |= 1 << 3; + branch_subroutine(Xecth_func, 1, masks); + } + case ST_ENDCATCH: + resetstackpointer(stmt->expr->data.objref); + break; + case ST_ASM: + if (stmt->expr) { + if (((InlineAsm *) stmt->expr)->flags & IAFlag1) { + CError_FATAL(2076); + } else { + branch_label(makepclabel()); + InlineAsm_TranslateIRtoPCode(stmt); + } + } + break; + case ST_BEGINLOOP: + CError_FATAL(2086); + break; + case ST_ENDLOOP: + CError_FATAL(2090); + break; + default: + CError_FATAL(2094); + } + check_temp_registers(); + } + + close_temp_registers(); + labelstatement(returnlabel); + + current_statement = NULL; + + epilogue = pclastblock; + pclastblock->flags |= fIsEpilogue; + + pccomputepredecessors(); + deleteunreachableblocks(); + + if (copts.report_heap_info > 0) + CodeGen_reportheapinfo(0, CMangler_GetLinkName(func)->name, "before optimization"); + + if (copts.optimizationlevel > 0 && !disable_optimizer) + globallyoptimizepcode(func); + else + pclistblocks(CMangler_GetLinkName(func)->name, "INITIAL CODE"); + + if (copts.schedule_factor == 2) { + if (copts.peephole) + peepholemergeblocks(func, 0); + if (copts.debuglisting) + pclistblocks_start_scheduler(CMangler_GetLinkName(func)->name, "BEFORE SCHEDULING"); + scheduleinstructions(0); + if (copts.debuglisting) + pclistblocks_end_scheduler(); + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER INSTRUCTION SCHEDULING"); + } + + if (copts.peephole) { + if (copts.schedule_factor == 0 && copts.optimizationlevel > 1) + peepholemergeblocks(func, 0); + peepholeoptimizeforward(func); + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER PEEPHOLE FORWARD"); + } + + allocate_temporaries(); + colorinstructions(func); + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER REGISTER COLORING"); + + if (copts.optimizationlevel > 1 && !disable_optimizer) { + removecommonsubexpressions(func, 1); + if (removedcommonsubexpressions && copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER VALUE NUMBERING (POST COLORING)"); + } + + compute_frame_sizes(); + generate_prologue(prologue, has_varargs); + if (copts.profile) + exitprofiler(); + generate_epilogue(epilogue, 1); + + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER GENERATING EPILOGUE, PROLOGUE"); + + if (copts.peephole) { + if (copts.schedule_factor) { + peepholemergeblocks(func, 1); + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER MERGING EPILOGUE, PROLOGUE"); + } + peepholeoptimizepcode(func); + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER PEEPHOLE OPTIMIZATION"); + } + + if (copts.schedule_factor) { + if (copts.debuglisting) + pclistblocks_start_scheduler(CMangler_GetLinkName(func)->name, "BEFORE SCHEDULING"); + scheduleinstructions(1); + if (copts.debuglisting) + pclistblocks_end_scheduler(); + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "FINAL CODE AFTER INSTRUCTION SCHEDULING"); + } else { + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "FINAL CODE"); + } + + size = assemblefunction(func, NULL); + dumpswitchtables(func); + dumpcodelabels(func); + if (callOnModuleBind) + ObjGen_DeclareInitFunction(func); + + pic_base_pcodelabel = NULL; + pic_base_reg = 0; + if (copts.exceptions && requires_frame) + dumpexceptiontables(func, size); + + if (copts.report_heap_info > 0) + CodeGen_reportheapinfo(0, CMangler_GetLinkName(func)->name, "finished"); + + if (saveheaperror) + setheaperror(saveheaperror); +} + +enum { + reg3 = 3, + reg4 = 4 +}; + +void CodeGen_GenVDispatchThunk(Object *thunkobj, Object *obj, SInt32 a, SInt32 b, SInt32 c) { + Boolean save_debug; + Boolean save_peephole; + Boolean save_traceback; + char reg; + + save_debug = copts.filesyminfo; + save_peephole = copts.peephole; + save_traceback = copts.traceback; + + CodeGen_InitialSanityCheck(); + CError_ASSERT(2270, b == 0); + + initpcode(); + makepcblock(); + + if (a) { + reg = CMach_PassResultInHiddenArg(TYPE_FUNC(obj->type)->functype) ? reg4 : reg3; + + if (c >= 0) { + if (!FITS_IN_SHORT(c)) { + emitpcode(PC_ADDIS, 11, 0, 0, HIGH_PART(c)); + if (c) + emitpcode(PC_ADDI, 11, 11, 0, LOW_PART(c)); + } else { + emitpcode(PC_ADDI, 11, 0, 0, c); + } + emitpcode(PC_LWZX, 11, reg, 11); + emitpcode(PC_ADD, reg, reg, 11); + } + + if (!FITS_IN_SHORT(a)) { + emitpcode(PC_ADDIS, reg, reg, 0, HIGH_PART(a)); + if (a) + emitpcode(PC_ADDI, reg, reg, 0, LOW_PART(a)); + } else { + emitpcode(PC_ADDI, reg, reg, 0, a); + } + } + + emitpcode(PC_B, 0, obj); + + copts.filesyminfo = 0; + copts.peephole = 0; + copts.traceback = 0; + assemblefunction(thunkobj, NULL); + copts.filesyminfo = save_debug; + copts.peephole = save_peephole; + copts.traceback = save_traceback; +} + +void CodeGen_SetupRuntimeObjects(void) { + setupaddressing(); + dyld_stub_binding_helper = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_cvt_fp2unsigned = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_profile_entry = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_profile_exit = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_div2i = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_div2u = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_mod2i = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_mod2u = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_shr2i = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_shr2u = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_shl2i = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_cvt_ull_dbl = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_cvt_sll_dbl = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_cvt_ull_flt = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_cvt_sll_flt = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + rt_cvt_dbl_usll = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + Intrinsics_SetupRuntimeObjects(); +} + +Boolean CodeGen_ReInitRuntimeObjects(Boolean is_precompiler) { + dyld_stub_binding_helper->name = GetHashNameNodeExport("dyld_stub_binding_helper"); + dyld_stub_binding_helper->u.func.linkname = dyld_stub_binding_helper->name; + + rt_cvt_fp2unsigned->name = GetHashNameNodeExport("__cvt_fp2unsigned"); + + rt_profile_entry->name = GetHashNameNodeExport("mcount"); + rt_profile_entry->u.func.linkname = rt_profile_entry->name; + + rt_profile_exit->name = GetHashNameNodeExport("profile_exit"); + + rt_div2i->name = GetHashNameNodeExport("__div2i"); + rt_div2u->name = GetHashNameNodeExport("__div2u"); + rt_mod2i->name = GetHashNameNodeExport("__mod2i"); + rt_mod2u->name = GetHashNameNodeExport("__mod2u"); + rt_shr2i->name = GetHashNameNodeExport("__shr2i"); + rt_shr2u->name = GetHashNameNodeExport("__shr2u"); + rt_shl2i->name = GetHashNameNodeExport("__shl2i"); + + rt_cvt_ull_dbl->name = GetHashNameNodeExport("__cvt_ull_dbl"); + rt_cvt_sll_dbl->name = GetHashNameNodeExport("__cvt_sll_dbl"); + rt_cvt_ull_flt->name = GetHashNameNodeExport("__cvt_ull_flt"); + rt_cvt_sll_flt->name = GetHashNameNodeExport("__cvt_sll_flt"); + rt_cvt_dbl_usll->name = GetHashNameNodeExport("__cvt_dbl_usll"); + + CMach_ReInitRuntimeObjects(); + return Intrinsics_ReInitRuntimeObjects(is_precompiler); +} + +Boolean CodeGen_IsPublicRuntimeObject(Object *obj) { + return Intrinsics_IsPublicRuntimeObject(obj); +} + +void CodeGen_SOMStub(Object *a, Object *b, Object *c, SInt32 offset) { + Boolean save_debug; + Boolean save_peephole; + Boolean save_traceback; + Object *tmp; + Operand opnd; + + save_debug = copts.filesyminfo; + save_peephole = copts.peephole; + save_traceback = copts.traceback; + + CodeGen_InitialSanityCheck(); + memclrw(&opnd, sizeof(Operand)); + CError_ASSERT(2528, offset <= 0x7FFF); + + initpcode(); + makepcblock(); + + tmp = createIndirect(c, 1, 1); + if (tmp) { + opnd.optype = OpndType_Symbol; + opnd.object = tmp; + indirect(&opnd, NULL); + } else { + opnd.optype = OpndType_Symbol; + opnd.object = c; + } + + if (opnd.optype != OpndType_GPR) + Coerce_to_register(&opnd, TYPE(&void_ptr), 12); + + if (opnd.optype != OpndType_GPR) { + CError_FATAL(2562); + } else if (opnd.reg != 12) { + emitpcode(PC_MR, 12, opnd.reg); + } + + load_store_register(PC_LWZ, 12, 12, NULL, (short) offset); + emitpcode(PC_B, 0, b); + + copts.filesyminfo = 0; + copts.peephole = 0; + copts.traceback = 0; + assemblefunction(a, NULL); + copts.filesyminfo = save_debug; + copts.peephole = save_peephole; + copts.traceback = save_traceback; +} + +void CodeGen_ParseDeclSpec(HashNameNode *identifier, DeclInfo *declinfo) { + if (!strcmp(identifier->name, "private_extern")) { + declinfo->storageclass = TK_EXTERN; + declinfo->exportflags = EXPORT_FLAGS_INTERNAL; + } else { + CError_Error(CErrorStr176); + } +} + +static void CodeGen_EOLCheck(void) { + short t; + + if (plex() != TK_EOL) { + CPrep_Error(CErrorStr113); + do { + t = plex(); + } while (t != TK_EOL && t != 0); + } +} + +static void schedule_for(int what) { + CPrep_PushOption(OPT_OFFSET(scheduling), what); + if (copts.schedule_factor == 0) + CPrep_PushOption(OPT_OFFSET(schedule_factor), 2); +} + +static void pragma_scheduling(void) { + Boolean *flag; + int cpu; + + tk = plex(); + if (tk == TK_IDENTIFIER) { + flag = &copts.altivec_model; + if (!strcmp(tkidentifier->name, "vger")) { + schedule_for(10); + return; + } else if (!strcmp(tkidentifier->name, "altivec")) { + schedule_for(7); + return; + } else if (!strcmp(tkidentifier->name, "reset")) { + CPrep_PopOption(OPT_OFFSET(scheduling)); + CPrep_PopOption(OPT_OFFSET(schedule_factor)); + return; + } else if (!strcmp(tkidentifier->name, "off")) { + CPrep_PushOption(OPT_OFFSET(schedule_factor), 0); + return; + } else if (!strcmp(tkidentifier->name, "once")) { + CPrep_PushOption(OPT_OFFSET(schedule_factor), 1); + return; + } else if (!strcmp(tkidentifier->name, "twice")) { + CPrep_PushOption(OPT_OFFSET(schedule_factor), 2); + return; + } else if (!strcmp(tkidentifier->name, "on")) { + CPrep_PushOption(OPT_OFFSET(schedule_factor), 2); + return; + } else if (!*flag) { + if (!strcmp(tkidentifier->name, "603e")) { + schedule_for(5); + return; + } else if (!strcmp(tkidentifier->name, "604e")) { + schedule_for(6); + return; + } else if (!strcmp(tkidentifier->name, "PPC603e")) { + schedule_for(5); + return; + } else if (!strcmp(tkidentifier->name, "PPC604e")) { + schedule_for(6); + return; + } + } else { + PPCError_Error(PPCErrorStr115); + return; + } + CPrep_Error(CErrorStr186); + return; + } + + if (tk == TK_INTCONST) { + switch (CInt64_GetULong(&tkintconst)) { + case 601: + cpu = 1; + break; + case 603: + cpu = 2; + break; + case 604: + cpu = 3; + break; + case 750: + cpu = 4; + break; + case 7400: + cpu = 7; + break; + case 7450: + cpu = 10; + break; + case 8240: + case 8260: + cpu = 5; + break; + case 801: + case 821: + case 823: + case 850: + case 860: + cpu = 9; + break; + default: + CPrep_Error(CErrorStr186); + return; + } + schedule_for(cpu); + return; + } + + if (copts.warn_illpragma) + CPrep_Warning(CErrorStr186); +} + +static SInt32 CodeGen_ParseLongIntegerORonORoff(void) { + SInt32 result; + short t; + + result = 0; + t = plex(); + if (t == TK_INTCONST) { + if (!tkintconst.hi) + result = CInt64_GetULong(&tkintconst); + else + CPrep_Error(CErrorStr154); + CodeGen_EOLCheck(); + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "on")) { + CodeGen_EOLCheck(); + return 1; + } + if (!strcmp(tkidentifier->name, "off")) { + CodeGen_EOLCheck(); + return 0; + } + if (copts.warn_illpragma) + CPrep_Warning(CErrorStr186); + return 0; + } else { + if (copts.warn_illpragma) + CPrep_Warning(CErrorStr186); + } + + return result; +} + +void CodeGen_ParsePragma(HashNameNode *name) { + short t; + SInt32 value; + char *str; + NameSpace *nspace; + NameSpaceObjectList *list; + + if (!strcmp(name->name, "debuglisting")) { + copts.debuglisting = CodeGen_ParseLongIntegerORonORoff(); + return; + } + + if (!strcmp(name->name, "report_heap_info")) { + t = plex(); + if (t == TK_INTCONST) { + copts.report_heap_info = CInt64_GetULong(&tkintconst); + if (copts.report_heap_info < 0) { + copts.report_heap_info = 0; + CError_Error(CErrorStr186); + } + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.report_heap_info = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.report_heap_info = 1; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "scheduling")) { + pragma_scheduling(); + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "ppc_unroll_speculative")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.unroll_speculative = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.unroll_speculative = 1; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "ppc_unroll_instructions_limit")) { + t = plex(); + if (t == TK_INTCONST) { + copts.unroll_instr_limit = CInt64_GetULong(&tkintconst); + if (copts.unroll_instr_limit < 0) { + copts.unroll_instr_limit = 0; + CError_Error(CErrorStr186); + } + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.unroll_instr_limit = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.unroll_instr_limit = 70; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "gen_fsel")) { + t = plex(); + if (t == TK_INTCONST) { + value = CInt64_GetULong(&tkintconst); + if (value < 0) { + copts.gen_fsel = 0; + CError_Error(CErrorStr186); + } else if (value > 255) { + copts.gen_fsel = 255; + CError_Error(CErrorStr186); + } else { + copts.gen_fsel = value; + } + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.gen_fsel = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.gen_fsel = 10; + } else if (!strcmp(tkidentifier->name, "always")) { + copts.gen_fsel = 255; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "ppc_unroll_factor_limit")) { + t = plex(); + if (t == TK_INTCONST) { + copts.unroll_factor_limit = CInt64_GetULong(&tkintconst); + if (copts.unroll_factor_limit < 0) { + copts.unroll_factor_limit = 0; + CError_Error(CErrorStr186); + } + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.unroll_factor_limit = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.unroll_factor_limit = 10; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "altivec_model")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.altivec_model = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.altivec_model = 1; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "altivec_vrsave")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + CPrep_PushOption(OPT_OFFSET(altivec_vrsave), 0); + } else if (!strcmp(tkidentifier->name, "on")) { + CPrep_PushOption(OPT_OFFSET(altivec_vrsave), 1); + } else if (!strcmp(tkidentifier->name, "allon")) { + CPrep_PushOption(OPT_OFFSET(altivec_vrsave), 2); + } else if (!strcmp(tkidentifier->name, "reset")) { + CPrep_PopOption(OPT_OFFSET(altivec_vrsave)); + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "function_align")) { + t = plex(); + if (t == TK_INTCONST) { + value = CInt64_GetULong(&tkintconst); + switch (value) { + case 4: + case 8: + case 16: + case 32: + case 64: + case 128: + CPrep_PushOption(OPT_OFFSET(function_align), value); + break; + default: + PPCError_Warning(PPCErrorStr161); + CodeGen_EOLCheck(); + return; + } + } else if (t == TK_IDENTIFIER && !strcmp(tkidentifier->name, "reset")) { + CPrep_PopOption(OPT_OFFSET(function_align)); + } else { + PPCError_Warning(PPCErrorStr161); + } + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "processor")) { + if (cscope_currentfunc) { + PPCError_Warning(PPCErrorStr156, "pragma processor"); + return; + } + t = plex(); + if (t == TK_INTCONST) { + switch (CInt64_GetULong(&tkintconst)) { + case 401: + copts.processor = 0; + break; + case 403: + copts.processor = 1; + break; + case 505: + copts.processor = 2; + break; + case 509: + copts.processor = 3; + break; + case 555: + copts.processor = 4; + break; + case 556: + copts.processor = 25; + break; + case 565: + copts.processor = 26; + break; + case 601: + copts.processor = 5; + break; + case 602: + copts.processor = 6; + break; + case 8240: + copts.processor = 18; + break; + case 8260: + copts.processor = 19; + break; + case 603: + copts.processor = 7; + break; + case 604: + copts.processor = 9; + break; + case 740: + copts.processor = 11; + break; + case 750: + copts.processor = 12; + break; + case 801: + copts.processor = 13; + break; + case 821: + copts.processor = 14; + break; + case 823: + copts.processor = 15; + break; + case 850: + copts.processor = 16; + break; + case 860: + copts.processor = 17; + break; + case 7400: + copts.processor = 21; + break; + default: + PPCError_Warning(PPCErrorStr208); + CodeGen_EOLCheck(); + return; + } + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "generic")) + copts.processor = 20; + else if (!strcmp(tkidentifier->name, "603e")) + copts.processor = 8; + else if (!strcmp(tkidentifier->name, "604e")) + copts.processor = 10; + else if (!strcmp(tkidentifier->name, "PPC603e")) + copts.processor = 8; + else if (!strcmp(tkidentifier->name, "PPC604e")) + copts.processor = 10; + else + PPCError_Warning(PPCErrorStr208); + } else { + PPCError_Warning(PPCErrorStr208); + } + + if ((str = CMach_GetCPU())) + CPrep_InsertSpecialMacro(&ppc_cpu, str); + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "min_struct_alignment")) { + t = plex(); + if (t == TK_INTCONST) { + value = CInt64_GetULong(&tkintconst); + switch (value) { + case 4: + case 8: + case 16: + case 32: + case 64: + case 128: + CPrep_PushOption(OPT_OFFSET(min_struct_alignment), value); + break; + default: + PPCError_Warning(PPCErrorStr191); + CodeGen_EOLCheck(); + return; + } + } else if (t == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "reset")) + CPrep_PopOption(OPT_OFFSET(min_struct_alignment)); + else if (!strcmp(tkidentifier->name, "on")) + CPrep_PushOption(OPT_OFFSET(min_struct_alignment), 4); + else if (!strcmp(tkidentifier->name, "off")) + CPrep_PushOption(OPT_OFFSET(min_struct_alignment), 1); + } else { + PPCError_Warning(PPCErrorStr161); + } + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "tvectors")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + no_descriptors = 1; + } else if (!strcmp(tkidentifier->name, "on")) { + no_descriptors = 0; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "dynamic")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.codegen_dynamic = 0; + copts.codegen_pic = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.codegen_dynamic = 1; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "pic")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.codegen_pic = 0; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.codegen_pic = 1; + if (!copts.codegen_dynamic) { + PPCError_Error(PPCErrorStr189); + copts.codegen_pic = 0; + } + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "implicit_templates")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.no_implicit_templates = 1; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.no_implicit_templates = 0; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "common")) { + if (plex() == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "off")) { + copts.no_common = 1; + } else if (!strcmp(tkidentifier->name, "on")) { + copts.no_common = 0; + } else { + CError_Error(CErrorStr186); + return; + } + } else { + CError_Error(CErrorStr186); + } + + CodeGen_EOLCheck(); + return; + } + + if (!strcmp(name->name, "CALL_ON_MODULE_BIND")) { + if (plex() == TK_IDENTIFIER) { + for (nspace = cscope_current; nspace; nspace = nspace->parent) { + list = CScope_GetLocalObject(nspace, tkidentifier); + if (list && list->object->otype == OT_OBJECT && OBJECT(list->object)->datatype == DFUNC) { + ObjGen_DeclareInitFunction(OBJECT(list->object)); + break; + } + } + } else { + CError_Error(CErrorStr186); + } + + CodeGen_EOLCheck(); + return; + } + + if (copts.warn_illpragma) + CPrep_Warning(CErrorStr186); + + if (plex() != TK_EOL) { + do { + t = plex(); + } while (t != TK_EOL && t != 0); + } +} + +void CodeGen_UpdateObject(Object *object) { + if (object->datatype == DDATA && object->section == SECT_DEFAULT && object == rt_ptmf_null) + object->sclass = TK_EXTERN; +} + +void CodeGen_UpdateBackEndOptions(void) { + copts.globaloptimizer = 1; +} + +SInt32 CodeGen_objc_method_self_offset(ObjCMethod *meth) { + SInt32 size; + + if (!meth->return_type) { + size = 4; + } else if ( + IS_TYPE_ARRAY(meth->return_type) || + IS_TYPE_NONVECTOR_STRUCT(meth->return_type) || + IS_TYPE_CLASS(meth->return_type) || + IS_TYPE_12BYTES_MEMBERPOINTER(meth->return_type) + ) + { + size = 8; + } else { + size = meth->return_type->size; + } + + if (size == 0) + size = 1; + + return (size + 3) & ~3; +} + +SInt32 CodeGen_objc_method_sel_offset(ObjCMethod *meth) { + return (CodeGen_objc_method_self_offset(meth) + 7) & ~3; +} + +SInt32 CodeGen_objc_method_arg_offset(ObjCMethod *meth, ObjCMethodArg *arg) { + SInt32 pos; + ObjCMethodArg *scan; + + pos = CodeGen_objc_method_sel_offset(meth) + 4; + for (scan = meth->selector_args; scan; scan = scan->next) { + if (scan == arg) + return pos; + + if (scan->type == NULL) + pos += 4; + else + pos += scan->type->size; + + pos = (pos + 3) & ~3; + } + + return 0; +} + +SInt32 CodeGen_objc_method_args_size(ObjCMethod *meth) { + SInt32 size; + ObjCMethodArg *scan; + + size = CodeGen_objc_method_self_offset(meth); + for (scan = meth->selector_args; scan; scan = scan->next) { + if (scan->next == NULL && scan->type == NULL) + return size; + + size = (size + 3) & ~3; + if (scan->type == NULL) + size += 4; + else + size += scan->type->size; + } + + return size; +} + +ENode *CodeGen_HandleIntrinsicCall(Object *func, ENodeList *arg_exprs) { + return Intrinsics_HandleIntrinsicCall(func, arg_exprs); +} + +ENode *CodeGen_HandleTypeCast(ENode *expr, Type *type, UInt32 qual) { + short flags; + + if (copts.altivec_model) { + flags = qual & ENODE_FLAG_QUALS; + if (IS_TYPE_STRUCT(type) && IS_TYPE_STRUCT(expr->rtype) && expr->flags == flags) { + switch (TYPE_STRUCT(type)->stype) { + case STRUCT_VECTOR_UCHAR: + case STRUCT_VECTOR_SCHAR: + case STRUCT_VECTOR_BCHAR: + case STRUCT_VECTOR_USHORT: + case STRUCT_VECTOR_SSHORT: + case STRUCT_VECTOR_BSHORT: + case STRUCT_VECTOR_UINT: + case STRUCT_VECTOR_SINT: + case STRUCT_VECTOR_BINT: + case STRUCT_VECTOR_FLOAT: + case STRUCT_VECTOR_PIXEL: + expr = makemonadicnode(expr, ETYPCON); + expr->rtype = type; + expr->flags = flags; + return expr; + } + } + } + + return NULL; +} + +short CodeGen_AssignCheck(const ENode *expr, const Type *type, Boolean flag1, Boolean flag2) { + short result; + const Type *exprtype = expr->rtype; + + if ( + copts.altivec_model && + IS_TYPE_VECTOR(type) && + IS_TYPE_VECTOR(exprtype) && + TYPE_STRUCT(type)->stype == TYPE_STRUCT(exprtype)->stype + ) + result = CheckResult3; + else + result = CheckResult0; + return result; +} + +Boolean CodeGen_CollapseVectorExpression(ENode *expr, MWVector128 *vec, Type *type) { + // this function is a mess and needs lots of fixing + Boolean result; + //int count; + int i; + ENode *escan; + + result = 0; + for (i = 0; i < 4; i++) + vec->ul[i] = 0; + + if (ENODE_IS(expr, ECOMMA)) { + i = 0; + escan = expr; + while (ENODE_IS(escan, ECOMMA)) { + i++; + escan = escan->data.diadic.left; + } + switch (TYPE_STRUCT(type)->stype) { + case STRUCT_VECTOR_UCHAR: + case STRUCT_VECTOR_SCHAR: + case STRUCT_VECTOR_BCHAR: + if (i < 15) { + PPCError_Error(PPCErrorStr110, type, 0); + } else if (i > 15) { + PPCError_Error(PPCErrorStr111, type, 0); + } else { + escan = expr; + i = 15; + while (ENODE_IS(escan, ECOMMA)) { + CInt64 v; + expr = escan->data.diadic.right; + v = expr->data.intval; + if (!ENODE_IS(expr, EINTCONST)) { + PPCError_Error(PPCErrorStr112); + break; + } + + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_UCHAR) { + if (!CInt64_IsInURange(v, 1)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 1)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + + vec->uc[i--] = (UInt8) v.lo; + escan = escan->data.diadic.left; + } + + if (ENODE_IS(escan, EINTCONST)) { + CInt64 v = escan->data.intval; + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_UCHAR) { + if (!CInt64_IsInURange(v, 1)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 1)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + vec->uc[0] = (UInt8) v.lo; + } else { + PPCError_Error(PPCErrorStr112); + break; + } + result = 1; + } + break; + + case STRUCT_VECTOR_USHORT: + case STRUCT_VECTOR_SSHORT: + case STRUCT_VECTOR_BSHORT: + case STRUCT_VECTOR_PIXEL: + if (i < 7) { + PPCError_Error(PPCErrorStr110, type, 0); + } else if (i > 7) { + PPCError_Error(PPCErrorStr111, type, 0); + } else { + escan = expr; + i = 7; + while (ENODE_IS(escan, ECOMMA)) { + ENode *e = escan->data.diadic.right; + CInt64 v = e->data.intval; + if (!ENODE_IS(e, EINTCONST)) { + PPCError_Error(PPCErrorStr112); + break; + } + + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_USHORT || TYPE_STRUCT(type)->stype == STRUCT_VECTOR_PIXEL) { + if (!CInt64_IsInURange(v, 2)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 2)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + + vec->us[i--] = (UInt16) e->data.intval.lo; + escan = escan->data.diadic.left; + } + + if (ENODE_IS(escan, EINTCONST)) { + CInt64 v = escan->data.intval; + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_USHORT || TYPE_STRUCT(type)->stype == STRUCT_VECTOR_PIXEL) { + if (!CInt64_IsInURange(v, 2)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 2)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + vec->us[0] = (UInt16) v.lo; + } else { + PPCError_Error(PPCErrorStr112); + break; + } + result = 1; + } + break; + + case STRUCT_VECTOR_UINT: + case STRUCT_VECTOR_SINT: + case STRUCT_VECTOR_BINT: + if (i < 3) { + PPCError_Error(PPCErrorStr110, type, 0); + } else if (i > 3) { + PPCError_Error(PPCErrorStr111, type, 0); + } else { + escan = expr; + i = 3; + while (ENODE_IS(escan, ECOMMA)) { + CInt64 v; + expr = escan->data.diadic.right; + v = expr->data.intval; + if (!ENODE_IS(expr, EINTCONST)) { + PPCError_Error(PPCErrorStr112); + break; + } + + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_UINT) { + if (!CInt64_IsInURange(v, 4)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 4)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + + vec->ul[i--] = expr->data.intval.lo; + escan = escan->data.diadic.left; + } + + if (ENODE_IS(escan, EINTCONST)) { + CInt64 v = escan->data.intval; + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_UINT) { + if (!CInt64_IsInURange(v, 4)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 4)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + vec->ul[0] = v.lo; + } else { + PPCError_Error(PPCErrorStr112); + break; + } + result = 1; + } + break; + + case STRUCT_VECTOR_FLOAT: + if (i < 3) { + PPCError_Error(PPCErrorStr110, type, 0); + } else if (i > 3) { + PPCError_Error(PPCErrorStr111, type, 0); + } else { + Float fv; + escan = expr; + i = 3; + while (ENODE_IS(escan, ECOMMA)) { + expr = escan->data.diadic.right; + if (ENODE_IS(expr, EFLOATCONST)) { + fv = expr->data.floatval; + } else if (ENODE_IS(escan->data.diadic.right, EINTCONST)) { + fv = CMach_CalcFloatConvertFromInt(expr->rtype, + expr->data.intval); + } else { + PPCError_Error(PPCErrorStr112); + break; + } + + CMach_InitFloatMem(TYPE(&stfloat), fv, &vec->f[i]); + i--; + escan = escan->data.diadic.left; + } + + if (ENODE_IS(escan, EFLOATCONST)) { + fv = escan->data.floatval; + } else if (ENODE_IS(escan, EINTCONST)) { + fv = CMach_CalcFloatConvertFromInt(escan->rtype, escan->data.intval); + } else { + PPCError_Error(PPCErrorStr112); + break; + } + + CMach_InitFloatMem(TYPE(&stfloat), fv, &vec->f[0]); + result = 1; + } + break; + } + } else if (ENODE_IS(expr, EINTCONST)) { + int i = 0; + switch (TYPE_STRUCT(type)->stype) { + case STRUCT_VECTOR_UCHAR: + case STRUCT_VECTOR_SCHAR: + case STRUCT_VECTOR_BCHAR: + { + CInt64 v = expr->data.intval; + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_UCHAR) { + if (!CInt64_IsInURange(v, 1)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 1)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + while (i < 16) + vec->uc[i++] = (UInt8) v.lo; + result = 1; + break; + } + case STRUCT_VECTOR_USHORT: + case STRUCT_VECTOR_SSHORT: + case STRUCT_VECTOR_BSHORT: + case STRUCT_VECTOR_PIXEL: + { + CInt64 v = expr->data.intval; + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_USHORT || TYPE_STRUCT(type)->stype == STRUCT_VECTOR_PIXEL) { + if (!CInt64_IsInURange(v, 2)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 2)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + while (i < 8) + vec->us[i++] = (UInt16) v.lo; + result = 1; + break; + } + case STRUCT_VECTOR_UINT: + case STRUCT_VECTOR_SINT: + case STRUCT_VECTOR_BINT: + { + CInt64 v = expr->data.intval; + if (copts.pedantic) { + if (TYPE_STRUCT(type)->stype == STRUCT_VECTOR_UINT) { + if (!CInt64_IsInURange(v, 4)) + PPCError_Warning(PPCErrorStr113, type, 0); + } else { + if (!CInt64_IsInRange(v, 4)) + PPCError_Warning(PPCErrorStr113, type, 0); + } + } + while (i < 4) + vec->ul[i++] = v.lo; + result = 1; + break; + } + case STRUCT_VECTOR_FLOAT: + { + Float fv; + if (!CInt64_IsInRange(expr->data.intval, 4)) { + PPCError_Error(PPCErrorStr112); + break; + } + fv = CMach_CalcFloatConvertFromInt(expr->rtype, expr->data.intval); + for (; i < 4; i++) + CMach_InitFloatMem(TYPE(&stfloat), fv, &vec->f[i]); + result = 1; + break; + } + default: + PPCError_Error(PPCErrorStr112); + } + } else if (ENODE_IS(expr, EFLOATCONST)) { + switch (TYPE_STRUCT(type)->stype) { + default: + PPCError_Error(PPCErrorStr112); + break; + case STRUCT_VECTOR_FLOAT: + { + Float fv; + i = 0; + fv = expr->data.floatval; + CMach_InitFloatMem(TYPE(&stfloat), fv, &vec->f[0]); + while (i < 4) + CMach_InitFloatMem(TYPE(&stfloat), fv, &vec->f[i++]); + result = 1; + break; + } + } + } else if (ENODE_IS2(expr, EINDIRECT, EFUNCCALL)) { + if (!IS_TYPE_STRUCT(expr->rtype)) + PPCError_Error(PPCErrorStr112); + } else if (!ENODE_IS(expr, EVECTOR128CONST)) { + PPCError_Error(PPCErrorStr112); + } + + return result; +} + +void CodeGen_InsertSpecialMacros(void) { + char *str; + + CPrep_InsertSpecialMacro(&vecM, "__VEC__"); + CPrep_InsertSpecialMacro(&altivecM, "__ALTIVEC__"); + CPrep_InsertSpecialMacro(&powcM, "powerc"); + CPrep_InsertSpecialMacro(&__powcM, "__powerc"); + CPrep_InsertSpecialMacro(&hostM, "__POWERPC__"); + CPrep_InsertSpecialMacro(&_ppc_M, "__ppc__"); + + CPrep_InsertSpecialMacro(&bendM, "__BIG_ENDIAN__"); + + if ((str = CMach_GetCPU())) + CPrep_InsertSpecialMacro(&ppc_cpu, str); + + CPrep_InsertSpecialMacro(&profM, "__profile__"); + CPrep_InsertSpecialMacro(&longI, "__fourbyteints__"); + CPrep_InsertSpecialMacro(&IEEED, "__IEEEdoubles__"); + CPrep_InsertSpecialMacro(&macM2, "__MACOS__"); + CPrep_InsertSpecialMacro(&appleM, "__APPLE__"); + CPrep_InsertSpecialMacro(&_machM, "__MACH__"); + CPrep_InsertSpecialMacro(&archM, "__ARCHITECTURE__"); + + if (copts.optimizationlevel > 0) + CPrep_InsertSpecialMacro(&optM, "__OPTIMIZE__"); + + if (copts.codegen_dynamic) + CPrep_InsertSpecialMacro(&dynM, "__DYNAMIC__"); + if (!copts.codegen_dynamic) + CPrep_InsertSpecialMacro(&dynM, "__STATIC__"); + + if (copts.oldalignment && copts.structalignment == AlignMode2_PPC) + CPrep_InsertSpecialMacro(&alignM, "__NATURAL_ALIGNMENT__"); + + if (!copts.ANSIstrict) + CPrep_InsertSpecialMacro(&ppcM, "ppc"); +} + +char *CodeGen_ExpandSpecialMacro(Macro *macro) { + if (macro == &vecM) return "10205"; + if (macro == &altivecM) return "100000000"; + if (macro == &powcM) return "1"; + if (macro == &__powcM) return "1"; + if (macro == &hostM) return "1"; + if (macro == &bendM) return "1"; + if (macro == &_ppc_M) return "1"; + if (CMach_GetCPU() && macro == &ppc_cpu) return "1"; + if (macro == &profM) return copts.profile ? "1" : "0"; + if (macro == &longI) return "1"; + if (macro == &IEEED) return "1"; + if (macro == &macM2) return "1"; + if (macro == &appleM) return "1"; + if (macro == &alignM) return "1"; + + if (macro == &optM) { + switch (copts.optimizationlevel) { + case 1: return "1"; + case 2: return "2"; + case 3: return "3"; + case 4: return "4"; + case 0: return "0"; + default: return "9"; + } + } + + if (macro == &_machM) return "1"; + if (macro == &archM) return "ppc"; + if (macro == &dynM) return "1"; + if (!copts.ANSIstrict && macro == &ppcM) return "1"; + if (macro == &_ppc_M) return "1"; + + CError_FATAL(4801); + return "0"; +} + +void CodeGen_reportheapinfo(Boolean release_flag, char *name, char *text) { + HeapInfo o; + HeapInfo all; + + CTool_GetHeapInfo(&o, 3); + CTool_GetHeapInfo(&all, 5); + + if (release_flag) + releaseoheap(); + + PPCError_Message( + "%n:%u HEAP STATUS\n" + " optimizer: %i blocks, used: %i, free: %i, total: %i, largest free: %i\n" + " all: %i blocks, used: %i, free: %i, total: %i, largest free: %i", + name, text, + o.blocks, o.total_size - o.total_free, o.total_free, o.total_size, o.largest_free_block, + all.blocks, all.total_size - all.total_free, all.total_free, all.total_size, all.largest_free_block + ); +} + +static void CodeGen_heaperror(void) { + CodeGen_reportheapinfo(1, "?", "out of memory"); + if (saveheaperror) { + setheaperror(saveheaperror); + saveheaperror(); + } +} + +void CodeGen_InitialSanityCheck(void) { +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Exceptions.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Exceptions.c new file mode 100644 index 0000000..14627cf --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Exceptions.c @@ -0,0 +1,857 @@ +#include "compiler/Exceptions.h" +#include "compiler/CError.h" +#include "compiler/CException.h" +#include "compiler/CInit.h" +#include "compiler/CFunc.h" +#include "compiler/CParser.h" +#include "compiler/CompilerTools.h" +#include "compiler/ObjGenMachO.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/objects.h" + +static PCAction *pc_actions; +static PCAction *last_pc_action; +EANode *DAG[EAT_NACTIONS]; +static GList exceptmodule; +static OLinkList *except_refs; +static OLinkList *last_except_ref; + +static EANode *makeEAnode(ExceptionAction *ea) { + EANode *prev; + EANode *node; + + for (node = DAG[ea->type]; node; node = node->dagListNext) { + if (node->action == ea) + return node; + } + + if (ea->prev) + prev = makeEAnode(ea->prev); + else + prev = NULL; + + for (node = DAG[ea->type]; node; node = node->dagListNext) { + if (node->prev == prev && CExcept_ActionCompare(node->action, ea)) + return node; + } + + node = lalloc(sizeof(EANode)); + node->prev = prev; + node->action = ea; + node->count = 0; + node->xE = 0; + + node->dagListNext = DAG[ea->type]; + DAG[ea->type] = node; + + if (prev) + prev->count++; + return node; +} + +static void addrelocation(Object *obj, SInt32 offset) { + OLinkList *ref; + + ref = lalloc(sizeof(OLinkList)); + ref->next = NULL; + ref->obj = obj; + ref->offset = offset; + ref->somevalue = 0; + if (except_refs) + last_except_ref->next = ref; + else + except_refs = ref; + last_except_ref = ref; +} + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct AABC { + UInt8 a; + UInt8 b; + UInt16 c; + UInt32 d; +} AABC; + +typedef struct AACC { + UInt8 a; + UInt8 b; + UInt32 c; + UInt32 d; +} AACC; + +typedef struct AABBC { + UInt8 a; + UInt8 b; + UInt16 c; + UInt16 d; + UInt32 e; +} AABBC; + +typedef struct AABCC { + UInt8 a; + UInt8 b; + UInt16 c; + UInt32 d; + UInt32 e; +} AABCC; + +typedef struct AACCC { + UInt8 a; + UInt8 b; + UInt32 c; + UInt32 d; + UInt32 e; +} AACCC; + +typedef struct AABBBC { + UInt8 a; + UInt8 b; + UInt16 c; + UInt16 d; + UInt16 e; + UInt32 f; +} AABBBC; + +typedef struct AABBCC { + UInt8 a; + UInt8 b; + UInt16 c; + UInt16 d; + UInt32 e; + UInt32 f; +} AABBCC; + +typedef struct AACCCC { + UInt8 a; + UInt8 b; + UInt32 c; + UInt32 d; + UInt32 e; + UInt32 f; +} AACCCC; + +typedef struct AABCCCC { + UInt8 a; + UInt8 b; + UInt16 c; + UInt32 d; + UInt32 e; + UInt32 f; + UInt32 g; +} AABCCCC; + +typedef struct AACCCCC { + UInt8 a; + UInt8 b; + UInt32 c; + UInt32 d; + UInt32 e; + UInt32 f; + UInt32 g; +} AACCCCC; + +typedef struct AAB { + UInt8 a; + UInt8 b; + UInt16 c; +} AAB; + +typedef struct AAC { + UInt8 a; + UInt8 b; + UInt32 c; +} AAC; + +typedef struct AA { + UInt8 a; + UInt8 b; +} AA; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +static void allocateactioninfo(EANode *node) { + ExceptionAction *ea; + SInt32 offset; + UInt32 flag26; + int reg; + int reg2; + + while (node && (node->xE == 0 || node->prev == NULL)) { + offset = exceptmodule.size; + if (node->xE == 0) + node->xE = offset; + + flag26 = node->prev ? 0 : 0x80; + + ea = node->action; + + switch (ea->type) { + case EAT_NOP: + CError_FATAL(146); + break; + case EAT_DESTROYLOCAL: { + if (local_is_16bit_offset(ea->data.destroy_local.local)) { + AABC e; + e.a = flag26 | 2; + e.b = 0; + e.c = CTool_EndianConvertWord16(local_offset_16(ea->data.destroy_local.local)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local.dtor, offset + 4); + } else { + AACC e; + e.a = flag26 | 0x11; + e.b = 0; + e.c = CTool_EndianConvertWord32(local_offset_32(ea->data.destroy_local.local)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local.dtor, offset + 6); + } + break; + } + case EAT_DESTROYLOCALCOND: { + reg = OBJECT_REG(ea->data.destroy_local_cond.cond); + if ( + (reg || local_is_16bit_offset(ea->data.destroy_local_cond.cond)) && + local_is_16bit_offset(ea->data.destroy_local_cond.local) + ) + { + AABBC e; + e.a = flag26 | 3; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_local_cond.cond)); + e.d = CTool_EndianConvertWord16(local_offset_16(ea->data.destroy_local_cond.local)); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_cond.dtor, offset + 6); + } else { + AACCC e; + e.a = flag26 | 0x12; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_local_cond.cond)); + e.d = CTool_EndianConvertWord32(local_offset_32(ea->data.destroy_local_cond.local)); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_cond.dtor, offset + 10); + } + break; + } + case EAT_DESTROYLOCALOFFSET: { + if (local_is_16bit_offset(ea->data.destroy_local_offset.local)) { + AABC e; + e.a = flag26 | 2; + e.b = 0; + e.c = CTool_EndianConvertWord16(ea->data.destroy_local_offset.offset + local_offset_16(ea->data.destroy_local_offset.local)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_offset.dtor, offset + 4); + } else { + AACC e; + e.a = flag26 | 0x11; + e.b = 0; + e.c = CTool_EndianConvertWord32(ea->data.destroy_local_offset.offset + local_offset_32(ea->data.destroy_local_offset.local)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_offset.dtor, offset + 6); + } + break; + } + case EAT_DESTROYLOCALPOINTER: { + reg = OBJECT_REG(ea->data.destroy_local_pointer.pointer); + if (reg || local_is_16bit_offset(ea->data.destroy_local_pointer.pointer)) { + AABC e; + e.a = flag26 | 4; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_local_pointer.pointer)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_pointer.dtor, offset + 4); + } else { + AACC e; + e.a = flag26 | 0x13; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_local_pointer.pointer)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_pointer.dtor, offset + 6); + } + break; + } + case EAT_DESTROYLOCALARRAY: { + if (local_is_16bit_offset(ea->data.destroy_local_array.localarray)) { + AABBBC e; + e.a = flag26 | 5; + e.b = 0; + e.c = CTool_EndianConvertWord16(local_offset_16(ea->data.destroy_local_array.localarray)); + e.d = CTool_EndianConvertWord16(ea->data.destroy_local_array.elements); + e.e = CTool_EndianConvertWord16(ea->data.destroy_local_array.element_size); + e.f = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_array.dtor, offset + 8); + } else { + AACCCC e; + e.a = flag26 | 0x14; + e.b = 0; + e.c = CTool_EndianConvertWord32(local_offset_32(ea->data.destroy_local_array.localarray)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_local_array.elements); + e.e = CTool_EndianConvertWord32(ea->data.destroy_local_array.element_size); + e.f = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_local_array.dtor, offset + 14); + } + break; + } + case EAT_DESTROYMEMBER: { + reg = OBJECT_REG(ea->data.destroy_member.objectptr); + if (reg || local_is_16bit_offset(ea->data.destroy_member.objectptr)) { + AABCC e; + e.a = flag26 | 7; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member.objectptr)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member.dtor, offset + 8); + } else { + AACCC e; + e.a = flag26 | 0x16; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member.objectptr)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member.dtor, offset + 10); + } + break; + } + case EAT_DESTROYBASE: { + reg = OBJECT_REG(ea->data.destroy_member.objectptr); + if (reg || local_is_16bit_offset(ea->data.destroy_member.objectptr)) { + AABCC e; + e.a = flag26 | 6; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member.objectptr)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member.dtor, offset + 8); + } else { + AACCC e; + e.a = flag26 | 0x15; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member.objectptr)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member.dtor, offset + 10); + } + break; + } + case EAT_DESTROYMEMBERCOND: { + reg = OBJECT_REG(ea->data.destroy_member_cond.cond); + reg2 = OBJECT_REG(ea->data.destroy_member_cond.objectptr); + if ( + (reg || local_is_16bit_offset(ea->data.destroy_member_cond.cond)) && + (reg2 || local_is_16bit_offset(ea->data.destroy_member_cond.objectptr)) + ) + { + AABBCC e; + e.a = flag26 | 8; + e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6); + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member_cond.cond)); + e.d = CTool_EndianConvertWord16(reg2 ? reg2 : local_offset_16(ea->data.destroy_member_cond.objectptr)); + e.e = CTool_EndianConvertWord32(ea->data.destroy_member_cond.offset); + e.f = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member_cond.dtor, offset + 10); + } else { + AACCCC e; + e.a = flag26 | 0x17; + e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6); + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member_cond.cond)); + e.d = CTool_EndianConvertWord32(reg2 ? reg2 : local_offset_32(ea->data.destroy_member_cond.objectptr)); + e.e = CTool_EndianConvertWord32(ea->data.destroy_member_cond.offset); + e.f = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member_cond.dtor, offset + 14); + } + break; + } + case EAT_DESTROYMEMBERARRAY: { + reg = OBJECT_REG(ea->data.destroy_member_array.objectptr); + if (reg || local_is_16bit_offset(ea->data.destroy_member_array.objectptr)) { + AABCCCC e; + e.a = flag26 | 9; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member_array.objectptr)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_member_array.offset); + e.e = CTool_EndianConvertWord32(ea->data.destroy_member_array.elements); + e.f = CTool_EndianConvertWord32(ea->data.destroy_member_array.element_size); + e.g = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member_array.dtor, offset + 16); + } else { + AACCCCC e; + e.a = flag26 | 0x18; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member_array.objectptr)); + e.d = CTool_EndianConvertWord32(ea->data.destroy_member_array.offset); + e.e = CTool_EndianConvertWord32(ea->data.destroy_member_array.elements); + e.f = CTool_EndianConvertWord32(ea->data.destroy_member_array.element_size); + e.g = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.destroy_member_array.dtor, offset + 18); + } + break; + } + case EAT_DELETEPOINTER: + case EAT_DELETELOCALPOINTER: { + reg = OBJECT_REG(ea->data.delete_pointer.pointerobject); + if (reg || local_is_16bit_offset(ea->data.delete_pointer.pointerobject)) { + AABC e; + e.a = flag26 | 0xA; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.delete_pointer.pointerobject)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.delete_pointer.deletefunc, offset + 4); + } else { + AACC e; + e.a = flag26 | 0x19; + e.b = (reg != 0) << 7; + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.delete_pointer.pointerobject)); + e.d = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.delete_pointer.deletefunc, offset + 6); + } + break; + } + case EAT_DELETEPOINTERCOND: { + reg = OBJECT_REG(ea->data.delete_pointer_cond.cond); + reg2 = OBJECT_REG(ea->data.delete_pointer_cond.pointerobject); + if ( + (reg || local_is_16bit_offset(ea->data.delete_pointer_cond.cond)) && + (reg2 || local_is_16bit_offset(ea->data.delete_pointer_cond.pointerobject)) + ) + { + AABBC e; + e.a = flag26 | 0xB; + e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6); + e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.delete_pointer_cond.cond)); + e.d = CTool_EndianConvertWord16(reg2 ? reg2 : local_offset_16(ea->data.delete_pointer_cond.pointerobject)); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.delete_pointer_cond.deletefunc, offset + 6); + } else { + AACCC e; + e.a = flag26 | 0x1A; + e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6); + e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.delete_pointer_cond.cond)); + e.d = CTool_EndianConvertWord32(reg2 ? reg2 : local_offset_32(ea->data.delete_pointer_cond.pointerobject)); + e.e = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + addrelocation(ea->data.delete_pointer_cond.deletefunc, offset + 10); + } + break; + } + case EAT_CATCHBLOCK: { + AACCC e; + e.a = flag26 | 0x10; + e.b = 0; + e.c = 0; + if (ea->data.catch_block.catch_label->pclabel) + e.d = CTool_EndianConvertWord32(ea->data.catch_block.catch_label->pclabel->block->codeOffset); + else + e.d = 0; + e.e = CTool_EndianConvertWord32(local_offset_16(ea->data.catch_block.catch_info_object)); + AppendGListData(&exceptmodule, &e, sizeof(e)); + if (ea->data.catch_block.catch_typeid) + addrelocation(ea->data.catch_block.catch_typeid, offset + 2); + break; + } + case EAT_ACTIVECATCHBLOCK: { + if (local_is_16bit_offset(ea->data.active_catch_block.catch_info_object)) { + AAB e; + e.a = flag26 | 0xD; + e.b = 0; + e.c = CTool_EndianConvertWord16(local_offset_16(ea->data.active_catch_block.catch_info_object)); + AppendGListData(&exceptmodule, &e, sizeof(e)); + } else { + AAC e; + e.a = flag26 | 0x1B; + e.b = 0; + e.c = CTool_EndianConvertWord32(local_offset_32(ea->data.active_catch_block.catch_info_object)); + AppendGListData(&exceptmodule, &e, sizeof(e)); + } + break; + } + case EAT_SPECIFICATION: { + AABCC e; + int i; + + e.a = flag26 | 0xF; + e.b = 0; + e.c = CTool_EndianConvertWord16(ea->data.specification.unexp_ids); + if (ea->data.specification.unexp_label->pclabel) + e.d = CTool_EndianConvertWord32(ea->data.specification.unexp_label->pclabel->block->codeOffset); + e.e = CTool_EndianConvertWord32(local_offset_16(ea->data.specification.unexp_info_object)); + AppendGListData(&exceptmodule, &e, sizeof(e)); + + for (i = 0; i < ea->data.specification.unexp_ids; i++) { + addrelocation(ea->data.specification.unexp_id[i], 12 + i * 4 + offset); + AppendGListLong(&exceptmodule, 0); + } + break; + } + case EAT_TERMINATE: { + AA e; + e.a = flag26 | 0xE; + e.b = 0; + AppendGListData(&exceptmodule, &e, sizeof(e)); + break; + } + default: + CError_FATAL(671); + } + + node = node->prev; + } + + if (node) { + AAB e; + e.a = 1; + e.b = 0; + e.c = CTool_EndianConvertWord16(node->xE); + AppendGListData(&exceptmodule, &e, sizeof(e)); + } +} + +static UInt32 findPC(PCode *instr) { + UInt32 pc = instr->block->codeOffset; + instr = instr->prevPCode; + while (instr) { + instr = instr->prevPCode; + pc += 4; + } + CError_ASSERT(704, FITS_IN_USHORT(pc)); + return pc; +} + +static UInt32 findPC_long(PCode *instr) { + UInt32 pc = instr->block->codeOffset; + instr = instr->prevPCode; + while (instr) { + instr = instr->prevPCode; + pc += 4; + } + return pc; +} + +void initializeexceptiontables(void) { + int i; + + for (i = 0; i < EAT_NACTIONS; i++) + DAG[i] = NULL; + + pc_actions = last_pc_action = NULL; + except_refs = last_except_ref = NULL; +} + +int countexceptionactionregisters(ExceptionAction *actions) { + int count = 0; + + while (actions) { + switch (actions->type) { + case EAT_DESTROYLOCALCOND: + if (OBJECT_REG(actions->data.destroy_local_cond.cond)) + count++; + break; + case EAT_DESTROYLOCALPOINTER: + if (OBJECT_REG(actions->data.destroy_local_pointer.pointer)) + count++; + break; + case EAT_DESTROYMEMBER: + if (OBJECT_REG(actions->data.destroy_member.objectptr)) + count++; + break; + case EAT_DESTROYBASE: + if (OBJECT_REG(actions->data.destroy_base.objectptr)) + count++; + break; + case EAT_DESTROYMEMBERCOND: + if (OBJECT_REG(actions->data.destroy_member_cond.cond)) + count++; + if (OBJECT_REG(actions->data.destroy_member_cond.objectptr)) + count++; + break; + case EAT_DESTROYMEMBERARRAY: + if (OBJECT_REG(actions->data.destroy_member_array.objectptr)) + count++; + break; + case EAT_DELETEPOINTER: + case EAT_DELETELOCALPOINTER: + if (OBJECT_REG(actions->data.delete_pointer.pointerobject)) + count++; + break; + case EAT_DELETEPOINTERCOND: + if (OBJECT_REG(actions->data.delete_pointer_cond.cond)) + count++; + if (OBJECT_REG(actions->data.delete_pointer_cond.pointerobject)) + count++; + break; + } + actions = actions->prev; + } + + return count; +} + +void noteexceptionactionregisters(ExceptionAction *actions, PCodeArg *ops) { + Object *obj; + int reg; + + while (actions) { + switch (actions->type) { + case EAT_DESTROYLOCALCOND: + if ((reg = OBJECT_REG(obj = actions->data.destroy_local_cond.cond))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DESTROYLOCALPOINTER: + if ((reg = OBJECT_REG(obj = actions->data.destroy_local_pointer.pointer))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DESTROYMEMBER: + if ((reg = OBJECT_REG(obj = actions->data.destroy_member.objectptr))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DESTROYBASE: + if ((reg = OBJECT_REG(obj = actions->data.destroy_base.objectptr))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DESTROYMEMBERCOND: + if ((reg = OBJECT_REG(obj = actions->data.destroy_member_cond.cond))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + if ((reg = OBJECT_REG(obj = actions->data.destroy_member_cond.objectptr))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DESTROYMEMBERARRAY: + if ((reg = OBJECT_REG(obj = actions->data.destroy_member_array.objectptr))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DELETEPOINTER: + case EAT_DELETELOCALPOINTER: + if ((reg = OBJECT_REG(obj = actions->data.delete_pointer.pointerobject))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + case EAT_DELETEPOINTERCOND: + if ((reg = OBJECT_REG(obj = actions->data.delete_pointer_cond.cond))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + if ((reg = OBJECT_REG(obj = actions->data.delete_pointer_cond.pointerobject))) { + ops->kind = PCOp_REGISTER; + ops->arg = obj->u.var.info->rclass; + ops->data.reg.reg = reg; + ops->data.reg.effect = EffectRead | Effect8; + ops++; + } + break; + } + actions = actions->prev; + } +} + +void recordexceptionactions(PCode *instr, ExceptionAction *actions) { + PCAction *pca; + + if (!actions && (!last_pc_action || !last_pc_action->actions)) + return; + + pca = lalloc(sizeof(PCAction)); + pca->next = NULL; + pca->firstInstr = pca->lastInstr = instr; + pca->actions = actions; + pca->prev = last_pc_action; + if (last_pc_action) + last_pc_action->next = pca; + else + pc_actions = pca; + last_pc_action = pca; + + branch_label(makepclabel()); + while (actions) { + if (actions->type == EAT_CATCHBLOCK && actions->data.catch_block.catch_label->pclabel) + pcbranch(instr->block, actions->data.catch_block.catch_label->pclabel); + else if (actions->type == EAT_SPECIFICATION && actions->data.specification.unexp_label->pclabel) + pcbranch(instr->block, actions->data.specification.unexp_label->pclabel); + actions = actions->prev; + } +} + +static void deleteexceptionaction(PCAction *pca) { + if (pca->prev) + pca->prev->next = pca->next; + else + pc_actions = pca->next; + + if (pca->next) + pca->next->prev = pca->prev; +} + +static int mergeexceptionactions(void) { + int count; + PCAction *pca; + PCAction *prev; + + if (!pc_actions) + return 0; + + for (pca = pc_actions; pca; pca = pca->next) { + if (pca->firstInstr->block->flags & fDeleted) + deleteexceptionaction(pca); + } + + if (!(pca = pc_actions)) + return 0; + + while (pca) { + pca->node = pca->actions ? makeEAnode(pca->actions) : NULL; + pca = pca->next; + } + + prev = pc_actions; + for (pca = pc_actions->next; pca; pca = pca->next) { + if (pca->node == prev->node) { + prev->lastInstr = pca->lastInstr; + deleteexceptionaction(pca); + } else { + prev = pca; + } + } + + count = 0; + for (pca = pc_actions; pca; pca = pca->next) { + if (!pca->actions) + deleteexceptionaction(pca); + else + count++; + } + + return count; +} + +typedef struct ExceptionThing { + UInt32 x0; + UInt16 x4; + UInt16 x6; +} ExceptionThing; + +void dumpexceptiontables(Object *function, SInt32 codesize) { + PCAction *pca; + UInt32 insn_start; + UInt32 insn_count; + UInt16 *sh; + ExceptionThing *thing; + int count; + + count = mergeexceptionactions(); + InitGList(&exceptmodule, 256); + AppendGListNoData(&exceptmodule, 8 * count + 4); + AppendGListLong(&exceptmodule, 0); + + for (pca = pc_actions; pca; pca = pca->next) { + if (pca->node->count == 0 && pca->node->xE == 0) + allocateactioninfo(pca->node); + } + + sh = (UInt16 *) *exceptmodule.data; + if (copts.altivec_model && used_nonvolatile_registers[RegClass_VR]) { + sh[0] = + CTool_EndianConvertWord16( + (used_nonvolatile_registers[RegClass_GPR] << 11) | + ((used_nonvolatile_registers[RegClass_FPR] & 0x1F) << 6) | + (((used_nonvolatile_registers[RegClass_CRFIELD] != 0) & 1) << 5) | + ((dynamic_stack & 1) << 4) | + 8); + sh[0] |= 4; + if (copts.altivec_vrsave) + sh[1] = CTool_EndianConvertWord16((used_nonvolatile_registers[RegClass_VR] << 11) | 0x400); + else + sh[1] = CTool_EndianConvertWord16((used_nonvolatile_registers[RegClass_VR] & 0x1F) << 11); + } else { + sh[0] = + CTool_EndianConvertWord16( + (used_nonvolatile_registers[RegClass_GPR] << 11) | + ((used_nonvolatile_registers[RegClass_FPR] & 0x1F) << 6) | + (((used_nonvolatile_registers[RegClass_CRFIELD] != 0) & 1) << 5) | + ((dynamic_stack & 1) << 4) | + 8); + sh[1] = 0; + } + + thing = (ExceptionThing *) (sh + 2); + pca = pc_actions; + while (pca) { + insn_start = findPC_long(pca->firstInstr); + insn_count = (findPC_long(pca->lastInstr) - insn_start) / 4; + CError_ASSERT(1203, (insn_count & 0xFFFF0000) == 0); + thing->x0 = CTool_EndianConvertWord32(insn_start + 4); + thing->x4 = CTool_EndianConvertWord16(insn_count); + thing->x6 = CTool_EndianConvertWord16(pca->node->xE); + pca = pca->next; + thing++; + } + + LockGList(&exceptmodule); + ObjGen_DeclareExceptionTables(function, codesize, *exceptmodule.data, exceptmodule.size, except_refs); + FreeGList(&exceptmodule); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/FunctionCalls.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/FunctionCalls.c new file mode 100644 index 0000000..67d7443 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/FunctionCalls.c @@ -0,0 +1,642 @@ +#include "compiler/FunctionCalls.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/InstrSelection.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/Registers.h" +#include "compiler/StackFrame.h" +#include "compiler/StructMoves.h" +#include "compiler/types.h" + +enum { + AIF_PassInGPR = 1, + AIF_PassInFPR = 2, + AIF_PassOnStack = 4, + AIF_ExtendTo32Bits = 8, + AIF_ForceDoublePrecision = 0x10, + AIF_PassInVR = 0x20, + AIF_PassMask = AIF_PassInGPR | AIF_PassInFPR | AIF_PassOnStack | AIF_PassInVR +}; + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct ArgInfo { + struct ArgInfo *next; + ENode *expr; + Operand opnd; + SInt32 offset; + short gpr; + short gprHi; + short fpr; + short vr; + short evaluated; + short flags; +} ArgInfo; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +// forward decls +static void branch_subroutine_indirect_ctr(Operand *addrOpnd, UInt32 *used_regs); + +static ArgInfo *make_arginfo(ENode *expr) { + ArgInfo *info = lalloc(sizeof(ArgInfo)); + memclrw(info, sizeof(ArgInfo)); + + info->next = NULL; + info->expr = expr; + info->offset = -1; + info->gpr = -1; + info->gprHi = -1; + info->fpr = -1; + info->vr = -1; + info->evaluated = 0; + info->flags = 0; + + return info; +} + +static ArgInfo *analyze_arguments(ENode *funcref, ENodeList *arg_expr, FuncArg *arg, UInt32 *used_regs, Boolean *resultHasFloats, char has_varargs) { + ArgInfo *infos; + ArgInfo *info; + SInt32 displ; + SInt32 arg_size; + int gpr_counter; + int fpr_counter; + int vr_counter; + Type *type; + RegClass rclass; + Boolean spilledVectorFlag; + + infos = NULL; + displ = 0; + gpr_counter = 3; + fpr_counter = 1; + vr_counter = 2; + + for (rclass = 0; rclass < RegClassMax; rclass++) + used_regs[rclass] = 0; + *resultHasFloats = 0; + + while (arg_expr) { + if (arg_expr->node == funcref) { + arg_expr = arg_expr->next; + arg = arg->next; + continue; + } + + type = arg_expr->node->rtype; + if (infos) { + info->next = make_arginfo(arg_expr->node); + info = info->next; + } else { + infos = info = make_arginfo(arg_expr->node); + } + + arg_size = 0; + if (IS_TYPE_VECTOR(type)) { + if (arg == &elipsis) { + spilledVectorFlag = 1; + info->flags |= AIF_PassOnStack; + } else { + spilledVectorFlag = 0; + if (vr_counter <= 13) { + info->flags |= AIF_PassInVR; + info->vr = vr_counter; + used_regs[RegClass_VR] |= 1 << vr_counter; + } else { + spilledVectorFlag = 1; + info->flags |= AIF_PassOnStack; + } + } + + if (has_varargs) { + if (gpr_counter < 10) { + gpr_counter = ((gpr_counter - 2) & ~3) + 5; + if (arg == &elipsis && gpr_counter < 10) { + info->flags |= AIF_PassInGPR; + info->gpr = gpr_counter; + used_regs[RegClass_GPR] |= (15 << gpr_counter) & 0x7E0; + } + gpr_counter += 4; + } + spilledVectorFlag = 1; + } + + if (spilledVectorFlag) + arg_size = 16; + vr_counter++; + } else if (IS_TYPE_FLOAT(type)) { + *resultHasFloats = 1; + if (!arg || arg == &oldstyle) { + if (fpr_counter <= 13) { + info->flags |= AIF_PassInFPR; + info->fpr = fpr_counter; + used_regs[RegClass_FPR] |= 1 << fpr_counter; + } else { + info->flags |= AIF_PassOnStack | AIF_ForceDoublePrecision; + } + arg_size = 8; + fpr_counter++; + gpr_counter += 2; + } else if (arg == &elipsis) { + if (gpr_counter < 10) { + info->flags |= AIF_PassInGPR; + info->gpr = gpr_counter; + used_regs[RegClass_GPR] |= 3 << gpr_counter; + } else if (gpr_counter == 10) { + info->flags |= AIF_PassInGPR | AIF_PassOnStack | AIF_ForceDoublePrecision; + info->gpr = gpr_counter; + used_regs[RegClass_GPR] |= 3 << gpr_counter; + } else { + info->flags |= AIF_PassOnStack | AIF_ForceDoublePrecision; + } + arg_size = 8; + fpr_counter++; + gpr_counter += 2; + } else { + if (fpr_counter <= 13) { + info->flags |= AIF_PassInFPR; + info->fpr = fpr_counter; + used_regs[RegClass_FPR] |= 1 << fpr_counter; + } else { + info->flags |= AIF_PassOnStack; + } + + if (type->size == 4) { + arg_size = 4; + gpr_counter++; + } else { + arg_size = 8; + gpr_counter += 2; + } + + fpr_counter++; + } + } else if (TYPE_IS_8BYTES(type)) { + if (gpr_counter <= 10) { + info->flags |= AIF_PassInGPR; + if (copts.littleendian) { + info->gpr = gpr_counter; + info->gprHi = gpr_counter + 1; + } else { + info->gpr = gpr_counter + 1; + info->gprHi = gpr_counter; + } + used_regs[RegClass_GPR] |= 1 << gpr_counter; + if ((gpr_counter + 1) <= 10) + used_regs[RegClass_GPR] |= 1 << (gpr_counter + 1); + } else { + info->flags |= AIF_PassOnStack; + } + + arg_size = 8; + gpr_counter += 2; + } else if (TYPE_FITS_IN_REGISTER(type)) { + if ((!arg || arg == &elipsis || arg == &oldstyle) && type->size < 4) + info->flags |= AIF_ExtendTo32Bits; + + if (gpr_counter <= 10) { + info->flags |= AIF_PassInGPR; + info->gpr = gpr_counter; + used_regs[RegClass_GPR] |= 1 << gpr_counter; + } else { + info->flags |= AIF_PassOnStack; + } + + arg_size = 4; + gpr_counter++; + } else if (IS_TYPE_ARRAY(type) || IS_TYPE_NONVECTOR_STRUCT(type) || IS_TYPE_CLASS(type) || + IS_TYPE_12BYTES_MEMBERPOINTER(type)) { + SInt32 gprs_needed = (type->size >> 2) + ((type->size & 3) != 0); + if (gpr_counter <= 10) { + if ((gpr_counter + gprs_needed - 1) <= 10) { + info->flags |= AIF_PassInGPR; + info->gpr = gpr_counter; + used_regs[RegClass_GPR] |= ((1 << gprs_needed) - 1) << gpr_counter; + } else { + info->flags |= AIF_PassInGPR | AIF_PassOnStack; + info->gpr = gpr_counter; + used_regs[RegClass_GPR] |= ((1 << (11 - gpr_counter)) - 1) << gpr_counter; + } + } else { + info->flags |= AIF_PassOnStack; + } + + gpr_counter += gprs_needed; + arg_size = type->size; + } else { + CError_FATAL(421); + } + + displ = set_out_param_displ(displ, type, info->flags & AIF_PassOnStack, &info->offset, arg_size); + + arg_expr = arg_expr->next; + if (arg && arg != &elipsis && arg != &oldstyle) + arg = arg->next; + } + + update_out_param_size(displ); + + return infos; +} + +static void pass_in_memory(ArgInfo *info) { + Type *type; + Operand opnd; + + type = info->expr->rtype; + memclrw(&opnd, sizeof(Operand)); + + if (TYPE_FITS_IN_REGISTER(type)) { + if (TYPE_IS_8BYTES(type)) { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + coerce_to_register_pair(&info->opnd, type, 0, 0); + + load_store_register( + PC_STW, info->opnd.reg, 1, + NULL, low_offset + out_param_displ_to_offset(info->offset)); + load_store_register( + PC_STW, info->opnd.regHi, 1, + NULL, high_offset + out_param_displ_to_offset(info->offset)); + } else { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + if (info->flags & AIF_ExtendTo32Bits) + extend32(&info->opnd, type, 0); + ENSURE_GPR(&info->opnd, type, 0); + + load_store_register( + PC_STW, info->opnd.reg, 1, + NULL, out_param_displ_to_offset(info->offset)); + } + } else if (IS_TYPE_FLOAT(type)) { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + ENSURE_FPR(&info->opnd, type, 0); + + if (type->size == 4 && !(info->flags & AIF_ForceDoublePrecision)) { + load_store_register( + PC_STFS, info->opnd.reg, 1, + NULL, out_param_displ_to_offset(info->offset)); + } else { + load_store_register( + PC_STFD, info->opnd.reg, 1, + NULL, out_param_displ_to_offset(info->offset)); + } + } else if (IS_TYPE_VECTOR(type)) { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + ENSURE_VR(&info->opnd, type, 0); + + load_store_register( + PC_STVX, info->opnd.reg, 1, + NULL, out_param_displ_to_offset(info->offset)); + } else { + opnd.optype = OpndType_IndirectGPR_ImmOffset; + opnd.reg = 1; + opnd.object = NULL; + opnd.immOffset = out_param_displ_to_offset(info->offset); + + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + + move_block(&opnd, &info->opnd, type->size, CMach_ArgumentAlignment(type)); + } +} + +static void pass_in_register(ArgInfo *info) { + Type *type; + + type = info->expr->rtype; + + if ((info->flags & AIF_PassMask) == AIF_PassInFPR) { + if (!info->evaluated) + GEN_NODE_TO_REG(info->expr, info->fpr, 0, &info->opnd); + ENSURE_FPR(&info->opnd, type, info->fpr); + if (info->opnd.reg != info->fpr) + emitpcode(PC_FMR, info->fpr, info->opnd.reg); + } else if ((info->flags & AIF_PassMask) == AIF_PassInVR) { + if (!info->evaluated) + GEN_NODE_TO_REG(info->expr, info->vr, 0, &info->opnd); + ENSURE_VR(&info->opnd, type, info->vr); + if (info->opnd.reg != info->vr) + emitpcode(PC_VMR, info->vr, info->opnd.reg); + } else if (TYPE_FITS_IN_REGISTER(type)) { + if (TYPE_IS_8BYTES(type)) { + if (!info->evaluated) + GEN_NODE_TO_REG(info->expr, info->gpr, info->gprHi, &info->opnd); + coerce_to_register_pair(&info->opnd, type, info->gpr, info->gprHi); + if (copts.littleendian) { + if (info->gprHi > 10) { + load_store_register( + PC_STW, info->opnd.regHi, 1, + NULL, high_offset + out_param_displ_to_offset(info->offset)); + } + } else { + if (info->gpr > 10) { + load_store_register( + PC_STW, info->opnd.reg, 1, + NULL, low_offset + out_param_displ_to_offset(info->offset)); + } + } + } else { + if (!info->evaluated) + GEN_NODE_TO_REG(info->expr, info->gpr, 0, &info->opnd); + if (info->flags & AIF_ExtendTo32Bits) + extend32(&info->opnd, type, info->gpr); + ENSURE_GPR(&info->opnd, type, info->gpr); + if (info->opnd.reg != info->gpr) + emitpcode(PC_MR, info->gpr, info->opnd.reg); + } + } else if (IS_TYPE_FLOAT(type)) { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + + if (type->size != 4 && info->opnd.optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register( + PC_LWZ, info->gpr, info->opnd.reg, + info->opnd.object, info->opnd.immOffset); + load_store_register( + PC_LWZ, info->gpr + 1, info->opnd.reg, + info->opnd.object, info->opnd.immOffset + 4); + } else { + ENSURE_FPR(&info->opnd, type, 0); + load_store_register( + PC_STFD, info->opnd.reg, 1, + NULL, out_param_displ_to_offset(info->offset)); + load_store_register( + PC_LWZ, info->gpr, 1, + NULL, out_param_displ_to_offset(info->offset)); + load_store_register( + PC_LWZ, info->gpr + 1, 1, + NULL, out_param_displ_to_offset(info->offset) + 4); + } + } else if (IS_TYPE_VECTOR(type)) { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + + if (info->opnd.optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register( + PC_LWZ, info->gpr, info->opnd.reg, + info->opnd.object, info->opnd.immOffset); + load_store_register( + PC_LWZ, info->gpr + 1, info->opnd.reg, + info->opnd.object, info->opnd.immOffset + 4); + if ((info->gpr + 2) < 10) { + load_store_register( + PC_LWZ, info->gpr + 2, info->opnd.reg, + info->opnd.object, info->opnd.immOffset + 8); + load_store_register( + PC_LWZ, info->gpr + 3, info->opnd.reg, + info->opnd.object, info->opnd.immOffset + 12); + } + } else { + ENSURE_VR(&info->opnd, type, 0); + load_store_register( + PC_STVX, info->opnd.reg, 1, + NULL, out_param_displ_to_offset(info->offset)); + load_store_register( + PC_LWZ, info->gpr, 1, + NULL, out_param_displ_to_offset(info->offset)); + load_store_register( + PC_LWZ, info->gpr + 1, 1, + NULL, out_param_displ_to_offset(info->offset) + 4); + if ((info->gpr + 2) < 10) { + load_store_register( + PC_LWZ, info->gpr + 2, 1, + NULL, out_param_displ_to_offset(info->offset) + 8); + load_store_register( + PC_LWZ, info->gpr + 3, 1, + NULL, out_param_displ_to_offset(info->offset) + 12); + } + } + } else { + if (!info->evaluated) + GEN_NODE(info->expr, &info->opnd); + + if (type->size <= 4) { + if (info->opnd.optype == OpndType_IndirectSymbol) + coerce_to_addressable(&info->opnd); + + if (info->opnd.optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register( + PC_LWZ, info->gpr, info->opnd.reg, + info->opnd.object, info->opnd.immOffset); + } else if (info->opnd.optype == OpndType_IndirectGPR_Indexed) { + emitpcode( + PC_LWZX, info->gpr, info->opnd.reg, + info->opnd.regOffset); + } + } else { + SInt32 gprs_needed = (type->size >> 2) + ((type->size & 3) != 0); + SInt32 i; + + make_addressable(&info->opnd, gprs_needed * 4, 12); + for (i = 0; i < gprs_needed; i++) { + if (info->opnd.reg != (info->gpr + i)) { + load_store_register( + PC_LWZ, info->gpr + i, info->opnd.reg, + info->opnd.object, info->opnd.immOffset + i * 4); + } + } + + if (info->opnd.reg >= info->gpr && info->opnd.reg < (info->gpr + gprs_needed)) { + load_store_register( + PC_LWZ, info->opnd.reg, info->opnd.reg, + info->opnd.object, info->opnd.immOffset + (info->opnd.reg - info->gpr) * 4); + } + } + } +} + +static void pass_in_register_and_memory(ArgInfo *info) { + Type *type; + int gpr; + SInt32 offset; + + type = info->expr->rtype; + gpr = info->gpr; + offset = 0; + while (offset < type->size && gpr <= 10) { + load_store_register( + PC_LWZ, gpr, 1, + NULL, offset + out_param_displ_to_offset(info->offset)); + gpr++; + offset += 4; + } +} + +static Boolean needs_TOC_reload(Object *func) { + return 0; +} + +static void load_virtual_function(TypeClass *tclass, SInt32 offset, int reg, Operand *opnd) { + if (tclass->flags & CLASS_HANDLEOBJECT) { + load_store_register(PC_LWZ, 12, reg, NULL, 0); + load_store_register(PC_LWZ, 12, 12, NULL, tclass->vtable->offset); + } else { + load_store_register(PC_LWZ, 12, reg, NULL, tclass->vtable->offset); + } + load_store_register(PC_LWZ, 12, 12, NULL, offset); + opnd->optype = OpndType_GPR; + opnd->reg = 12; +} + +static void branch_subroutine_indirect(Object *func, Operand *addrOpnd, UInt32 *used_regs) { + if (addrOpnd->reg != 12) + emitpcode(PC_MR, 12, addrOpnd->reg); + + used_regs[RegClass_GPR] |= 1 << 12; + branch_subroutine(func, 1, used_regs); +} + +static void evaluate_nested_function_calls(ArgInfo *info) { + ArgInfo *scan; + + scan = info->next; + while (scan && !scan->expr->hascall) + scan = scan->next; + + if (scan) + evaluate_nested_function_calls(scan); + + if (info->expr->hascall) { + GEN_NODE(info->expr, &info->opnd); + info->evaluated = 1; + } +} + +void call_function(ENode *expr, Operand *output) { + ArgInfo *infos; // r31 + ENode *funcref = expr->data.funccall.funcref; // r27 + Type *resultType = expr->data.funccall.functype->functype; // r26 + ENode *node = NULL; // r25 + char has_varargs; // r24 + ArgInfo *info; // r22 + Operand opnd; + UInt32 used_regs[RegClassMax] = {0}; + Boolean has_floats; + FuncArg *arg; + + memclrw(&opnd, sizeof(Operand)); + + has_varargs = 0; + for (arg = expr->data.funccall.functype->args; arg; arg = arg->next) { + if (arg == &elipsis) { + has_varargs = 1; + break; + } + } + + if (expr->data.funccall.functype->flags & FUNC_FLAGS_80) { + if (CMach_PassResultInHiddenArg(resultType)) + node = expr->data.funccall.args->next->node; + else + node = expr->data.funccall.args->node; + } + + infos = analyze_arguments( + node, + expr->data.funccall.args, + expr->data.funccall.functype->args, + used_regs, + &has_floats, + has_varargs); + + if (infos) + evaluate_nested_function_calls(infos); + + if (funcref->hascall) { + GEN_NODE_TO_GPR(funcref, &opnd, TYPE(&void_ptr), 0); + } else if (node && node->hascall) { + GEN_NODE_TO_GPR(node, &opnd, TYPE(&void_ptr), 0); + } + + for (info = infos; info; info = info->next) { + if (info->flags & AIF_PassOnStack) + pass_in_memory(info); + } + for (info = infos; info; info = info->next) { + if ((info->flags & AIF_PassMask) == (AIF_PassInGPR | AIF_PassOnStack)) + pass_in_register_and_memory(info); + } + for (info = infos; info; info = info->next) { + int flag = info->flags & AIF_PassMask; + if ( + flag == AIF_PassInGPR || + flag == AIF_PassInFPR || + flag == AIF_PassInVR + ) + pass_in_register(info); + } + + if (funcref->type == EOBJREF) { + TypeClass *tclass; + SInt32 vfOffset; + if (CParser_IsVirtualFunction(funcref->data.objref, &tclass, &vfOffset)) { + load_virtual_function( + tclass, + vfOffset, + CMach_PassResultInHiddenArg(resultType) ? Register4 : Register3, + &opnd + ); + branch_subroutine_indirect_ctr(&opnd, used_regs); + } else if (node) { + if (!node->hascall) { + GEN_NODE_TO_REG(node, 12, 0, &opnd); + ENSURE_GPR(&opnd, TYPE(&void_ptr), 12); + } + branch_subroutine_indirect(funcref->data.objref, &opnd, used_regs); + } else { + branch_subroutine(funcref->data.objref, needs_TOC_reload(funcref->data.objref), used_regs); + } + } else { + if (!funcref->hascall) + GEN_NODE_TO_REG(funcref, 12, 0, &opnd); + ENSURE_GPR(&opnd, TYPE(&void_ptr), 12); + branch_subroutine_indirect_ctr(&opnd, used_regs); + } + + if (IS_TYPE_FLOAT(resultType)) { + output->optype = OpndType_FPR; + output->reg = used_virtual_registers[RegClass_FPR]++; + emitpcode(PC_FMR, output->reg, 1); + } else if (IS_TYPE_VECTOR(resultType)) { + output->optype = OpndType_VR; + output->reg = used_virtual_registers[RegClass_VR]++; + emitpcode(PC_VMR, output->reg, 2); + } else if (TYPE_FITS_IN_REGISTER(resultType)) { + if (resultType->size > 4) { + output->optype = OpndType_GPRPair; + output->reg = used_virtual_registers[RegClass_GPR]++; + output->regHi = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_MR, output->reg, low_reg); + emitpcode(PC_MR, output->regHi, high_reg); + } else { + output->optype = OpndType_GPR; + output->reg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_MR, output->reg, 3); + } + } else { + output->optype = OpndType_Absolute; + output->immediate = 0; + } +} + +static void branch_subroutine_indirect_ctr(Operand *addrOpnd, UInt32 *used_regs) { + if (addrOpnd->reg != 12) + emitpcode(PC_MR, 12, addrOpnd->reg); + + emitpcode(PC_MTCTR, 12); + used_regs[RegClass_GPR] |= 1 << 12; + branch_subroutine_ctr(used_regs); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/InstrSelection.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/InstrSelection.c new file mode 100644 index 0000000..359c980 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/InstrSelection.c @@ -0,0 +1,5348 @@ +#include "compiler/InstrSelection.h" +#include "compiler/CError.h" +#include "compiler/CInt64.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/FunctionCalls.h" +#include "compiler/Intrinsics.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StructMoves.h" +#include "compiler/TOC.h" +#include "compiler/enode.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +PrecomputedOperand *precomputedoperands; +void (*cgdispatch[MAXEXPR + 1])(ENode *, short, short, Operand *); + +// forward decls +static int ispowerof2(SInt32 val); +static void binary_immediate(Opcode opcode, ENode *left, SInt32 value, short outputReg, Operand *output); +static void shift_left_immediate(ENode *expr, short shift, short negate, short outputReg, Operand *output); +static void shift_right_immediate(ENode *expr, Type *type, short shift, short outputReg, Operand *output); +static void or_xor_immediate(Opcode opcode, ENode *expr, SInt32 value, short outputReg, Operand *output); +static void signed_divide_by_power_of_2(ENode *expr, int shift, int negate, short outputReg, Operand *output); +static void signed_mod_by_power_of_2(ENode *expr, int shift, int negate, short outputReg, Operand *output); +static void fp_binary_operator(Opcode opcode, ENode *left, ENode *right, short outputReg, Operand *output); +static void logical_expression_nobranch(ENode *cond, Boolean invert, Operand *output); +static void shift_and_mask(ENode *expr, short a, short b, short c, short outputReg, Operand *output); +static ENodeType invert_relop(ENodeType nt); + +#define IS_INT_CONST(node) ( ENODE_IS((node), EINTCONST) && IS_TYPE_INT((node)->rtype) && (node)->rtype->size <= 4 ) +#define IS_INT_CONST_ZERO(node) ( IS_INT_CONST(node) && (node)->data.intval.lo == 0 ) + +void init_cgdispatch(void) { + ENodeType t; + + for (t = 0; t <= MAXEXPR; t++) + cgdispatch[t] = gen_UNEXPECTED; + + cgdispatch[EPOSTINC] = gen_POSTINCDEC; + cgdispatch[EPOSTDEC] = gen_POSTINCDEC; + cgdispatch[EINDIRECT] = gen_INDIRECT; + cgdispatch[EMONMIN] = gen_MONMIN; + cgdispatch[EBINNOT] = gen_BINNOT; + cgdispatch[ELOGNOT] = gen_LOGICAL; + cgdispatch[EFORCELOAD] = gen_FORCELOAD; + cgdispatch[EMUL] = gen_MUL; + cgdispatch[EDIV] = gen_DIV; + cgdispatch[EMODULO] = gen_MODULO; + cgdispatch[EADD] = gen_ADD; + cgdispatch[ESUB] = gen_SUB; + cgdispatch[ESHL] = gen_SHL; + cgdispatch[ESHR] = gen_SHR; + cgdispatch[ELESS] = gen_COMPARE; + cgdispatch[EGREATER] = gen_COMPARE; + cgdispatch[ELESSEQU] = gen_COMPARE; + cgdispatch[EGREATEREQU] = gen_COMPARE; + cgdispatch[EEQU] = gen_COMPARE; + cgdispatch[ENOTEQU] = gen_COMPARE; + cgdispatch[EAND] = gen_AND; + cgdispatch[EXOR] = gen_XOR; + cgdispatch[EOR] = gen_OR; + cgdispatch[ELAND] = gen_LOGICAL; + cgdispatch[ELOR] = gen_LOGICAL; + cgdispatch[EASS] = gen_ASS; + cgdispatch[ECOMMA] = gen_COMMA; + cgdispatch[ETYPCON] = gen_TYPCON; + cgdispatch[EBITFIELD] = gen_BITFIELD; + cgdispatch[EINTCONST] = gen_INTCONST; + cgdispatch[EFLOATCONST] = gen_FLOATCONST; + cgdispatch[ESTRINGCONST] = gen_STRINGCONST; + cgdispatch[ECOND] = gen_COND; + cgdispatch[EFUNCCALL] = gen_FUNCCALL; + cgdispatch[EFUNCCALLP] = gen_FUNCCALL; + cgdispatch[EOBJREF] = gen_OBJREF; + cgdispatch[ENULLCHECK] = gen_NULLCHECK; + cgdispatch[EPRECOMP] = gen_PRECOMP; + cgdispatch[EDEFINE] = gen_DEFINE; + cgdispatch[EREUSE] = gen_REUSE; + cgdispatch[EVECTOR128CONST] = gen_VECTOR128CONST; + cgdispatch[ECONDASS] = gen_CONDASS; +} + +void gen_DEFINE(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Operand *op; + + if (!expr->data.diadic.right) { + op = lalloc(sizeof(Operand)); + memclrw(op, sizeof(Operand)); + expr->data.diadic.right = (ENode *) op; + GEN_NODE(expr->data.diadic.left, op); + } + + op = (Operand *) expr->data.diadic.right; + *output = *op; +} + +void gen_REUSE(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner = expr->data.monadic; + CError_ASSERT(250, ENODE_IS(inner, EDEFINE)); + gen_DEFINE(inner, outputReg, outputRegHi, output); +} + +void gen_POSTINCDEC(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + TypeBitfield *tbitfield; + ENode *inner; + Type *type; + Operand a; + Operand b; + Operand c; + Float fval; + int objReg; + int constReg; + int finalReg; + SInt32 incval; + + inner = expr->data.monadic->data.monadic; + type = expr->rtype; + tbitfield = NULL; + + memclrw(&a, sizeof(Operand)); + memclrw(&b, sizeof(Operand)); + memclrw(&c, sizeof(Operand)); + + if (TYPE_IS_8BYTES(type)) { + I8_gen_POSTINCDEC(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_FLOAT(type)) { + if (ENODE_IS(inner, EOBJREF) && (objReg = OBJECT_REG(inner->data.objref))) { + output->optype = OpndType_FPR; + output->reg = (outputReg && outputReg != objReg) ? outputReg : ALLOC_FPR(); + emitpcode(PC_FMR, output->reg, objReg); + fval = one_point_zero; + load_floating_constant(constReg = ALLOC_FPR(), type, &fval); + + if (ENODE_IS(expr, EPOSTINC)) { + emitpcode((type->size == 4) ? PC_FADDS : PC_FADD, objReg, objReg, constReg); + } else { + emitpcode((type->size == 4) ? PC_FSUBS : PC_FSUB, objReg, objReg, constReg); + } + } else { + GEN_NODE(inner, &a); + indirect(&a, inner); + b = a; + ENSURE_FPR(&b, type, 0); + + output->optype = OpndType_FPR; + output->reg = ALLOC_FPR(); + emitpcode(PC_FMR, output->reg, b.reg); + + fval = one_point_zero; + load_floating_constant(constReg = ALLOC_FPR(), type, &fval); + + finalReg = ALLOC_FPR(); + if (ENODE_IS(expr, EPOSTINC)) + emitpcode((type->size == 4) ? PC_FADDS : PC_FADD, finalReg, b.reg, constReg); + else + emitpcode((type->size == 4) ? PC_FSUBS : PC_FSUB, finalReg, b.reg, constReg); + + store_fp(finalReg, &a, type); + } + } else { + if (IS_TYPE_POINTER(type)) { + if (ENODE_IS(expr, EPOSTINC)) + incval = TPTR_TARGET(type)->size; + else + incval = -TPTR_TARGET(type)->size; + } else { + if (ENODE_IS(expr, EPOSTINC)) + incval = 1; + else + incval = -1; + } + + if (ENODE_IS(inner, EOBJREF) && (objReg = OBJECT_REG(inner->data.objref))) { + output->optype = OpndType_GPR; + output->reg = (outputReg && outputReg != objReg) ? outputReg : ALLOC_GPR(); + emitpcode(PC_MR, output->reg, objReg); + add_register_immediate(objReg, objReg, incval); + } else { + if (ENODE_IS(inner, EBITFIELD)) { + tbitfield = TYPE_BITFIELD(TPTR_TARGET(inner)); + inner = inner->data.monadic; + } + GEN_NODE(inner, &a); + indirect(&a, inner); + b = a; + ENSURE_GPR(&b, type, 0); + + if (tbitfield) { + c = b; + extract_bitfield(&c, tbitfield, 0, &b); + } + output->optype = OpndType_GPR; + + output->reg = ALLOC_GPR(); + emitpcode(PC_MR, output->reg, b.reg); + + finalReg = ALLOC_GPR(); + add_register_immediate(finalReg, b.reg, incval); + + if (tbitfield) { + insert_bitfield(finalReg, &c, tbitfield); + finalReg = c.reg; + } + + store(finalReg, &a, type); + } + } +} + +void gen_INDIRECT(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *inner; + VarInfo *vi; + SInt32 postincvalue; + Operand op; + + type = expr->rtype; + inner = expr->data.monadic; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_INDIRECT(expr, outputReg, outputRegHi, output); + return; + } + + memclrw(&op, sizeof(Operand)); + if (ENODE_IS(inner, EOBJREF) && OBJECT_REG(inner->data.objref)) { + vi = Registers_GetVarInfo(inner->data.objref); + switch (vi->rclass) { + case RegClass_GPR: + output->optype = OpndType_GPR; + break; + case RegClass_FPR: + output->optype = OpndType_FPR; + break; + case RegClass_VR: + output->optype = OpndType_VR; + break; + case RegClass_CRFIELD: + output->optype = OpndType_CRField; + break; + default: + CError_FATAL(456); + } + output->reg = vi->reg; + output->object = NULL; + return; + } + + if (ENODE_IS(inner, EBITFIELD)) { + GEN_NODE(inner->data.monadic, &op); + indirect(&op, expr); + ENSURE_GPR(&op, type, 0); + extract_bitfield(&op, TYPE_BITFIELD(inner->rtype), outputReg, output); + return; + } + + if (ispostincrementopportunity(inner, &op, &postincvalue) && (TYPE_FITS_IN_REGISTER(type) || IS_TYPE_FLOAT(type) || IS_TYPE_VECTOR(type))) { + indirect(&op, expr); + *output = op; + if (TYPE_FITS_IN_REGISTER(type)) { + ENSURE_GPR(output, type, outputReg); + } else if (IS_TYPE_FLOAT(type)) { + ENSURE_FPR(output, type, outputReg); + } else { + ENSURE_VR(output, type, outputReg); + } + + add_register_immediate(op.reg, op.reg, postincvalue); + return; + } + + GEN_NODE(inner, output); + indirect(output, expr); +} + +void gen_MONMIN(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + Type *type; + ENode *scan; + + inner = expr->data.monadic; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_MONMIN(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_FLOAT(type)) { + if (ENODE_IS(inner, EADD) && ENODE_IS(inner->data.diadic.left, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FNMADDS : PC_FNMADD, + inner->data.diadic.left->data.diadic.left, + inner->data.diadic.left->data.diadic.right, + inner->data.diadic.right, + outputReg, + output); + } else if (ENODE_IS(inner, EADD) && ENODE_IS(inner->data.diadic.right, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FNMADDS : PC_FNMADD, + inner->data.diadic.right->data.diadic.left, + inner->data.diadic.right->data.diadic.right, + inner->data.diadic.left, + outputReg, + output); + } else if (ENODE_IS(inner, ESUB) && ENODE_IS(inner->data.diadic.left, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FNMSUBS : PC_FNMSUB, + inner->data.diadic.left->data.diadic.left, + inner->data.diadic.left->data.diadic.right, + inner->data.diadic.right, + outputReg, + output); + } else { + fp_unary_operator(PC_FNEG, inner, outputReg, output); + } + return; + } + + scan = inner; + while (ENODE_IS(scan, ETYPCON) && IS_TYPE_INT_OR_ENUM(type) && !is_unsigned(type)) + scan = scan->data.monadic; + + switch (scan->type) { + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + if (TYPE_FITS_IN_REGISTER(scan->data.diadic.left->rtype) && TYPE_FITS_IN_REGISTER(scan->data.diadic.right->rtype)) { + gen_negated_condition_gpr(scan, output, outputReg); + return; + } + } + unary_operator(PC_NEG, inner, outputReg, output); +} + +void gen_BINNOT(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + Type *type; + + inner = expr->data.monadic; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_BINNOT(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(inner, EAND)) + binary_operator(PC_NAND, inner->data.diadic.left, inner->data.diadic.right, outputReg, output); + else if (ENODE_IS(inner, EOR)) + binary_operator(PC_NOR, inner->data.diadic.left, inner->data.diadic.right, outputReg, output); + else if (ENODE_IS(inner, EXOR)) + binary_operator(PC_EQV, inner->data.diadic.left, inner->data.diadic.right, outputReg, output); + else + unary_operator(PC_NOT, inner, outputReg, output); +} + +void gen_FORCELOAD(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + + inner = expr->data.monadic; + GEN_NODE(inner, output); + + if (IS_TYPE_FLOAT(inner->rtype)) { + ENSURE_FPR(output, inner->rtype, outputReg); + } else if (IS_TYPE_VECTOR(inner->rtype)) { + ENSURE_VR(output, inner->rtype, outputReg); + } else if (TYPE_FITS_IN_REGISTER(inner->rtype)) { + if (TYPE_IS_8BYTES(inner->rtype)) + coerce_to_register_pair(output, inner->rtype, outputReg, outputRegHi); + else + ENSURE_GPR(output, inner->rtype, outputReg); + } else if (!IS_TYPE_VOID(inner->rtype)) { + CError_FATAL(681); + } +} + +void gen_MUL(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + Type *type; + int tmp; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_MUL(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_FLOAT(type)) { + fp_binary_operator((type->size == 4) ? PC_FMULS : PC_FMUL, left, right, outputReg, output); + return; + } + + if (ENODE_IS(right, EINTCONST) && (tmp = ispowerof2(right->data.intval.lo))) { + shift_left_immediate(left, tmp, 0, outputReg, output); + } else if (ENODE_IS(right, EINTCONST) && (tmp = ispowerof2(-right->data.intval.lo))) { + shift_left_immediate(left, tmp, 1, outputReg, output); + } else if (ENODE_IS(left, EINTCONST) && (tmp = ispowerof2(left->data.intval.lo))) { + shift_left_immediate(right, tmp, 0, outputReg, output); + } else if (ENODE_IS(left, EINTCONST) && (tmp = ispowerof2(-left->data.intval.lo))) { + shift_left_immediate(right, tmp, 1, outputReg, output); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT(right->data.intval.lo)) { + binary_immediate(PC_MULLI, left, right->data.intval.lo, outputReg, output); + } else if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT(left->data.intval.lo)) { + binary_immediate(PC_MULLI, right, left->data.intval.lo, outputReg, output); + } else { + binary_operator(PC_MULLW, left, right, outputReg, output); + } +} + +struct ms { + SInt32 m; + int s; +}; +static void create_signed_magic(SInt32 val, struct ms *output) { + // PowerPC CWG page 57-58 + int p; + UInt32 ad, anc, delta, q1, r1, q2, r2, t; + + ad = abs(val); + t = 0x80000000U + ((UInt32) val >> 31); + anc = t - 1 - t % ad; + p = 31; + q1 = 0x80000000U / anc; + r1 = 0x80000000U - q1 * anc; + q2 = 0x80000000U / ad; + r2 = 0x80000000U - q2 * ad; + + do { + p = p + 1; + q1 = 2 * q1; + r1 = 2 * r1; + if (r1 >= anc) { + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2 * q2; + r2 = 2 * r2; + if (r2 >= ad) { + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + // after loop + output->m = q2 + 1; + if (val < 0) + output->m = -output->m; + output->s = p - 32; +} + +struct mu { + UInt32 m; + int a; + int s; +}; +static void create_unsigned_magic(UInt32 val, struct mu *output) { + // PowerPC CWG page 58-59 + int p; + UInt32 nc, delta, q1, r1, q2, r2; + + output->a = 0; + nc = - 1 - (-val) % val; + p = 31; + q1 = 0x80000000U / nc; + r1 = 0x80000000U - q1 * nc; + q2 = 0x7FFFFFFFU / val; + r2 = 0x7FFFFFFFU - q2 * val; + do { + p = p + 1; + if (r1 >= nc - r1) { + q1 = 2 * q1 + 1; + r1 = 2 * r1 - nc; + } else { + q1 = 2 * q1; + r1 = 2 * r1; + } + if (r2 + 1 >= val - r2) { + if (q2 >= 0x7FFFFFFFU) + output->a = 1; + q2 = 2 * q2 + 1; + r2 = 2 * r2 + 1 - val; + } else { + if (q2 >= 0x80000000U) + output->a = 1; + q2 = 2 * q2; + r2 = 2 * r2 + 1; + } + delta = val - 1 - r2; + } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); + + output->m = q2 + 1; + output->s = p - 32; +} + +void gen_DIV(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + Type *type; + int tmp; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_DIV_MOD(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_FLOAT(type)) { + fp_binary_operator((type->size == 4) ? PC_FDIVS : PC_FDIV, left, right, outputReg, output); + return; + } + + if (is_unsigned(type)) { + if (ENODE_IS(right, EINTCONST) && (tmp = ispowerof2(right->data.intval.lo))) { + shift_right_immediate(left, type, tmp, outputReg, output); + } else if (!copts.optimizesize && ENODE_IS(right, EINTCONST) && right->data.intval.lo != 1) { + SInt32 value; + int tmpreg1; + int tmpreg2; + int tmpreg3; + int tmpreg4; + int tmpreg5; + int tmpreg6; + int finalReg; + struct mu u_magicoutput; + Operand op1; + value = right->data.intval.lo; + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + memclrw(&op1, sizeof(Operand)); + GEN_NODE(left, &op1); + ENSURE_GPR(&op1, left->rtype, 0); + + tmpreg3 = op1.reg; + create_unsigned_magic(value, &u_magicoutput); + load_immediate(tmpreg2, u_magicoutput.m); + emitpcode(PC_MULHWU, tmpreg1, tmpreg2, tmpreg3); + if (u_magicoutput.a == 0) { + if (u_magicoutput.s) + emitpcode(PC_RLWINM, finalReg, tmpreg1, (32 - u_magicoutput.s) & 31, u_magicoutput.s, 31); + else + emitpcode(PC_MR, finalReg, tmpreg1); + } else if (u_magicoutput.a == 1) { + tmpreg4 = ALLOC_GPR(); + if (copts.optimizationlevel > 1) { + tmpreg5 = ALLOC_GPR(); + tmpreg6 = ALLOC_GPR(); + } else { + tmpreg5 = tmpreg4; + tmpreg6 = tmpreg4; + } + + emitpcode(PC_SUBF, tmpreg4, tmpreg1, tmpreg3); + emitpcode(PC_RLWINM, tmpreg5, tmpreg4, 31, 1, 31); + emitpcode(PC_ADD, tmpreg6, tmpreg5, tmpreg1); + emitpcode(PC_RLWINM, finalReg, tmpreg6, (32 - (u_magicoutput.s - 1)) & 31, u_magicoutput.s - 1, 31); + } + + output->optype = OpndType_GPR; + output->reg = finalReg; + } else { + binary_operator(PC_DIVWU, left, right, outputReg, output); + } + } else { + SInt32 value; + if (ENODE_IS(right, EINTCONST) && (tmp = ispowerof2(right->data.intval.lo))) { + signed_divide_by_power_of_2(left, tmp, 0, outputReg, output); + } else if (ENODE_IS(right, EINTCONST) && (tmp = ispowerof2(-right->data.intval.lo))) { + signed_divide_by_power_of_2(left, tmp, 1, outputReg, output); + } else if (!copts.optimizesize && ENODE_IS(right, EINTCONST) && (value = right->data.intval.lo) != 1u && value != -1) { + int tmpreg2; + int tmpreg3; + int tmpreg1; + int tmpreg4; + int finalReg; + struct ms s_magicoutput; + Operand op2; + value = right->data.intval.lo; + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + memclrw(&op2, sizeof(Operand)); + GEN_NODE(left, &op2); + ENSURE_GPR(&op2, left->rtype, 0); + + tmpreg4 = op2.reg; + create_signed_magic(value, &s_magicoutput); + load_immediate(tmpreg2, s_magicoutput.m); + emitpcode(PC_MULHW, tmpreg1, tmpreg2, tmpreg4); + if (value > 0 && s_magicoutput.m < 0) { + int t = ALLOC_GPR(); + emitpcode(PC_ADD, t, tmpreg1, tmpreg4); + tmpreg1 = t; + } else if (value < 0 && s_magicoutput.m > 0) { + int t = ALLOC_GPR(); + emitpcode(PC_SUBF, t, tmpreg4, tmpreg1); + tmpreg1 = t; + } + + if (s_magicoutput.s) { + int t = ALLOC_GPR(); + emitpcode(PC_SRAWI, t, tmpreg1, s_magicoutput.s); + tmpreg1 = t; + } + + emitpcode(PC_RLWINM, tmpreg3, tmpreg1, 1, 31, 31); + emitpcode(PC_ADD, finalReg, tmpreg1, tmpreg3); + + output->optype = OpndType_GPR; + output->reg = finalReg; + } else { + binary_operator(PC_DIVW, left, right, outputReg, output); + } + } +} + +void gen_MODULO(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + int tmp; + struct mu u_magicoutput; + struct ms s_magicoutput; + Operand op1; + Operand op2; + SInt32 value; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + + if (TYPE_IS_8BYTES(expr->rtype)) { + I8_gen_DIV_MOD(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(right, EINTCONST) && (tmp = ispowerof2(right->data.intval.lo))) { + if (is_unsigned(expr->rtype)) + shift_and_mask(left, 0, 32 - tmp, 31, outputReg, output); + else + signed_mod_by_power_of_2(left, tmp, 0, outputReg, output); + } else if (!copts.optimizesize && ENODE_IS(right, EINTCONST) && (value = right->data.intval.lo) != 1u && value != -1) { + GEN_NODE(left, &op1); + ENSURE_GPR(&op1, left->rtype, 0); + + if (is_unsigned(expr->rtype)) { + int tmpreg1; + int tmpreg2; + int tmpreg3; + int tmpreg4; + int tmpreg5; + int tmpreg6; + int tmpreg7; + int tmpreg8; + int finalReg; + + tmpreg1 = op1.reg; + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + tmpreg4 = ALLOC_GPR(); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + create_unsigned_magic(right->data.intval.lo, &u_magicoutput); + load_immediate(tmpreg3, u_magicoutput.m); + emitpcode(PC_MULHWU, tmpreg2, tmpreg3, tmpreg1); + + if (u_magicoutput.a == 0 && u_magicoutput.s != 0) + emitpcode(PC_RLWINM, tmpreg2, tmpreg2, (32 - u_magicoutput.s) & 31, u_magicoutput.s, 31); + + if (u_magicoutput.a == 1) { + tmpreg5 = ALLOC_GPR(); + if (copts.optimizationlevel > 1) { + tmpreg6 = ALLOC_GPR(); + tmpreg7 = ALLOC_GPR(); + tmpreg8 = ALLOC_GPR(); + } else { + tmpreg6 = tmpreg5; + tmpreg7 = tmpreg5; + tmpreg8 = tmpreg5; + } + emitpcode(PC_SUBF, tmpreg5, tmpreg2, tmpreg1); + emitpcode(PC_RLWINM, tmpreg6, tmpreg5, 31, 1, 31); + emitpcode(PC_ADD, tmpreg7, tmpreg6, tmpreg2); + emitpcode(PC_RLWINM, tmpreg8, tmpreg7, (32 - (u_magicoutput.s - 1)) & 31, u_magicoutput.s - 1, 31); + tmpreg2 = tmpreg8; + } + + if (value > 0 && value < 0x7FFF) { + emitpcode(PC_MULLI, tmpreg4, tmpreg2, value); + } else { + GEN_NODE(right, &op2); + ENSURE_GPR(&op2, right->rtype, 0); + emitpcode(PC_MULLW, tmpreg4, tmpreg2, op2.reg); + } + + emitpcode(PC_SUBF, finalReg, tmpreg4, tmpreg1); + output->optype = OpndType_GPR; + output->reg = finalReg; + } else { + int tmpreg1; + int tmpreg2; + int tmpreg3; + int tmpreg4; + int tmpreg5; + int tmpreg6; + int finalReg; + + tmpreg1 = op1.reg; + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + tmpreg4 = ALLOC_GPR(); + tmpreg5 = ALLOC_GPR(); + tmpreg6 = ALLOC_GPR(); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + create_signed_magic(right->data.intval.lo, &s_magicoutput); + load_immediate(tmpreg3, s_magicoutput.m); + emitpcode(PC_MULHW, tmpreg2, tmpreg3, tmpreg1); + + if (value > 0 && s_magicoutput.m < 0) { + int tmp = ALLOC_GPR(); + emitpcode(PC_ADD, tmp, tmpreg2, tmpreg1); + tmpreg2 = tmp; + } else if (value < 0 && s_magicoutput.m > 0) { + int tmp = ALLOC_GPR(); + emitpcode(PC_SUBF, tmp, tmpreg1, tmpreg2); + tmpreg2 = tmp; + } + + if (s_magicoutput.s != 0) { + int tmp = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmp, tmpreg2, s_magicoutput.s); + tmpreg2 = tmp; + } + + emitpcode(PC_RLWINM, tmpreg4, tmpreg2, 1, 31, 31); + emitpcode(PC_ADD, tmpreg5, tmpreg2, tmpreg4); + + if (value < 0x7FFF && value > -0x4000) { + emitpcode(PC_MULLI, tmpreg6, tmpreg5, value); + } else { + GEN_NODE(right, &op2); + ENSURE_GPR(&op2, right->rtype, 0); + + emitpcode(PC_MULLW, tmpreg6, tmpreg5, op2.reg); + } + + emitpcode(PC_SUBF, finalReg, tmpreg6, tmpreg1); + output->optype = OpndType_GPR; + output->reg = finalReg; + } + } else { + int tmpreg1; + int tmpreg2; + int finalReg; + + if (right->hascall) { + GEN_NODE(right, &op2); + ENSURE_GPR(&op2, right->rtype, 0); + GEN_NODE(left, &op1); + ENSURE_GPR(&op1, left->rtype, 0); + } else { + GEN_NODE(left, &op1); + ENSURE_GPR(&op1, left->rtype, 0); + GEN_NODE(right, &op2); + ENSURE_GPR(&op2, right->rtype, 0); + } + + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + emitpcode(is_unsigned(expr->rtype) ? PC_DIVWU : PC_DIVW, tmpreg1, op1.reg, op2.reg); + emitpcode(PC_MULLW, tmpreg2, tmpreg1, op2.reg); + emitpcode(PC_SUBF, finalReg, tmpreg2, op1.reg); + + output->optype = OpndType_GPR; + output->reg = finalReg; + } +} + +void gen_ADD(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + Type *type; + Operand opleft; + Operand opright; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (TYPE_IS_8BYTES(type)) { + I8_gen_ADD(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_FLOAT(type)) { + if (ENODE_IS(left, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FMADDS : PC_FMADD, + left->data.diadic.left, + left->data.diadic.right, + right, + outputReg, output); + } else if (ENODE_IS(right, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FMADDS : PC_FMADD, + right->data.diadic.left, + right->data.diadic.right, + left, + outputReg, output); + } else { + fp_binary_operator( + (type->size == 4) ? PC_FADDS : PC_FADD, + left, right, + outputReg, output); + } + return; + } + + if (right->hascall) { + GEN_NODE(right, &opright); + if (opright.optype >= OpndType_IndirectGPR_ImmOffset) + ENSURE_GPR(&opright, right->rtype, 0); + + GEN_NODE(left, &opleft); + if (opleft.optype >= OpndType_IndirectGPR_ImmOffset) + ENSURE_GPR(&opleft, left->rtype, 0); + } else { + GEN_NODE(left, &opleft); + if (opleft.optype >= OpndType_IndirectGPR_ImmOffset) + ENSURE_GPR(&opleft, left->rtype, 0); + + GEN_NODE(right, &opright); + if (opright.optype >= OpndType_IndirectGPR_ImmOffset) + ENSURE_GPR(&opright, right->rtype, 0); + } + + if (IS_TYPE_POINTER(expr->rtype)) { + if (TYPE_IS_8BYTES(expr->data.diadic.left->rtype)) { + opleft.optype = OpndType_GPR; + opleft.regHi = 0; + } + if (TYPE_IS_8BYTES(expr->data.diadic.right->rtype)) { + opright.optype = OpndType_GPR; + opright.regHi = 0; + } + } + + combine(&opleft, &opright, outputReg, output); +} + +void gen_SUB(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + Type *type; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_SUB(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_FLOAT(type)) { + if (ENODE_IS(left, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FMSUBS : PC_FMSUB, + left->data.diadic.left, + left->data.diadic.right, + right, + outputReg, output); + } else if (ENODE_IS(right, EMUL) && copts.fp_contract) { + fp_multiply_add( + (type->size == 4) ? PC_FNMSUBS : PC_FNMSUB, + right->data.diadic.left, + right->data.diadic.right, + left, + outputReg, output); + } else { + fp_binary_operator( + (type->size == 4) ? PC_FSUBS : PC_FSUB, + left, right, + outputReg, output); + } + return; + } + + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT(left->data.intval.lo)) + binary_immediate(PC_SUBFIC, right, left->data.intval.lo, outputReg, output); + else + binary_operator(PC_SUBF, right, left, outputReg, output); +} + +void gen_SHL(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_SHL_SHR(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(right, EINTCONST)) + shift_left_immediate(left, right->data.intval.lo, 0, outputReg, output); + else + binary_operator(PC_SLW, left, right, outputReg, output); +} + +void gen_SHR(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_SHL_SHR(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(right, EINTCONST)) + shift_right_immediate(left, type, right->data.intval.lo, outputReg, output); + else + binary_operator(is_unsigned(type) ? PC_SRW : PC_SRAW, left, right, outputReg, output); +} + +void gen_AND(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + short first; + short last; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_AND(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(right, EINTCONST) && ismaskconstant(right->data.intval.lo, &first, &last)) { + if (ENODE_IS(left, ESHL) && ENODE_IS(left->data.diadic.right, EINTCONST) && (int)(left->data.diadic.right->data.intval.lo + last) < 32) { + shift_and_mask( + left->data.diadic.left, + left->data.diadic.right->data.intval.lo, + first, last, + outputReg, output); + } else if (ENODE_IS(left, ESHR) && ENODE_IS(left->data.diadic.right, EINTCONST) && (int)left->data.diadic.right->data.intval.lo <= first && last >= first) { + if (left->data.diadic.right->data.intval.lo == 0) + shift_and_mask(left->data.diadic.left, 0, first, last, outputReg, output); + else + shift_and_mask(left->data.diadic.left, 32 - left->data.diadic.right->data.intval.lo, first, last, outputReg, output); + } else { + shift_and_mask(left, 0, first, last, outputReg, output); + } + return; + } + + if (ENODE_IS(right, EINTCONST) && FITS_IN_USHORT(right->data.intval.lo)) { + binary_immediate(PC_ANDI, left, right->data.intval.lo, outputReg, output); + return; + } + if (ENODE_IS(right, EINTCONST) && FITS_IN_HI_SHORT(right->data.intval.lo)) { + binary_immediate(PC_ANDIS, left, right->data.intval.lo >> 16, outputReg, output); + return; + } + + if (ENODE_IS(left, EINTCONST) && ismaskconstant(left->data.intval.lo, &first, &last)) { + if (ENODE_IS(right, ESHL) && ENODE_IS(right->data.diadic.right, EINTCONST) && (int)(right->data.diadic.right->data.intval.lo + last) < 32) { + shift_and_mask( + right->data.diadic.left, + right->data.diadic.right->data.intval.lo, + first, last, + outputReg, output); + } else if (ENODE_IS(right, ESHR) && ENODE_IS(right->data.diadic.right, EINTCONST) && (int)right->data.diadic.right->data.intval.lo <= first) { + if (right->data.diadic.right->data.intval.lo == 0) + shift_and_mask(right->data.diadic.left, 0, first, last, outputReg, output); + else + shift_and_mask(right->data.diadic.left, 32 - right->data.diadic.right->data.intval.lo, first, last, outputReg, output); + } else { + shift_and_mask(right, 0, first, last, outputReg, output); + } + return; + } + + if (ENODE_IS(left, EINTCONST) && FITS_IN_USHORT(left->data.intval.lo)) { + binary_immediate(PC_ANDI, right, left->data.intval.lo, outputReg, output); + return; + } + if (ENODE_IS(left, EINTCONST) && FITS_IN_HI_SHORT(left->data.intval.lo)) { + binary_immediate(PC_ANDIS, right, left->data.intval.lo >> 16, outputReg, output); + return; + } + + if (ENODE_IS(right, EBINNOT)) + binary_operator(PC_ANDC, left, right->data.monadic, outputReg, output); + else if (ENODE_IS(left, EBINNOT)) + binary_operator(PC_ANDC, right, left->data.monadic, outputReg, output); + else + binary_operator(PC_AND, left, right, outputReg, output); +} + +void gen_XOR(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_XOR(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(left, EINTCONST)) + or_xor_immediate(PC_XORI, right, left->data.intval.lo, outputReg, output); + else if (ENODE_IS(right, EINTCONST)) + or_xor_immediate(PC_XORI, left, right->data.intval.lo, outputReg, output); + else if (ENODE_IS(right, EBINNOT)) + binary_operator(PC_EQV, left, right->data.monadic, outputReg, output); + else if (ENODE_IS(left, EBINNOT)) + binary_operator(PC_EQV, left->data.monadic, right, outputReg, output); + else + binary_operator(PC_XOR, left, right, outputReg, output); +} + +void gen_OR(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + type = expr->rtype; + + if (TYPE_IS_8BYTES(type)) { + I8_gen_OR(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(left, EINTCONST)) + or_xor_immediate(PC_ORI, right, left->data.intval.lo, outputReg, output); + else if (ENODE_IS(right, EINTCONST)) + or_xor_immediate(PC_ORI, left, right->data.intval.lo, outputReg, output); + else if (ENODE_IS(right, EBINNOT)) + binary_operator(PC_ORC, left, right->data.monadic, outputReg, output); + else if (ENODE_IS(left, EBINNOT)) + binary_operator(PC_ORC, right, left->data.monadic, outputReg, output); + else + binary_operator(PC_OR, left, right, outputReg, output); +} + +void gen_ASS(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + Operand opleft; + Operand opright; + Operand op2; + VarInfo *vi; + SInt32 incval; + short align; + short align2; + + type = expr->rtype; + if (ENODE_IS(expr, ECONDASS)) { + left = expr->data.cond.expr1; + if (ENODE_IS(left, EINDIRECT)) { + left = left->data.monadic; + } else { + CError_FATAL(1759); + } + right = expr->data.cond.expr2; + } else { + left = expr->data.diadic.left; + right = expr->data.diadic.right; + } + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + + if (TYPE_IS_8BYTES(type)) { + I8_gen_ASS(expr, outputReg, outputRegHi, output); + return; + } + + if (ENODE_IS(left, EOBJREF) && OBJECT_REG(left->data.objref)) { + vi = Registers_GetVarInfo(left->data.objref); + GEN_NODE_TO_REG(right, vi->reg, 0, &opright); + switch (vi->rclass) { + case RegClass_GPR: + ENSURE_GPR(&opright, type, vi->reg); + output->optype = OpndType_GPR; + break; + case RegClass_FPR: + ENSURE_FPR(&opright, type, vi->reg); + output->optype = OpndType_FPR; + break; + case RegClass_VR: + ENSURE_VR(&opright, type, vi->reg); + output->optype = OpndType_VR; + break; + default: + CError_FATAL(1810); + } + if (opright.reg != vi->reg) { + PCodeArg a, b; + a.kind = PCOp_REGISTER; + a.arg = vi->rclass; + a.data.reg.reg = vi->reg; + a.data.reg.effect = EffectWrite; + b.kind = PCOp_REGISTER; + b.arg = vi->rclass; + b.data.reg.reg = opright.reg; + b.data.reg.effect = EffectRead; + appendpcode(pclastblock, makecopyinstruction(&b, &a)); + } + output->reg = vi->reg; + return; + } + + if (IS_TYPE_FLOAT(type)) { + GEN_NODE_TO_FPR(right, &opright, right->rtype, 0); + if (ispostincrementopportunity(left, &opleft, &incval)) { + indirect(&opleft, expr); + store_fp(opright.reg, &opleft, type); + add_register_immediate(opleft.reg, opleft.reg, incval); + } else { + GEN_NODE(left, &opleft); + indirect(&opleft, expr); + store_fp(opright.reg, &opleft, type); + } + output->optype = OpndType_FPR; + output->reg = opright.reg; + return; + } + + if (IS_TYPE_VECTOR(type)) { + GEN_NODE(right, &opright); + if (opright.optype == OpndType_Absolute) + ENSURE_VR(&opright, type, 0); + else + ENSURE_VR(&opright, right->rtype, 0); + + if (ispostincrementopportunity(left, &opleft, &incval)) { + indirect(&opleft, expr); + store_v(opright.reg, &opleft, type); + add_register_immediate(opleft.reg, opleft.reg, incval); + } else { + GEN_NODE(left, &opleft); + indirect(&opleft, expr); + store_v(opright.reg, &opleft, type); + } + output->optype = OpndType_VR; + output->reg = opright.reg; + return; + } + + if (TYPE_FITS_IN_REGISTER(type)) { + GEN_NODE_TO_GPR(right, &opright, right->rtype, 0); + + if (ENODE_IS(left, EBITFIELD)) { + GEN_NODE(left->data.monadic, &opleft); + indirect(&opleft, expr); + + op2 = opleft; + ENSURE_GPR(&op2, type, 0); + insert_bitfield(opright.reg, &op2, TYPE_BITFIELD(left->rtype)); + store(op2.reg, &opleft, type); + + if (!expr->ignored) + extract_bitfield(&op2, TYPE_BITFIELD(left->rtype), opright.reg, &opleft); + } else if (ispostincrementopportunity(left, &opleft, &incval)) { + indirect(&opleft, expr); + store(opright.reg, &opleft, type); + add_register_immediate(opleft.reg, opleft.reg, incval); + } else { + GEN_NODE(left, &opleft); + indirect(&opleft, expr); + store(opright.reg, &opleft, type); + } + + output->optype = OpndType_GPR; + output->reg = opright.reg; + return; + } + + GEN_NODE(right, &opright); + GEN_NODE(left, output); + + indirect(output, expr); + if (output->object) { + if (output->object->datatype == DLOCAL && (output->object->u.var.info->flags & VarInfoFlag1)) + align = CMach_ArgumentAlignment(type); + else + align = CMach_AllocationAlignment(type, output->object->qual); + } else { + align = CMach_AllocationAlignment(type, 0); + } + if (opright.object) { + if (opright.object->datatype == DLOCAL && (opright.object->u.var.info->flags & VarInfoFlag1)) + align2 = CMach_ArgumentAlignment(type); + else + align2 = CMach_AllocationAlignment(type, opright.object->qual); + } else { + align2 = CMach_AllocationAlignment(type, 0); + } + + if (align2 < align) + align = align2; + + move_block(output, &opright, type->size, align); +} + +ENode *evaluate_and_skip_comma(ENode *expr) { + Operand op; + ENode *inner; + + memclrw(&op, sizeof(Operand)); + while (ENODE_IS(expr, ECOMMA)) { + inner = expr->data.diadic.left; + GEN_NODE(inner, &op); + if (ENODE_IS(inner, EINDIRECT) && (op.flags & OpndFlags_Volatile)) { + if (TYPE_FITS_IN_REGISTER_2(inner->rtype)) { + ENSURE_GPR(&op, inner->rtype, 0); + } else if (IS_TYPE_FLOAT(inner->rtype)) { + ENSURE_FPR(&op, inner->rtype, 0); + } + } + expr = expr->data.diadic.right; + } + return expr; +} + +void gen_COMMA(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + GEN_NODE(left, output); + + if (ENODE_IS(left, EINDIRECT) && (output->flags & OpndFlags_Volatile)) { + if (TYPE_FITS_IN_REGISTER_2(left->rtype)) { + ENSURE_GPR(output, left->rtype, 0); + } else if (IS_TYPE_FLOAT(left->rtype)) { + ENSURE_FPR(output, left->rtype, 0); + } + } + + GEN_NODE_TO_REG(right, outputReg, 0, output); +} + +void gen_TYPCON(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + Type *srctype; + Type *dsttype; + + inner = expr->data.monadic; + srctype = inner->rtype; + dsttype = expr->rtype; + + if (TYPE_IS_8BYTES(srctype) || TYPE_IS_8BYTES(dsttype)) { + I8_gen_TYPCON(expr, outputReg, outputRegHi, output); + return; + } + + if (IS_TYPE_VOID(dsttype)) { + GEN_NODE(inner, output); + if (ENODE_IS(inner, EINDIRECT) && (output->flags & OpndFlags_Volatile)) { + if (TYPE_FITS_IN_REGISTER_2(srctype)) { + ENSURE_GPR(output, srctype, 0); + } else if (IS_TYPE_FLOAT(srctype)) { + ENSURE_FPR(output, srctype, 0); + } + } + } else if (IS_TYPE_INT_OR_ENUM(srctype)) { + if (IS_TYPE_FLOAT(dsttype)) { + GEN_NODE(inner, output); + if (srctype->size < 4) + extend32(output, srctype, 0); + ENSURE_GPR(output, srctype, 0); + + if (is_unsigned(srctype)) + convert_unsigned_to_floating(output, dsttype->size == 4, outputReg); + else + convert_integer_to_floating(output, dsttype->size == 4, outputReg); + } else if (IS_TYPE_VECTOR(dsttype)) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + ENSURE_VR(output, dsttype, outputReg); + } else if ( + srctype->size < dsttype->size && + !ENODE_IS_INDIRECT_TO(inner, EBITFIELD) && + !ENODE_IS_ASSIGN_TO(inner, EBITFIELD) && + !(ENODE_IS_RANGE(inner, EPOSTINC, EPREDEC) && ENODE_IS(inner->data.monadic->data.monadic, EBITFIELD)) + ) { + GEN_NODE(inner, output); + extend32(output, srctype, outputReg); + } else if (dsttype->size < srctype->size || dsttype->size < 4) { + GEN_NODE(inner, output); + ENSURE_GPR(output, srctype, 0); + extend32(output, dsttype, outputReg); + } else { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + } + } else if (IS_TYPE_POINTER(srctype)) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + if (dsttype->size < srctype->size) + ENSURE_GPR(output, srctype, outputReg); + } else if (IS_TYPE_FLOAT(srctype)) { + if (IS_TYPE_FLOAT(dsttype)) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + ENSURE_FPR(output, srctype, outputReg); + + if (dsttype->size == 4 && srctype->size != 4) { + int tmp = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(PC_FRSP, tmp, output->reg); + output->optype = OpndType_FPR; + output->reg = tmp; + } + } else if (is_unsigned(dsttype) && dsttype->size == 4) { + GEN_NODE_TO_REG(inner, 1, 0, output); + ENSURE_FPR(output, srctype, 1); + convert_floating_to_unsigned(output, outputReg); + } else { + GEN_NODE_TO_REG(inner, 0, 0, output); + ENSURE_FPR(output, srctype, 0); + convert_floating_to_integer(output, outputReg); + } + } else if (IS_TYPE_VECTOR(srctype) && IS_TYPE_VECTOR(dsttype)) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + ENSURE_VR(output, srctype, outputReg); + } else if (srctype->size == dsttype->size) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + } else { + CError_FATAL(2224); + } +} + +void gen_BITFIELD(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + CError_FATAL(2238); +} + +void gen_INTCONST(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + if (TYPE_IS_8BYTES(expr->rtype)) { + I8_gen_INTCONST(expr, outputReg, outputRegHi, output); + return; + } + + output->optype = OpndType_Absolute; + output->immediate = CInt64_GetULong(&expr->data.intval); +} + +void gen_FLOATCONST(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + CError_FATAL(2294); +} + +void gen_STRINGCONST(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + CError_FATAL(2308); +} + +static Boolean COND_is_ABS_MatchNodes(ENode *cond, ENode *expr1, ENode *expr2) { + if (cond->type != expr1->type || cond->type != expr2->type) + return 0; + + if (!(TYPE_FITS_IN_REGISTER(cond->rtype) && TYPE_FITS_IN_REGISTER(expr1->rtype) && TYPE_FITS_IN_REGISTER(expr2->rtype))) + return 0; + + if (cond->rtype->size != expr1->rtype->size || cond->rtype->size != expr2->rtype->size) + return 0; + + switch (cond->type) { + case EOBJREF: + if (cond->data.objref != expr1->data.objref || cond->data.objref != expr2->data.objref) + return 0; + return 1; + case EINDIRECT: + case ETYPCON: + return COND_is_ABS_MatchNodes(cond->data.monadic, expr1->data.monadic, expr2->data.monadic); + default: + return 0; + } +} + +static ENode *COND_is_ABS(ENode *cond, ENode *expr1, ENode *expr2) { + ENode *tmp; + + int parity = 0; + while (ENODE_IS(cond, ELOGNOT)) { + parity = (parity + 1) & 1; + cond = cond->data.monadic; + } + + if (parity) { + tmp = expr1; + expr1 = expr2; + expr2 = tmp; + } + + switch (cond->type) { + case ELESS: + case ELESSEQU: + tmp = expr1; + expr1 = expr2; + expr2 = tmp; + break; + case EGREATER: + case EGREATEREQU: + break; + default: + return NULL; + } + + if (IS_INT_CONST_ZERO(cond->data.diadic.right)) { + cond = cond->data.diadic.left; + } else if (IS_INT_CONST_ZERO(cond->data.diadic.left)) { + cond = cond->data.diadic.left; + tmp = expr1; + expr1 = expr2; + expr2 = tmp; + } else { + return NULL; + } + + if (ENODE_IS(expr1, EADD) && ENODE_IS(expr2, ESUB)) { + if (COND_is_ABS_MatchNodes(cond, expr1->data.diadic.right, expr2->data.diadic.right)) + return expr1; + else + return NULL; + } + + if (!ENODE_IS(expr2, EMONMIN)) + return NULL; + + expr2 = expr2->data.monadic; + if (COND_is_ABS_MatchNodes(cond, expr1, expr2)) + return expr1; + + return NULL; +} + +static int COND_has_const(ENode *expr1, ENode *expr2) { + SInt32 diff; + int result = 0; + + if (IS_INT_CONST(expr1)) + result += 1; + if (IS_INT_CONST(expr2)) + result += 2; + + if (result & 1) { + if (IS_INT_CONST_ZERO(expr1)) + return 5; + } + if (result & 2) { + if (IS_INT_CONST_ZERO(expr2)) + return 6; + } + + if (result == 3) { + diff = expr1->data.intval.lo - expr2->data.intval.lo; + if (diff == 1 || diff == -1) + return 4; + } + + return result; +} + +static Boolean COND_is_COMPARE(ENode *cond, ENode *expr1, ENode *expr2, short outputReg, Operand *output) { + SInt32 left; + SInt32 right; + int parity; + int negate; + ENodeType nt; + + while (ENODE_IS(expr1, ETYPCON) && TYPE_FITS_IN_REGISTER(expr1->rtype)) + expr1 = expr1->data.monadic; + while (ENODE_IS(expr2, ETYPCON) && TYPE_FITS_IN_REGISTER(expr2->rtype)) + expr2 = expr2->data.monadic; + + if (!(ENODE_IS(expr1, EINTCONST) && TYPE_FITS_IN_REGISTER(expr1->rtype) && CInt64_IsInRange(expr1->data.intval, 4))) + return 0; + if (!(ENODE_IS(expr2, EINTCONST) && TYPE_FITS_IN_REGISTER(expr2->rtype) && CInt64_IsInRange(expr2->data.intval, 4))) + return 0; + + left = CInt64_GetULong(&expr1->data.intval); + right = CInt64_GetULong(&expr2->data.intval); + parity = 0; + negate = 0; + switch (left) { + case 1: + if (right != 0) + return 0; + break; + case 0: + parity = 1; + if (right == -1) + negate = 1; + else if (right != 1) + return 0; + break; + case -1: + if (right != 0) + return 0; + negate = 1; + break; + default: + return 0; + } + + while (ENODE_IS(cond, ELOGNOT)) { + parity = (parity + 1) & 1; + cond = cond->data.monadic; + } + + if (parity) { + nt = invert_relop(cond->type); + if (nt == cond->type) + return 0; + cond->type = nt; + } + + if (negate) + gen_negated_condition_gpr(cond, output, outputReg); + else + gen_condition_gpr(cond, output, outputReg); + + return 1; +} + +void gen_COND(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *cond; + ENode *expr1; + ENode *expr2; + Type *type; + PCodeLabel *label1; + PCodeLabel *label2; + PCodeLabel *label3; + Operand op1; + Operand op2; + int has_const; + int reg1; + int reg2; + int reg3; + short align; + short max_align; + + expr1 = expr->data.cond.expr1; + expr2 = expr->data.cond.expr2; + type = expr->rtype; + + label1 = makepclabel(); + label2 = makepclabel(); + label3 = makepclabel(); + + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + + cond = evaluate_and_skip_comma(expr->data.cond.cond); + + if (TOC_use_fsel(expr)) { + ENode *left; + ENode *right; + ENode *tmp; + ENodeType nt; + Boolean flag; + Operand op; + int fneg_reg; + int fneg_reg2; + int fneg_reg3; + int fsel_reg; + int final_reg; + + left = cond->data.diadic.left; + right = cond->data.diadic.right; + nt = cond->type; + flag = 0; + memclrw(&op, sizeof(Operand)); + + switch (nt) { + case EGREATEREQU: + case EEQU: + break; + case EGREATER: + tmp = left; + left = right; + right = tmp; + case ELESS: + case ENOTEQU: + tmp = expr1; + expr1 = expr2; + expr2 = tmp; + break; + case ELESSEQU: + tmp = left; + left = right; + right = tmp; + break; + default: + CError_FATAL(2780); + } + + if (ENODE_IS(left, EFLOATCONST) && CMach_FloatIsZero(left->data.floatval)) { + GEN_NODE(right, &op); + ENSURE_FPR(&op, right->rtype, 0); + flag = 1; + } else if (ENODE_IS(right, EFLOATCONST) && CMach_FloatIsZero(right->data.floatval)) { + GEN_NODE(left, &op); + ENSURE_FPR(&op, left->rtype, 0); + } else { + fp_binary_operator((type->size == 4) ? PC_FSUBS : PC_FSUB, left, right, 0, &op); + } + + switch (cond->type) { + case EEQU: + case ENOTEQU: + if (flag) { + GEN_NODE_TO_FPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_FPR(expr2, &op2, expr2->rtype, 0); + + fneg_reg = ALLOC_FPR(); + emitpcode(PC_FNEG, fneg_reg, op.reg); + fsel_reg = ALLOC_FPR(); + emitpcode(PC_FSEL, fsel_reg, op.reg, op1.reg, op2.reg); + final_reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(PC_FSEL, final_reg, fneg_reg, fsel_reg, op2.reg); + } else { + GEN_NODE_TO_FPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_FPR(expr2, &op2, expr2->rtype, 0); + + fneg_reg2 = ALLOC_FPR(); + emitpcode(PC_FNEG, fneg_reg2, op.reg); + fsel_reg = ALLOC_FPR(); + emitpcode(PC_FSEL, fsel_reg, op.reg, op1.reg, op2.reg); + final_reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(PC_FSEL, final_reg, fneg_reg2, fsel_reg, op2.reg); + } + break; + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + GEN_NODE_TO_FPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_FPR(expr2, &op2, expr2->rtype, 0); + + fneg_reg3 = op.reg; + if (flag) { + fneg_reg3 = ALLOC_FPR(); + emitpcode(PC_FNEG, fneg_reg3, op.reg); + } + + final_reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(PC_FSEL, final_reg, fneg_reg3, op1.reg, op2.reg); + break; + default: + CError_FATAL(2862); + } + + output->optype = OpndType_FPR; + output->reg = final_reg; + return; + } + + if (TOC_use_isel(expr, 1)) { + Operand isel_op1; + Operand isel_op2; + ENode *x; + ENode *y; + ENode *abs_expr; + + memclrw(&isel_op1, sizeof(Operand)); + memclrw(&isel_op2, sizeof(Operand)); + + if (COND_is_COMPARE(cond, expr1, expr2, outputReg, output)) + return; + + if ((abs_expr = COND_is_ABS(cond, expr1, expr2))) { + if (ENODE_IS(expr1, EADD) && ENODE_IS(expr2, ESUB)) { + x = expr1->data.diadic.left; + y = expr2->data.diadic.right; + if (y->hascall) { + GEN_NODE(y, &op2); + ENSURE_GPR(&op2, y->rtype, 0); + + GEN_NODE(x, &op1); + if (op1.optype >= OpndType_IndirectGPR_ImmOffset) + ENSURE_GPR(&op1, x->rtype, 0); + } else { + GEN_NODE(x, &op1); + if (op1.optype >= OpndType_IndirectGPR_ImmOffset) + ENSURE_GPR(&op1, x->rtype, 0); + + GEN_NODE(y, &op2); + ENSURE_GPR(&op2, y->rtype, 0); + } + + reg1 = ALLOC_GPR(); + emitpcode(PC_SRAWI, reg1, op2.reg, 31); + reg2 = ALLOC_GPR(); + emitpcode(PC_XOR, reg2, reg1, op2.reg); + reg3 = ALLOC_GPR(); + emitpcode(PC_SUBF, reg3, reg1, reg2); + op2.optype = OpndType_GPR; + op2.reg = reg3; + combine(&op1, &op2, outputReg, output); + } else { + GEN_NODE(abs_expr, output); + ENSURE_GPR(output, abs_expr->rtype, 0); + + reg1 = ALLOC_GPR(); + emitpcode(PC_SRAWI, reg1, output->reg, 31); + reg2 = ALLOC_GPR(); + emitpcode(PC_XOR, reg2, reg1, output->reg); + reg3 = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBF, reg3, reg1, reg2); + output->optype = OpndType_GPR; + output->reg = reg3; + } + return; + } + + if ((has_const = COND_has_const(expr1, expr2))) { + switch (COND_has_const(expr1, expr2)) { + case 0: + case 2: + break; + case 3: + case 4: + if (has_const == 4) { + if (expr1->data.intval.lo < expr2->data.intval.lo) + gen_negated_condition_gpr(cond, &isel_op1, 0); + else + gen_condition_gpr(cond, &isel_op1, 0); + + GEN_NODE(expr1, &op1); + GEN_NODE(expr2, &op2); + reg1 = ALLOC_GPR(); + ENSURE_GPR(&op2, expr2->rtype, reg1); + emitpcode(PC_ADD, reg1, isel_op1.reg, op2.reg); + if (outputReg) { + emitpcode(PC_MR, reg2 = outputReg, reg1); + reg1 = reg2; + } + output->optype = OpndType_GPR; + output->reg = reg1; + return; + } + break; + case 5: + case 6: + gen_negated_condition_gpr(cond, &isel_op1, 0); + ENSURE_GPR(&isel_op1, TYPE(&stunsignedint), 0); + GEN_NODE(expr1, &op1); + GEN_NODE(expr2, &op2); + + reg1 = outputReg ? outputReg : ALLOC_GPR(); + if (op1.optype == OpndType_Absolute && op1.immediate == 0) { + ENSURE_GPR(&op2, expr2->rtype, 0); + emitpcode(PC_ANDC, reg1, op2.reg, isel_op1.reg); + } else if (op2.optype == OpndType_Absolute && op2.immediate == 0) { + ENSURE_GPR(&op1, expr1->rtype, 0); + emitpcode(PC_AND, reg1, op1.reg, isel_op1.reg); + } else { + CError_FATAL(3119); + } + + output->optype = OpndType_GPR; + output->reg = reg1; + return; + case 1: + reg2 = ALLOC_GPR(); + reg1 = reg2; + logical_expression_nobranch(cond, 0, &isel_op2); + + GEN_NODE_TO_REG(expr1, reg1, 0, &op1); + ENSURE_GPR(&op1, expr1->rtype, reg1); + if (op1.reg != reg1) + emitpcode(PC_MR, reg1, op1.reg); + + branch_conditional(isel_op2.reg, isel_op2.regOffset, 1, label2); + branch_label(label1); + + GEN_NODE_TO_REG(expr2, reg1, 0, &op2); + ENSURE_GPR(&op2, expr2->rtype, reg1); + if (op2.reg != reg1) + emitpcode(PC_MR, reg1, op2.reg); + + branch_label(label2); + + if (outputReg) { + emitpcode(PC_MR, reg2 = outputReg, reg1); + reg1 = reg2; + } + + output->optype = OpndType_GPR; + output->reg = reg1; + return; + + default: + CError_FATAL(3168); + } + } + + reg1 = ALLOC_GPR(); + logical_expression_nobranch(cond, 0, &isel_op2); + + GEN_NODE_TO_REG(expr2, reg1, 0, &op2); + ENSURE_GPR(&op2, expr2->rtype, reg1); + if (op2.reg != reg1) + emitpcode(PC_MR, reg1, op2.reg); + + branch_conditional(isel_op2.reg, isel_op2.regOffset, 0, label2); + branch_label(label1); + + GEN_NODE_TO_REG(expr1, reg1, 0, &op1); + ENSURE_GPR(&op1, expr1->rtype, reg1); + if (op1.reg != reg1) + emitpcode(PC_MR, reg1, op1.reg); + + branch_label(label2); + + if (outputReg) { + emitpcode(PC_MR, reg2 = outputReg, reg1); + reg1 = reg2; + } + + output->optype = OpndType_GPR; + output->reg = reg1; + return; + } + + logical_expression(cond, label1, label2, label1); + branch_label(label1); + + if (IS_TYPE_VOID(type) || expr->ignored) { + GEN_NODE(expr1, &op1); + branch_always(label3); + branch_label(label2); + GEN_NODE(expr2, &op2); + } else if (IS_TYPE_FLOAT(type)) { + if (expr1->hascall || expr2->hascall) + reg1 = ALLOC_FPR(); + else + reg1 = outputReg ? outputReg : ALLOC_FPR(); + + GEN_NODE_TO_REG(expr1, reg1, 0, &op1); + ENSURE_FPR(&op1, expr1->rtype, reg1); + if (op1.reg != reg1) + emitpcode(PC_FMR, reg1, op1.reg); + + branch_always(label3); + branch_label(label2); + + GEN_NODE_TO_REG(expr2, reg1, 0, &op2); + ENSURE_FPR(&op2, expr2->rtype, reg1); + if (op2.reg != reg1) + emitpcode(PC_FMR, reg1, op2.reg); + + output->optype = OpndType_FPR; + output->reg = reg1; + } else if (TYPE_IS_8BYTES(type)) { + if (expr1->hascall || expr2->hascall) { + reg1 = ALLOC_GPR(); + reg3 = ALLOC_GPR(); + reg2 = reg3; + } else { + reg1 = outputReg ? outputReg : ALLOC_GPR(); + reg3 = outputRegHi ? outputRegHi : ALLOC_GPR(); + reg2 = reg3; + } + + GEN_NODE_TO_REG(expr1, reg1, reg2, &op1); + coerce_to_register_pair(&op1, expr1->rtype, reg1, reg2); + + branch_always(label3); + branch_label(label2); + + GEN_NODE_TO_REG(expr2, reg1, reg2, &op2); + coerce_to_register_pair(&op2, expr2->rtype, reg1, reg2); + + output->optype = OpndType_GPRPair; + output->reg = reg1; + output->regHi = reg2; + } else if (TYPE_FITS_IN_REGISTER(type)) { + if (expr1->hascall || expr2->hascall) + reg1 = ALLOC_GPR(); + else + reg1 = outputReg ? outputReg : ALLOC_GPR(); + + GEN_NODE_TO_REG(expr1, reg1, 0, &op1); + ENSURE_GPR(&op1, expr1->rtype, reg1); + if (op1.reg != reg1) + emitpcode(PC_MR, reg1, op1.reg); + + branch_always(label3); + branch_label(label2); + + GEN_NODE_TO_REG(expr2, reg1, 0, &op2); + ENSURE_GPR(&op2, expr2->rtype, reg1); + if (op2.reg != reg1) + emitpcode(PC_MR, reg1, op2.reg); + + output->optype = OpndType_GPR; + output->reg = reg1; + } else if (IS_TYPE_VECTOR(type)) { + if (expr1->hascall || expr2->hascall) + reg1 = ALLOC_VR(); + else + reg1 = outputReg ? outputReg : ALLOC_VR(); + + GEN_NODE_TO_REG(expr1, reg1, 0, &op1); + ENSURE_VR(&op1, expr1->rtype, reg1); + if (op1.reg != reg1) + emitpcode(PC_VMR, reg1, op1.reg); + + branch_always(label3); + branch_label(label2); + + GEN_NODE_TO_REG(expr2, reg1, 0, &op2); + ENSURE_VR(&op2, expr2->rtype, reg1); + if (op2.reg != reg1) + emitpcode(PC_VMR, reg1, op2.reg); + + output->optype = OpndType_VR; + output->reg = reg1; + } else { + symbol_operand(output, maketemporary(type)); + indirect(output, NULL); + coerce_to_addressable(output); + + GEN_NODE(expr1, &op1); + + if (op1.object) { + if (op1.object->datatype == DLOCAL && (op1.object->u.var.info->flags & VarInfoFlag1)) + align = CMach_ArgumentAlignment(type); + else + align = CMach_AllocationAlignment(type, op1.object->qual); + } else { + align = CMach_AllocationAlignment(type, 0); + } + + max_align = CMach_AllocationAlignment(type, 0); + if (align > max_align) + align = max_align; + + move_block(output, &op1, type->size, align); + + branch_always(label3); + branch_label(label2); + + GEN_NODE(expr2, &op2); + + if (op2.object) { + if (op2.object->datatype == DLOCAL && (op2.object->u.var.info->flags & VarInfoFlag1)) + align = CMach_ArgumentAlignment(type); + else + align = CMach_AllocationAlignment(type, op2.object->qual); + } else { + align = CMach_AllocationAlignment(type, 0); + } + + if (align > max_align) + align = max_align; + + move_block(output, &op2, type->size, align); + } + + branch_label(label3); +} + +static Boolean CONDASS_is_ABS(ENode *cond, ENode *expr1, ENode *expr2) { + ENode *inner; + + int parity = 0; + while (ENODE_IS(cond, ELOGNOT)) { + parity = (parity + 1) & 1; + cond = cond->data.monadic; + } + + if (IS_INT_CONST_ZERO(cond->data.diadic.right)) { + inner = cond->data.diadic.left; + } else if (IS_INT_CONST_ZERO(cond->data.diadic.left)) { + inner = cond->data.diadic.left; + parity = (parity + 1) & 1; + } else { + return 0; + } + + switch (cond->type) { + case EGREATER: + case EGREATEREQU: + if (!parity) + return 0; + break; + case ELESS: + case ELESSEQU: + if (parity) + return 0; + break; + default: + return 0; + } + + if (!ENODE_IS(expr2, EMONMIN)) + return 0; + + expr2 = expr2->data.monadic; + if (ENODE_IS(inner, EASS)) { + inner = inner->data.diadic.left; + if (!ENODE_IS(expr2, EINDIRECT)) + return 0; + expr2 = expr2->data.monadic; + if (!ENODE_IS(expr1, EINDIRECT)) + return 0; + expr1 = expr1->data.monadic; + } + + return COND_is_ABS_MatchNodes(inner, expr1, expr2); +} + +static int CONDASS_is_OPASS_One(ENode *a, ENode *b, SInt32 *value, ENodeType *nodetype) { + Type *type; + + type = a->rtype; + if (!ENODE_IS(a, EINDIRECT)) + return 0; + a = a->data.monadic; + if (!ENODE_IS(a, EOBJREF)) + return 0; + + if (ENODE_IS(b, ETYPCON) && b->rtype == type) + b = b->data.monadic; + + if (b->type != EOR && b->type != EADD && b->type != ESUB) + return 0; + + *nodetype = b->type; + if (!IS_INT_CONST(b->data.diadic.right)) + return 0; + *value = b->data.diadic.right->data.intval.lo; + + if (*value != 1 && *value != -1) + return 0; + + b = b->data.diadic.left; + if (ENODE_IS(b, ETYPCON) && TYPE_FITS_IN_REGISTER(b->rtype)) + b = b->data.monadic; + + if (!ENODE_IS(b, EINDIRECT)) + return 0; + b = b->data.monadic; + if (!ENODE_IS(b, EOBJREF)) + return 0; + + if (a->data.objref == b->data.objref) + return 1; + return 0; +} + +void gen_CONDASS(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *cond; + ENode *expr1; + ENode *expr2; + Type *type; + PCodeLabel *label1; + PCodeLabel *label2; + Operand op1; + Operand op2; + Operand op3; + int reg1; + int reg2; + + expr1 = expr->data.cond.expr1; + expr2 = expr->data.cond.expr2; + type = expr->rtype; + + label1 = makepclabel(); + label2 = makepclabel(); + + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + memclrw(&op3, sizeof(Operand)); + + cond = evaluate_and_skip_comma(expr->data.cond.cond); + + if (TOC_use_fsel(expr)) { + ENode *left; + ENode *right; + ENode *tmp; + ENodeType nt; + Boolean flag; + Boolean flag2; + Operand op; + int tmpreg; + int fneg_reg; + int fsel_reg; + int final_reg; + + left = cond->data.diadic.left; + right = cond->data.diadic.right; + nt = cond->type; + flag = 0; + memclrw(&op, sizeof(Operand)); + + CError_ASSERT(3704, ENODE_IS(expr1, EINDIRECT)); + CError_ASSERT(3705, ENODE_IS(expr1->data.monadic, EOBJREF)); + + tmpreg = OBJECT_REG(expr1->data.monadic->data.objref); + final_reg = outputReg ? tmpreg : ALLOC_FPR(); + + switch (nt) { + case EGREATER: + tmp = left; + left = right; + right = tmp; + case ELESS: + case ENOTEQU: + tmp = expr1; + expr1 = expr2; + expr2 = tmp; + flag2 = 1; + break; + case ELESSEQU: + tmp = left; + left = right; + right = tmp; + flag2 = 0; + break; + case EGREATEREQU: + case EEQU: + flag2 = 0; + break; + default: + CError_FATAL(3744); + } + + if (ENODE_IS(left, EFLOATCONST) && CMach_FloatIsZero(left->data.floatval)) { + GEN_NODE(right, &op); + ENSURE_FPR(&op, right->rtype, 0); + flag = 1; + } else if (ENODE_IS(right, EFLOATCONST) && CMach_FloatIsZero(right->data.floatval)) { + GEN_NODE(left, &op); + ENSURE_FPR(&op, left->rtype, 0); + } else { + fp_binary_operator((type->size == 4) ? PC_FSUBS : PC_FSUB, left, right, 0, &op); + } + + switch (cond->type) { + case EEQU: + case ENOTEQU: + if (flag) { + GEN_NODE(expr1, &op1); + op3 = op1; + ENSURE_FPR(&op1, expr1->rtype, 0); + + GEN_NODE_TO_FPR(expr2, &op2, expr2->rtype, 0); + + fneg_reg = ALLOC_FPR(); + emitpcode(PC_FNEG, fneg_reg, op.reg); + fsel_reg = ALLOC_FPR(); + emitpcode(PC_FSEL, fsel_reg, op.reg, op2.reg, op1.reg); + emitpcode(PC_FSEL, final_reg, fneg_reg, op2.reg, fsel_reg); + } else { + GEN_NODE(expr1, &op1); + op3 = op1; + ENSURE_FPR(&op1, expr1->rtype, 0); + + GEN_NODE_TO_FPR(expr2, &op2, expr2->rtype, 0); + + fneg_reg = ALLOC_FPR(); + emitpcode(PC_FNEG, fneg_reg, op.reg); + fsel_reg = ALLOC_FPR(); + emitpcode(PC_FSEL, fsel_reg, op.reg, op2.reg, op1.reg); + emitpcode(PC_FSEL, final_reg, fneg_reg, op2.reg, fsel_reg); + } + break; + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + GEN_NODE(expr1, &op1); + GEN_NODE(expr2, &op2); + op3 = flag2 ? op2 : op1; + + ENSURE_FPR(&op1, expr1->rtype, 0); + ENSURE_FPR(&op2, expr2->rtype, 0); + + fneg_reg = op.reg; + if (flag) { + fneg_reg = ALLOC_FPR(); + emitpcode(PC_FNEG, fneg_reg, op.reg); + } + + emitpcode(PC_FSEL, final_reg, fneg_reg, op2.reg, op1.reg); + break; + default: + CError_FATAL(2862); + } + + if (op3.optype != OpndType_FPR) + store_fp(final_reg, &op3, type); + + output->optype = OpndType_FPR; + output->reg = final_reg; + return; + } + + if (TOC_use_isel(expr, 1)) { + Operand isel_op; + ENode *x; + ENode *y; + ENode *abs_expr; + + memclrw(&isel_op, sizeof(Operand)); + CError_ASSERT(3966, ENODE_IS(expr1, EINDIRECT)); + CError_ASSERT(3968, ENODE_IS(expr1->data.monadic, EOBJREF)); + + if (CONDASS_is_ABS(cond, expr1, expr2)) { + if (ENODE_IS(cond->data.diadic.left, EASS)) + GEN_NODE(cond->data.diadic.left, &isel_op); + else if (ENODE_IS(cond->data.diadic.right, EASS)) + GEN_NODE(cond->data.diadic.right, &isel_op); + + outputReg = OBJECT_REG(expr1->data.monadic->data.objref); + CError_ASSERT(3979, outputReg); + + GEN_NODE(expr1, &op1); + op3 = op1; + + CError_ASSERT(3986, op3.optype == OpndType_GPR && op3.reg == outputReg); + + ENSURE_GPR(&op1, expr1->rtype, 0); + if (expr1->rtype->size < 4) + extend32(output, expr1->rtype, op3.reg); + + reg1 = ALLOC_GPR(); + reg2 = ALLOC_GPR(); + emitpcode(PC_SRAWI, reg1, op1.reg, 31); + emitpcode(PC_XOR, reg2, reg1, op1.reg); + emitpcode(PC_SUBF, outputReg, reg1, reg2); + output->optype = OpndType_GPR; + output->reg = op3.reg; + + if (expr1->rtype->size < 4) + extend32(output, expr1->rtype, op3.reg); + + return; + } + } + + logical_expression(cond, label1, label2, label1); + branch_label(label1); + gen_ASS(expr, outputReg, outputRegHi, output); + branch_label(label2); +} + +void gen_FUNCCALL(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + if (is_intrinsic_function_call(expr)) + call_intrinsic_function(expr, outputReg, output); + else + call_function(expr, output); +} + +void gen_OBJREF(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + symbol_operand(output, expr->data.objref); +} + +void gen_UNEXPECTED(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + CError_FATAL(4160); +} + +static int small(ENode *expr) { + Type *type; + + type = expr->rtype; + if (!ENODE_IS(expr, ETYPCON)) + return 0; + + do { + expr = expr->data.monadic; + } while (ENODE_IS(expr, ETYPCON) && (type = expr->rtype)->size == 4); + + return IS_TYPE_INT_OR_ENUM(type) && ((type->size < 2) || (type->size == 2 && !is_unsigned(type))); +} + +void binary_operator(Opcode opcode, ENode *left, ENode *right, short outputReg, Operand *output) { + Operand opleft; + Operand opright; + int reg; + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE_TO_GPR(right, &opright, right->rtype, 0); + GEN_NODE_TO_GPR(left, &opleft, left->rtype, 0); + } else { + GEN_NODE_TO_GPR(left, &opleft, left->rtype, 0); + GEN_NODE_TO_GPR(right, &opright, right->rtype, 0); + } + + reg = outputReg ? outputReg : ALLOC_GPR(); + + if (opcode == PC_MULLW && small(left)) + emitpcode(opcode, reg, opright.reg, opleft.reg); + else + emitpcode(opcode, reg, opleft.reg, opright.reg); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void binary_immediate(Opcode opcode, ENode *left, SInt32 value, short outputReg, Operand *output) { + Operand opleft; + int reg; + + memclrw(&opleft, sizeof(Operand)); + GEN_NODE_TO_GPR(left, &opleft, left->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + + if (opcode == PC_MULLI && value == 0) + emitpcode(PC_LI, reg, 0); + else if (opcode == PC_MULLI && value == 1) + emitpcode(PC_MR, reg, opleft.reg); + else + emitpcode(opcode, reg, opleft.reg, value); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +void unary_operator(Opcode opcode, ENode *expr, short outputReg, Operand *output) { + Operand op; + int reg; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(opcode, reg, op.reg); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void or_xor_immediate(Opcode opcode, ENode *expr, SInt32 value, short outputReg, Operand *output) { + Operand op; + int reg; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + + if (expr->rtype->size > 2 && value != (value & 0xFFFF)) { + if (value & 0xFFFF) { + emitpcode((opcode == PC_ORI) ? PC_ORIS : PC_XORIS, reg, op.reg, value >> 16); + emitpcode(opcode, reg, reg, value & 0xFFFF); + } else { + emitpcode((opcode == PC_ORI) ? PC_ORIS : PC_XORIS, reg, op.reg, value >> 16); + } + } else { + emitpcode(opcode, reg, op.reg, value & 0xFFFF); + } + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void shift_left_immediate(ENode *expr, short shift, short negate, short outputReg, Operand *output) { + Operand op; + int reg; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + if (negate) + reg = ALLOC_GPR(); + else + reg = outputReg ? outputReg : ALLOC_GPR(); + + emitpcode(PC_RLWINM, reg, op.reg, shift & 31, 0, 31 - (shift & 31)); + + if (negate) { + int tmp = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_NEG, tmp, reg); + reg = tmp; + } + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void shift_right_immediate(ENode *expr, Type *type, short shift, short outputReg, Operand *output) { + Operand op; + int reg; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + + if (is_unsigned(type)) + emitpcode(PC_RLWINM, reg, op.reg, (32 - (shift & 31)) & 31, (shift & 31) + (32 - (type->size * 8)), 31); + else + emitpcode(PC_SRAWI, reg, op.reg, shift & 31); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void signed_divide_by_power_of_2(ENode *expr, int shift, int negate, short outputReg, Operand *output) { + Operand op; + int reg; + int tmpreg1; + int tmpreg2; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + if (!copts.optimizesize && shift == 1) { + tmpreg1 = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpreg1, op.reg, 1, 31, 31); + tmpreg2 = ALLOC_GPR(); + emitpcode(PC_ADD, tmpreg2, tmpreg1, op.reg); + reg = (outputReg && !negate) ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, reg, tmpreg2, 1); + } else { + tmpreg1 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpreg1, op.reg, shift); + reg = (outputReg && !negate) ? outputReg : ALLOC_GPR(); + emitpcode(PC_ADDZE, reg, tmpreg1); + } + + if (negate) { + int prevreg = reg; + reg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_NEG, reg, prevreg); + } + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void signed_mod_by_power_of_2(ENode *expr, int shift, int negate, short outputReg, Operand *output) { + Operand op; + int reg; + int tmpreg1; + int tmpreg2; + int tmpreg3; + int tmpreg4; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + + if (shift == 1) { + emitpcode(PC_RLWINM, tmpreg1, op.reg, 1, 31, 31); + emitpcode(PC_RLWINM, tmpreg2, op.reg, 0, 31, 31); + emitpcode(PC_XOR, tmpreg3, tmpreg2, tmpreg1); + emitpcode(PC_SUBF, reg, tmpreg1, tmpreg3); + } else { + tmpreg4 = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpreg1, op.reg, 32 - shift, 0, 31 - (32 - shift)); + emitpcode(PC_RLWINM, tmpreg2, op.reg, 1, 31, 31); + emitpcode(PC_SUBF, tmpreg3, tmpreg2, tmpreg1); + emitpcode(PC_RLWINM, tmpreg4, tmpreg3, shift, 0, 31); + emitpcode(PC_ADD, reg, tmpreg4, tmpreg2); + } + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void fp_binary_operator(Opcode opcode, ENode *left, ENode *right, short outputReg, Operand *output) { + Operand opleft; + Operand opright; + int reg; + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE_TO_FPR(right, &opright, right->rtype, 0); + GEN_NODE_TO_FPR(left, &opleft, left->rtype, 0); + } else { + GEN_NODE_TO_FPR(left, &opleft, left->rtype, 0); + GEN_NODE_TO_FPR(right, &opright, right->rtype, 0); + } + + reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(opcode, reg, opleft.reg, opright.reg); + + output->optype = OpndType_FPR; + output->reg = reg; +} + +void fp_unary_operator(Opcode opcode, ENode *expr, short outputReg, Operand *output) { + Operand op; + int reg; + + memclrw(&op, sizeof(Operand)); + GEN_NODE_TO_FPR(expr, &op, expr->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(opcode, reg, op.reg); + + output->optype = OpndType_FPR; + output->reg = reg; +} + +void fp_multiply_add(Opcode opcode, ENode *a, ENode *b, ENode *c, short outputReg, Operand *output) { + Operand opA; + Operand opB; + Operand opC; + int reg; + + memclrw(&opA, sizeof(Operand)); + memclrw(&opB, sizeof(Operand)); + memclrw(&opC, sizeof(Operand)); + + if (c->hascall) { + GEN_NODE_TO_FPR(c, &opC, c->rtype, 0); + if (b->hascall) { + GEN_NODE_TO_FPR(b, &opB, b->rtype, 0); + GEN_NODE_TO_FPR(a, &opA, a->rtype, 0); + } else { + GEN_NODE_TO_FPR(a, &opA, a->rtype, 0); + GEN_NODE_TO_FPR(b, &opB, b->rtype, 0); + } + } else { + if (b->hascall) { + GEN_NODE_TO_FPR(b, &opB, b->rtype, 0); + GEN_NODE_TO_FPR(a, &opA, a->rtype, 0); + GEN_NODE_TO_FPR(c, &opC, c->rtype, 0); + } else { + GEN_NODE_TO_FPR(a, &opA, a->rtype, 0); + GEN_NODE_TO_FPR(b, &opB, b->rtype, 0); + GEN_NODE_TO_FPR(c, &opC, c->rtype, 0); + } + } + + reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(opcode, reg, opA.reg, opB.reg, opC.reg); + + output->optype = OpndType_FPR; + output->reg = reg; +} + +void gen_COMPARE(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + expr = evaluate_and_skip_comma(expr); + if (TYPE_IS_8BYTES(expr->data.diadic.right->rtype) || TYPE_IS_8BYTES(expr->data.diadic.left->rtype)) + I8_gen_condition(expr, output, 1); + else + gen_condition_gpr(expr, output, outputReg); +} + +void gen_LOGICAL(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + ENodeType op; + + expr = evaluate_and_skip_comma(expr); + inner = evaluate_and_skip_comma(expr->data.monadic); + expr->data.monadic = inner; + + if (ENODE_IS(expr, ELOGNOT) && !ENODE_IS2(inner, ELAND, ELOR)) { + op = inner->type; + if (ENODE_IS(inner, ELOGNOT)) { + switch (inner->data.monadic->type) { + case ELOGNOT: + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + case ELAND: + case ELOR: + GEN_NODE(inner->data.monadic, output); + if (expr->data.monadic->rtype->size < 4) + extend32(output, expr->data.monadic->rtype, 0); + ENSURE_GPR(output, expr->data.monadic->rtype, 0); + return; + } + } + + if (ENODE_IS(inner, ENOTEQU) && !TYPE_IS_8BYTES(inner->data.diadic.left->rtype) && ENODE_IS(inner->data.diadic.right, EINTCONST) && inner->data.diadic.right->data.intval.lo == 0) { + int tmpreg1; + int tmpreg2; + GEN_NODE(inner->data.diadic.left, output); + if (inner->data.diadic.left->rtype->size < 4) + extend32(output, inner->data.diadic.left->rtype, 0); + ENSURE_GPR(output, inner->data.diadic.left->rtype, 0); + + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + CError_ASSERT(4853, output->optype == OpndType_GPR); + + emitpcode(PC_CNTLZW, tmpreg2, output->reg); + emitpcode(PC_RLWINM, tmpreg1, tmpreg2, 27, 5, 31); + output->optype = OpndType_GPR; + output->reg = tmpreg1; + } else { + int tmpreg1; + int tmpreg2; + ENodeType inverted; + + inverted = invert_relop(op); + if (op != inverted && !IS_TYPE_FLOAT(inner->data.diadic.left->rtype)) { + inner->type = inverted; + gen_COMPARE(inner, 0, 0, output); + inner->type = inverted; + return; + } + + GEN_NODE(inner, output); + if (inner->rtype->size < 4) + extend32(output, inner->rtype, 0); + ENSURE_GPR(output, inner->rtype, 0); + + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + CError_ASSERT(4883, output->optype == OpndType_GPR); + + emitpcode(PC_CNTLZW, tmpreg2, output->reg); + emitpcode(PC_RLWINM, tmpreg1, tmpreg2, 27, 5, 31); + output->optype = OpndType_GPR; + output->reg = tmpreg1; + } + } else { + PCodeLabel *label1; + PCodeLabel *label2; + int tmpreg; + + label1 = makepclabel(); + label2 = makepclabel(); + + tmpreg = ALLOC_GPR(); + emitpcode(PC_LI, tmpreg, 0); + logical_expression(expr, label1, label2, label1); + branch_label(label1); + emitpcode(PC_LI, tmpreg, 1); + branch_label(label2); + output->optype = OpndType_GPR; + output->reg = tmpreg; + } +} + +void gen_NULLCHECK(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + Operand opleft; + Operand opright; + PrecomputedOperand *precomp; + int flag; + int reg; + PCodeLabel *label; + + left = expr->data.nullcheck.nullcheckexpr; + right = expr->data.nullcheck.condexpr; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + flag = !IS_TYPE_VOID(expr->rtype) && !expr->ignored; + + GEN_NODE(left, &opleft); + if (left->rtype->size < 4) + extend32(&opleft, left->rtype, 0); + ENSURE_GPR(&opleft, left->rtype, 0); + + precomp = lalloc(sizeof(PrecomputedOperand)); + precomp->precompid = expr->data.nullcheck.precompid; + precomp->operand = opleft; + precomp->next = precomputedoperands; + precomputedoperands = precomp; + + emitpcode(PC_CMPI, 0, opleft.reg, 0); + if (flag) { + emitpcode(PC_MR, reg = ALLOC_GPR(), opleft.reg); + } + + label = makepclabel(); + branch_conditional(0, EEQU, 1, label); + GEN_NODE(right, &opright); + precomputedoperands = precomputedoperands->next; + + if (flag) { + ENSURE_GPR(&opright, right->rtype, reg); + if (opright.reg != reg) + emitpcode(PC_MR, reg, opright.reg); + output->optype = OpndType_GPR; + output->reg = reg; + } + + branch_label(label); +} + +void gen_PRECOMP(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + PrecomputedOperand *precomp; + + for (precomp = precomputedoperands; precomp; precomp = precomp->next) { + if (precomp->precompid == expr->data.precompid) + break; + } + + *output = precomp->operand; +} + +void logical_expression(ENode *cond, PCodeLabel *if_true, PCodeLabel *if_false, PCodeLabel *end) { + PCodeLabel *label; + Operand op; + memclrw(&op, sizeof(Operand)); + + cond = evaluate_and_skip_comma(cond); + switch (cond->type) { + case ELAND: + label = makepclabel(); + logical_expression(cond->data.diadic.left, label, if_false, label); + branch_label(label); + logical_expression(cond->data.diadic.right, if_true, if_false, end); + break; + case ELOR: + label = makepclabel(); + logical_expression(cond->data.diadic.left, if_true, label, label); + branch_label(label); + logical_expression(cond->data.diadic.right, if_true, if_false, end); + break; + case ELOGNOT: + logical_expression(cond->data.monadic, if_false, if_true, end); + break; + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + if (TYPE_IS_8BYTES(cond->data.diadic.right->rtype) || TYPE_IS_8BYTES(cond->data.diadic.left->rtype)) + I8_gen_condition(cond, &op, 0); + else + gen_condition(cond, &op); + + if (end == if_true) + branch_conditional(op.reg, op.regOffset, 0, if_false); + else + branch_conditional(op.reg, op.regOffset, 1, if_true); + break; + default: + CError_FATAL(5160); + } +} + +static void logical_expression_nobranch(ENode *cond, Boolean invert, Operand *output) { + cond = evaluate_and_skip_comma(cond); + switch (cond->type) { + case ELOGNOT: + logical_expression_nobranch(cond->data.monadic, 1, output); + break; + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + if (invert) { + ENodeType nt = invert_relop(cond->type); + CError_ASSERT(5190, nt != cond->type); + cond->type = nt; + } + + if (TYPE_IS_8BYTES(cond->data.diadic.right->rtype) || TYPE_IS_8BYTES(cond->data.diadic.left->rtype)) + I8_gen_condition(cond, output, 0); + else + gen_condition(cond, output); + + break; + default: + CError_FATAL(5206); + } +} + +static ENodeType invert_relop(ENodeType nt) { + switch (nt) { + case ELESS: return EGREATEREQU; + case EGREATER: return ELESSEQU; + case ELESSEQU: return EGREATER; + case EGREATEREQU: return ELESS; + case EEQU: return ENOTEQU; + case ENOTEQU: return EEQU; + default: return nt; + } +} + +static int reverse_relop(int nt) { + switch (nt) { + case ELESS: return EGREATER; + case EGREATER: return ELESS; + case ELESSEQU: return EGREATEREQU; + case EGREATEREQU: return ELESSEQU; + default: return nt; + } +} + +void gen_condition(ENode *cond, Operand *output) { + ENode *left; + ENode *right; + + left = cond->data.diadic.left; + right = cond->data.diadic.right; + if (IS_TYPE_FLOAT(left->rtype)) { + compare_floating(cond->type, left, right, output); + return; + } + + if (ENODE_IS(right, EINTCONST)) { + if (is_unsigned(left->rtype)) { + UInt32 val = right->data.intval.lo; + if (FITS_IN_USHORT(val)) { + compare_immediate(cond->type, left, val, output); + return; + } else if (ENODE_IS2(cond, EEQU, ENOTEQU)) { + compare_immediate_long(cond->type, left, val, output); + return; + } + } else { + UInt32 val = right->data.intval.lo; + if (FITS_IN_SHORT(val)) { + compare_immediate(cond->type, left, val, output); + return; + } else if (ENODE_IS2(cond, EEQU, ENOTEQU)) { + compare_immediate_long(cond->type, left, val, output); + return; + } + } + } else if (ENODE_IS(left, EINTCONST)) { + if (is_unsigned(right->rtype)) { + UInt32 val = left->data.intval.lo; + if (FITS_IN_USHORT(val)) { + compare_immediate(reverse_relop(cond->type), right, val, output); + return; + } else if (ENODE_IS2(cond, EEQU, ENOTEQU)) { + compare_immediate_long(reverse_relop(cond->type), right, val, output); + return; + } + } else { + UInt32 val = left->data.intval.lo; + if (FITS_IN_SHORT(val)) { + compare_immediate(reverse_relop(cond->type), right, val, output); + return; + } else if (ENODE_IS2(cond, EEQU, ENOTEQU)) { + compare_immediate_long(reverse_relop(cond->type), right, val, output); + return; + } + } + } + + compare_integer(cond->type, left, right, output); +} + +void gen_condition_gpr(ENode *cond, Operand *output, short outputReg) { + ENode *left; + ENode *right; + Operand condOp; + int finalReg; + int tmpReg; + int tmpReg2; + int tmpReg3; + int tmpReg4; + int a; + int b; + + left = cond->data.diadic.left; + right = cond->data.diadic.right; + memclrw(&condOp, sizeof(Operand)); + + if (!IS_TYPE_FLOAT(left->rtype)) { + Operand op1; + Operand op2; + Operand opTmp; + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + memclrw(&opTmp, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &op2); + if (!IS_INT_CONST_ZERO(right)) { + if (right->rtype->size < 4) + extend32(&op2, right->rtype, 0); + ENSURE_GPR(&op2, right->rtype, 0); + } + + GEN_NODE(left, &op1); + if (left->rtype->size < 4) + extend32(&op1, left->rtype, 0); + ENSURE_GPR(&op1, left->rtype, 0); + } else { + GEN_NODE(left, &op1); + ENSURE_GPR(&op1, left->rtype, 0); + if (left->rtype->size < 4) + extend32(&op1, left->rtype, 0); + + GEN_NODE(right, &op2); + if (!IS_INT_CONST_ZERO(right)) { + if (right->rtype->size < 4) + extend32(&op2, right->rtype, 0); + ENSURE_GPR(&op2, right->rtype, 0); + } + } + + switch (cond->type) { + case EEQU: + if ( + copts.peephole && + IS_INT_CONST(right) && + pclastblock->pcodeCount > 0 && + pclastblock->lastPCode->op == PC_RLWINM && + pclastblock->lastPCode->args[0].data.reg.reg == op1.reg + ) + { + PCode *pc = pclastblock->lastPCode; + SInt32 a = pc->args[2].data.imm.value; + SInt32 b = pc->args[3].data.imm.value; + SInt32 value = right->data.intval.lo; + if (b == pc->args[4].data.imm.value) { + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + if (value != (value & 1)) { + emitpcode(PC_LI, finalReg, 0); + } else if (value == 0) { + tmpReg = ALLOC_GPR(); + emitpcode( + PC_RLWINM, tmpReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + emitpcode(PC_XORI, finalReg, tmpReg, 1); + } else if (value == 1) { + emitpcode( + PC_RLWINM, finalReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + } else { + CError_FATAL(5434); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + } + + if (IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_CNTLZW, tmpReg, op1.reg, 0); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg, 27, 5, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op1.reg, op2.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_CNTLZW, tmpReg2, tmpReg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg2, 27, 5, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + case ENOTEQU: + if ( + copts.peephole && + IS_INT_CONST(right) && + pclastblock->pcodeCount > 0 && + pclastblock->lastPCode->op == PC_RLWINM && + pclastblock->lastPCode->args[0].data.reg.reg == op1.reg + ) + { + PCode *pc = pclastblock->lastPCode; + SInt32 a = pc->args[2].data.imm.value; + SInt32 b = pc->args[3].data.imm.value; + SInt32 value = right->data.intval.lo; + if (b == pc->args[4].data.imm.value) { + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + if (value != (value & 1)) { + emitpcode(PC_LI, finalReg, 1); + } else if (value == 0) { + emitpcode( + PC_RLWINM, finalReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + } else if (value == 1) { + tmpReg = ALLOC_GPR(); + emitpcode( + PC_RLWINM, tmpReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + emitpcode(PC_XORI, finalReg, tmpReg, 1); + } else { + CError_FATAL(5503); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + } + + if (IS_INT_CONST_ZERO(right)) { + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_ADDIC, tmpReg, op1.reg, -1); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg, op1.reg); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_NEG, tmpReg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_OR, tmpReg2, tmpReg, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg2, 1, 31, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op1.reg, op2.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ADDIC, tmpReg2, tmpReg, -1); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg2, tmpReg); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op1.reg, op2.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg2, op2.reg, op1.reg); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_OR, tmpReg3, tmpReg, tmpReg2); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg3, 1, 31, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + case EGREATEREQU: + if (!is_unsigned(left->rtype) && IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg, op1.reg, 1, 31, 31); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_XORI, finalReg, tmpReg, 1); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + opTmp = op2; + op2 = op1; + op1 = opTmp; + + case ELESSEQU: + if (is_unsigned(left->rtype)) { + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_LI, tmpReg, -1); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBFC, tmpReg2, op1.reg, op2.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFZE, finalReg, tmpReg); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op1.reg, op2.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ORC, tmpReg2, op2.reg, op1.reg); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg3, tmpReg, 31, 1, 31); + tmpReg4 = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg4, tmpReg3, tmpReg2); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg4, 1, 31, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_LI, tmpReg, 1); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_CNTLZW, tmpReg2, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWNM, finalReg, tmpReg, tmpReg2, 31, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpReg2, op2.reg, 31); + tmpReg = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg, op1.reg, 1, 31, 31); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_SUBFC, tmpReg3, op1.reg, op2.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_ADDE, finalReg, tmpReg2, tmpReg); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + case EGREATER: + if (!is_unsigned(left->rtype) && IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_NEG, tmpReg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ANDC, tmpReg2, tmpReg, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg2, 1, 31, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + opTmp = op2; + op2 = op1; + op1 = opTmp; + + case ELESS: + if (is_unsigned(left->rtype)) { + if (left->rtype->size <= 2) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op2.reg, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg, 1, 31, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + } else { + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBFC, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBFE, tmpReg2, tmpReg, tmpReg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_NEG, finalReg, tmpReg2); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_XOR, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_CNTLZW, tmpReg2, tmpReg); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_SLW, tmpReg3, op2.reg, tmpReg2); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg3, 1, 31, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + return; + } + + if (IS_INT_CONST_ZERO(right)) { + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, op1.reg, 1, 31, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + tmpReg = ALLOC_GPR(); + emitpcode(PC_XOR, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpReg2, tmpReg, 1); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_AND, tmpReg3, tmpReg, op2.reg); + tmpReg4 = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg4, tmpReg3, tmpReg2); + + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, finalReg, tmpReg4, 1, 31, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + default: + CError_FATAL(5777); + } + } + + gen_condition(cond, &condOp); + emitpcode(PC_MFCR, tmpReg = used_virtual_registers[RegClass_GPR]++); + a = 0; + b = condOp.reg * 4; + switch (condOp.regOffset) { + case ENOTEQU: + a = 1; + case EEQU: + b += 2; + break; + case EGREATEREQU: + a = 1; + break; + case ELESSEQU: + a = 1; + case EGREATER: + b += 1; + break; + } + + finalReg = outputReg ? outputReg : ALLOC_GPR(); + if (a) { + emitpcode(PC_RLWINM, tmpReg, tmpReg, b + 1, 31, 31); + emitpcode(PC_XORI, finalReg, tmpReg, 1); + } else { + emitpcode(PC_RLWINM, finalReg, tmpReg, b + 1, 31, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; +} + +void gen_negated_condition_gpr(ENode *cond, Operand *output, short outputReg) { + ENode *left; + ENode *right; + Operand op1; + Operand op2; + Operand opTmp; + int finalReg; + int tmpReg; + int tmpReg2; + int tmpReg3; + + left = cond->data.diadic.left; + right = cond->data.diadic.right; + CError_ASSERT(5843, TYPE_FITS_IN_REGISTER(left->rtype) && TYPE_FITS_IN_REGISTER(right->rtype)); + + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + memclrw(&opTmp, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &op2); + if (!IS_INT_CONST_ZERO(right)) { + if (right->rtype->size < 4) + extend32(&op2, right->rtype, 0); + ENSURE_GPR(&op2, right->rtype, 0); + } + + GEN_NODE(left, &op1); + if (left->rtype->size < 4) + extend32(&op1, left->rtype, 0); + ENSURE_GPR(&op1, left->rtype, 0); + } else { + GEN_NODE(left, &op1); + ENSURE_GPR(&op1, left->rtype, 0); + if (left->rtype->size < 4) + extend32(&op1, left->rtype, 0); + + GEN_NODE(right, &op2); + if (!IS_INT_CONST_ZERO(right)) { + if (right->rtype->size < 4) + extend32(&op2, right->rtype, 0); + ENSURE_GPR(&op2, right->rtype, 0); + } + } + + switch (cond->type) { + case EEQU: + if ( + copts.peephole && + IS_INT_CONST(right) && + pclastblock->pcodeCount > 0 && + pclastblock->lastPCode->op == PC_RLWINM && + pclastblock->lastPCode->args[0].data.reg.reg == op1.reg + ) + { + PCode *pc = pclastblock->lastPCode; + SInt32 a = pc->args[2].data.imm.value; + SInt32 b = pc->args[3].data.imm.value; + SInt32 value = right->data.intval.lo; + if (b == pc->args[4].data.imm.value) { + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + if (value != (value & 1)) { + emitpcode(PC_LI, finalReg, 0); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (value == 0) { + tmpReg = ALLOC_GPR(); + emitpcode( + PC_RLWINM, tmpReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + emitpcode(PC_ADDI, finalReg, tmpReg, 0, -1); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (value == 1) { + tmpReg = ALLOC_GPR(); + emitpcode( + PC_RLWINM, + tmpReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, + 31, + 31); + emitpcode(PC_NEG, finalReg, tmpReg); + output->optype = OpndType_GPR; + output->reg = tmpReg; // bug??? + return; + } + + CError_FATAL(5923); + } + } + + if (IS_INT_CONST_ZERO(right)) { + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_ADDIC, tmpReg, op1.reg, -1); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg, tmpReg); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_CNTLZW, tmpReg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg2, tmpReg, 27, 31, 31); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_NEG, finalReg, tmpReg2); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ADDIC, tmpReg2, tmpReg, -1); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg2, tmpReg2); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg2, op1.reg, op2.reg); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_NOR, tmpReg3, tmpReg, tmpReg2); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, finalReg, tmpReg3, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + case ENOTEQU: + if ( + copts.peephole && + IS_INT_CONST(right) && + pclastblock->pcodeCount > 0 && + pclastblock->lastPCode->op == PC_RLWINM && + pclastblock->lastPCode->args[0].data.reg.reg == op1.reg + ) + { + PCode *pc = pclastblock->lastPCode; + SInt32 a = pc->args[2].data.imm.value; + SInt32 b = pc->args[3].data.imm.value; + SInt32 value = right->data.intval.lo; + if (b == pc->args[4].data.imm.value) { + finalReg = outputReg ? outputReg : ALLOC_GPR(); + + if (value != (value & 1)) { + emitpcode(PC_LI, finalReg, -1); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (value == 0) { + tmpReg = ALLOC_GPR(); + emitpcode( + PC_RLWINM, tmpReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + emitpcode(PC_NEG, finalReg, tmpReg); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (value == 1) { + tmpReg = ALLOC_GPR(); + emitpcode( + PC_RLWINM, tmpReg, + pc->args[1].data.reg.reg, + (a + b + 1) & 31, 31, 31); + emitpcode(PC_ADDI, finalReg, tmpReg, 0, -1); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + CError_FATAL(6031); + } + } + + if (IS_INT_CONST_ZERO(right)) { + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBFIC, tmpReg, op1.reg, 0); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg, tmpReg); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_NEG, tmpReg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_OR, tmpReg2, tmpReg, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, finalReg, tmpReg2, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (copts.optimizesize) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBFIC, tmpReg2, tmpReg, 0); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg2, tmpReg2); + } else { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg2, op1.reg, op2.reg); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_OR, tmpReg3, tmpReg, tmpReg2); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, finalReg, tmpReg3, 31); + } + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + case EGREATEREQU: + if (!is_unsigned(left->rtype) && IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg, op1.reg, 1, 31, 31); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_ADDI, finalReg, tmpReg, 0, -1); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + opTmp = op2; + op2 = op1; + op1 = opTmp; + + case ELESSEQU: + if (is_unsigned(left->rtype)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBFC, tmpReg, op1.reg, op2.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ADDZE, tmpReg2, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBF, finalReg, tmpReg2, op1.reg); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_NEG, tmpReg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ORC, tmpReg2, op1.reg, tmpReg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, finalReg, tmpReg2, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + tmpReg = ALLOC_GPR(); + emitpcode(PC_XORIS, tmpReg, op1.reg, 0x8000); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_SUBF, tmpReg2, op1.reg, op2.reg); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_ADDC, tmpReg3, tmpReg2, tmpReg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg3, tmpReg3); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + case EGREATER: + if (!is_unsigned(left->rtype) && IS_INT_CONST_ZERO(right)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_NEG, tmpReg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_ANDC, tmpReg2, tmpReg, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, finalReg, tmpReg2, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + opTmp = op2; + op2 = op1; + op1 = opTmp; + + case ELESS: + if (is_unsigned(left->rtype)) { + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBFC, tmpReg, op2.reg, op1.reg); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg, tmpReg); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + if (IS_INT_CONST_ZERO(right)) { + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SRAWI, finalReg, op1.reg, 31); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + } + + tmpReg = ALLOC_GPR(); + emitpcode(PC_SUBFC, tmpReg, op2.reg, op1.reg); + tmpReg2 = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg2, op2.reg, 1, 31, 31); + tmpReg3 = ALLOC_GPR(); + emitpcode(PC_RLWINM, tmpReg3, op1.reg, 1, 31, 31); + finalReg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_SUBFE, finalReg, tmpReg3, tmpReg2); + output->optype = OpndType_GPR; + output->reg = finalReg; + return; + + default: + CError_FATAL(6240); + } +} + +void compare_floating(short nt, ENode *left, ENode *right, Operand *output) { + Operand opleft; + Operand opright; + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE_TO_FPR(right, &opright, right->rtype, 0); + GEN_NODE_TO_FPR(left, &opleft, left->rtype, 0); + } else { + GEN_NODE_TO_FPR(left, &opleft, left->rtype, 0); + GEN_NODE_TO_FPR(right, &opright, right->rtype, 0); + } + + emitpcode((nt == EEQU || nt == ENOTEQU) ? PC_FCMPU : PC_FCMPO, 0, opleft.reg, opright.reg); + if (nt == ELESSEQU) { + emitpcode(PC_CROR, 0, 2, 0, 0, 0, 2); + nt = EEQU; + } else if (nt == EGREATEREQU) { + emitpcode(PC_CROR, 0, 2, 0, 1, 0, 2); + nt = EEQU; + } + + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = nt; +} + +void compare_integer(short nt, ENode *left, ENode *right, Operand *output) { + Operand opleft; + Operand opright; + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + if (right->rtype->size < 4) + extend32(&opright, right->rtype, 0); + ENSURE_GPR(&opright, right->rtype, 0); + + GEN_NODE(left, &opleft); + if (left->rtype->size < 4) + extend32(&opleft, left->rtype, 0); + ENSURE_GPR(&opleft, left->rtype, 0); + } else { + GEN_NODE(left, &opleft); + ENSURE_GPR(&opleft, left->rtype, 0); + if (left->rtype->size < 4) + extend32(&opleft, left->rtype, 0); + + GEN_NODE(right, &opright); + if (right->rtype->size < 4) + extend32(&opright, right->rtype, 0); + ENSURE_GPR(&opright, right->rtype, 0); + } + + emitpcode(is_unsigned(left->rtype) ? PC_CMPL : PC_CMP, 0, opleft.reg, opright.reg); + + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = nt; +} + +void compare_immediate(short nt, ENode *left, SInt32 value, Operand *output) { + int postIncFlag; + short postIncReg; + Operand op; + SInt32 postIncValue; + + memclrw(&op, sizeof(Operand)); + + postIncFlag = ispostincrementopportunity(left, &op, &postIncValue); + if (!postIncFlag) { + GEN_NODE(left, &op); + if (op.optype != OpndType_CRField) { + if (left->rtype->size < 4) + extend32(&op, left->rtype, 0); + else + ENSURE_GPR(&op, left->rtype, 0); + } + } else { + postIncReg = op.reg; + if (left->rtype->size < 4) + extend32(&op, left->rtype, 0); + ENSURE_GPR(&op, left->rtype, 0); + } + + if (op.optype == OpndType_CRField) { + if ( + (nt == EEQU && value == 1) || + (nt == ENOTEQU && value == 0) || + (nt == EGREATER && value == 0) || + (nt == EGREATEREQU && value == 1) + ) + { + *output = op; + return; + } + + if ( + (nt == EEQU && value == 0) || + (nt == ENOTEQU && value == 1) || + (nt == ELESS && value == 1) || + (nt == ELESSEQU && value == 0) + ) + { + *output = op; + switch (op.regOffset) { + case EEQU: + output->regOffset = ENOTEQU; + return; + case ENOTEQU: + output->regOffset = EEQU; + return; + case ELESS: + output->regOffset = EGREATEREQU; + return; + case EGREATER: + output->regOffset = ELESSEQU; + return; + case ELESSEQU: + output->regOffset = EGREATER; + return; + case EGREATEREQU: + output->regOffset = ELESS; + return; + } + } + + ENSURE_GPR(&op, left->rtype, 0); + } + + if ( + copts.peephole && + value == 0 && + pclastblock->pcodeCount > 0 && + pclastblock->lastPCode->op != PC_RLWINM && + (PCODE_FLAG_SET_F(pclastblock->lastPCode) & (fIsMove | fSideEffects | fCanSetRecordBit | fOpTypeGPR)) == (fCanSetRecordBit | fOpTypeGPR) && + pclastblock->lastPCode->args[0].data.reg.reg == op.reg && + (!is_unsigned(left->rtype) || nt == EEQU || nt == ENOTEQU) + ) + { + pcsetrecordbit(pclastblock->lastPCode); + } else { + emitpcode(is_unsigned(left->rtype) ? PC_CMPLI : PC_CMPI, 0, op.reg, value); + } + + if (postIncFlag) + add_register_immediate(postIncReg, postIncReg, postIncValue); + + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = nt; +} + +void compare_immediate_long(short nt, ENode *left, SInt32 value, Operand *output) { + int postIncFlag; + short postIncReg; + int outputReg; + Operand op; + SInt32 postIncValue; + + memclrw(&op, sizeof(Operand)); + + postIncFlag = ispostincrementopportunity(left, &op, &postIncValue); + if (!postIncFlag) { + GEN_NODE(left, &op); + } else { + postIncReg = op.reg; + } + + if (left->rtype->size < 4) + extend32(&op, left->rtype, 0); + ENSURE_GPR(&op, left->rtype, 0); + + outputReg = ALLOC_GPR(); + emitpcode(PC_ADDIS, outputReg, op.reg, 0, (SInt16) (~(value >> 16) + 1)); + emitpcode(PC_CMPLI, 0, outputReg, value & 0xFFFF); + + if (postIncFlag) + add_register_immediate(postIncReg, postIncReg, postIncValue); + + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = nt; +} + +static int ismask(SInt32 value, short *first, short *last) { + int start, end, bit; + start = end = -1; + for (bit = 31; bit >= 0; bit--) { + if (value & 1) { + if (start != -1) + return 0; + if (end == -1) + end = bit; + } else { + if (end != -1 && start == -1) + start = bit + 1; + } + value >>= 1; + } + + if (end == -1) + return 0; + if (start == -1) + start = 0; + *first = start; + *last = end; + return 1; +} + +int ismaskconstant(SInt32 value, short *first, short *last) { + short my_first; + short my_last; + if (ismask(value, first, last)) + return 1; + + if (value && ismask(~value, &my_first, &my_last)) { + *first = my_last + 1; + *last = my_first - 1; + return 1; + } else { + return 0; + } +} + +static void shift_and_mask(ENode *expr, short a, short b, short c, short outputReg, Operand *output) { + Operand op; + int reg; + + memclrw(&op, sizeof(Operand)); + GEN_NODE(expr, &op); + ENSURE_GPR(&op, expr->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, reg, op.reg, a, b, c); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +int ispostincrementopportunity(ENode *expr, Operand *op, SInt32 *value) { + Type *type; + int reg; + + type = expr->rtype; + if (!ENODE_IS2(expr, EPOSTINC, EPOSTDEC)) + return 0; + if (!ENODE_IS(expr->data.monadic, EINDIRECT)) + return 0; + if (!ENODE_IS(expr->data.monadic->data.monadic, EOBJREF)) + return 0; + + reg = OBJECT_REG(expr->data.monadic->data.monadic->data.objref); + if (!reg) + return 0; + + if (IS_TYPE_POINTER(type)) { + if (ENODE_IS(expr, EPOSTINC)) + *value = TPTR_TARGET(type)->size; + else + *value = -TPTR_TARGET(type)->size; + } else { + if (ENODE_IS(expr, EPOSTINC)) + *value = 1; + else + *value = -1; + } + + op->optype = OpndType_GPR; + op->reg = reg; + return 1; +} + +void add_register_immediate(short regA, short regB, SInt32 value) { + if (!FITS_IN_SHORT(value)) { + emitpcode(PC_ADDIS, regA, regB, 0, HIGH_PART(value)); + if (LOW_PART(value)) + emitpcode(PC_ADDI, regA, regA, 0, LOW_PART(value)); + } else { + emitpcode(PC_ADDI, regA, regB, 0, value); + } +} + +static int ispowerof2(SInt32 val) { + int bit = getbit(val); + return (bit > 0 && bit < 31) ? bit : 0; +} + +void I8_gen_ADD(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + int is_uns; + ENode *left; + ENode *right; + short reg; + short regHi; + short tmpreg1; + short tmpreg2; + SInt32 skipleft; + SInt32 skipright; + Operand opleft; + Operand opright; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + if (TYPE_IS_8BYTES(right->rtype)) + coerce_to_register_pair(&opright, right->rtype, 0, 0); + else + ENSURE_GPR(&opright, right->rtype, 0); + + GEN_NODE(left, &opleft); + if (TYPE_IS_8BYTES(left->rtype)) + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + else + ENSURE_GPR(&opleft, left->rtype, 0); + } else { + GEN_NODE(left, &opleft); + if (TYPE_IS_8BYTES(left->rtype)) + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + else + ENSURE_GPR(&opleft, left->rtype, 0); + + GEN_NODE(right, &opright); + if (TYPE_IS_8BYTES(right->rtype)) + coerce_to_register_pair(&opright, right->rtype, 0, 0); + else + ENSURE_GPR(&opright, right->rtype, 0); + } + + reg = ALLOC_GPR(); + regHi = ALLOC_GPR(); + is_uns = is_unsigned(expr->rtype) != 0; + skipleft = GetSizeSkip(left); + skipright = GetSizeSkip(right); + + if (skipleft < skipright) { + Operand tmpop; + SInt32 tmp; + + expr->data.diadic.left = right; + expr->data.diadic.right = left; + left = expr->data.diadic.left; + right = expr->data.diadic.right; + + tmpop = opright; + opright = opleft; + opleft = tmpop; + + tmp = skipleft; + skipleft = skipright; + skipright = tmp; + } + + switch (skipleft + skipright) { + case 1 + 1: + case 1 + 2: + case 2 + 2: + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo)) { + emitpcode(PC_ADDIC, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo)) { + emitpcode(PC_ADDIC, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + } else { + emitpcode(PC_ADDC, reg, opleft.reg, opright.reg); + } + if (is_uns) + emitpcode(PC_LI, regHi, 0); + else + emitpcode(PC_SRAWI, regHi, reg, 31); + break; + case 1 + 4: + case 2 + 4: + case 4 + 4: + if (!is_uns) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpreg2, opleft.reg, 31); + emitpcode(PC_SRAWI, tmpreg1, opright.reg, 31); + } + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo)) { + emitpcode(PC_ADDIC, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo)) { + emitpcode(PC_ADDIC, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + } else { + emitpcode(PC_ADDC, reg, opleft.reg, opright.reg); + } + if (is_uns) { + tmpreg1 = ALLOC_GPR(); + emitpcode(PC_LI, tmpreg1, 0); + emitpcode(PC_ADDZE, regHi, tmpreg1); + } else { + emitpcode(PC_ADDE, regHi, tmpreg1, tmpreg2); + } + break; + case 1 + 8: + case 2 + 8: + case 4 + 8: + CError_ASSERT(6933, skipleft == 8); + if (!is_uns) { + tmpreg2 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpreg2, opright.reg, 31); + } + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo) && left->data.intval.hi == 0) { + emitpcode(PC_ADDIC, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo) && right->data.intval.hi == 0) { + emitpcode(PC_ADDIC, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + } else { + emitpcode(PC_ADDC, reg, opleft.reg, opright.reg); + } + if (is_uns) + emitpcode(PC_ADDZE, regHi, opleft.regHi); + else + emitpcode(PC_ADDE, regHi, opleft.regHi, tmpreg2); + break; + case 8 + 8: + emitpcode(PC_ADDC, reg, opleft.reg, opright.reg); + emitpcode(PC_ADDE, regHi, opleft.regHi, opright.regHi); + break; + default: + CError_FATAL(6979); + } + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_INTCONST(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + short reg; + short regHi; + + reg = outputReg ? outputReg : ALLOC_GPR(); + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + + load_immediate(reg, expr->data.intval.lo); + load_immediate(regHi, expr->data.intval.hi); + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_SUB(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + int is_uns; + ENode *left; + ENode *right; + short reg; + short regHi; + short tmpreg1; + short tmpreg2; + short tmpreg3; + SInt32 skipleft; + SInt32 skipright; + Operand opleft; + Operand opright; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + if (TYPE_IS_8BYTES(right->rtype)) + coerce_to_register_pair(&opright, right->rtype, 0, 0); + else + ENSURE_GPR(&opright, right->rtype, 0); + + GEN_NODE(left, &opleft); + if (TYPE_IS_8BYTES(left->rtype)) + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + else + ENSURE_GPR(&opleft, left->rtype, 0); + } else { + GEN_NODE(left, &opleft); + if (TYPE_IS_8BYTES(left->rtype)) + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + else + ENSURE_GPR(&opleft, left->rtype, 0); + + GEN_NODE(right, &opright); + if (TYPE_IS_8BYTES(right->rtype)) + coerce_to_register_pair(&opright, right->rtype, 0, 0); + else + ENSURE_GPR(&opright, right->rtype, 0); + } + + reg = ALLOC_GPR(); + regHi = ALLOC_GPR(); + is_uns = is_unsigned(expr->rtype) != 0; + skipleft = GetSizeSkip(left); + skipright = GetSizeSkip(right); + + switch (skipleft + skipright) { + case 1 + 1: + case 1 + 2: + case 2 + 2: + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo)) { + emitpcode(PC_SUBFIC, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else { + emitpcode(PC_SUBFC, reg, opright.reg, opleft.reg); + } + if (is_uns) + emitpcode(PC_LI, regHi, 0); + else + emitpcode(PC_SRAWI, regHi, reg, 31); + break; + case 1 + 4: + case 2 + 4: + case 4 + 4: + if (!is_uns) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpreg2, opleft.reg, 31); + emitpcode(PC_SRAWI, tmpreg1, opright.reg, 31); + } + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo)) { + emitpcode(PC_SUBFIC, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else { + emitpcode(PC_SUBFC, reg, opright.reg, opleft.reg); + } + if (is_uns) { + tmpreg1 = ALLOC_GPR(); + emitpcode(PC_LI, tmpreg1, 0); + emitpcode(PC_SUBFZE, regHi, tmpreg1); + } else { + emitpcode(PC_SUBFE, regHi, tmpreg1, tmpreg2); + } + break; + case 1 + 8: + case 2 + 8: + case 4 + 8: + if (skipleft < skipright) { + emitpcode(PC_SUBFC, reg, opright.reg, opleft.reg); + emitpcode(PC_SUBFE, regHi, opright.regHi, opleft.regHi); + } else { + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo) && left->data.intval.hi == 0) { + emitpcode(PC_SUBFIC, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else { + emitpcode(PC_SUBFC, reg, opright.reg, opleft.reg); + } + tmpreg1 = ALLOC_GPR(); + emitpcode(PC_LI, tmpreg1, 0); + emitpcode(PC_SUBFE, regHi, tmpreg1, opleft.regHi); + } + break; + case 8 + 8: + emitpcode(PC_SUBFC, reg, opright.reg, opleft.reg); + emitpcode(PC_SUBFE, regHi, opright.regHi, opleft.regHi); + break; + default: + CError_FATAL(7211); + } + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_XOR(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + short reg; + short regHi; + Operand opleft; + Operand opright; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + } else { + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + } + + CError_ASSERT(7254, opleft.optype == OpndType_GPRPair && opright.optype == OpndType_GPRPair); + + reg = outputReg ? outputReg : ALLOC_GPR(); + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + + emitpcode(PC_XOR, reg, opleft.reg, opright.reg); + emitpcode(PC_XOR, regHi, opleft.regHi, opright.regHi); + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_OR(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + short reg; + short regHi; + Operand opleft; + Operand opright; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + } else { + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + } + + CError_ASSERT(7304, opleft.optype == OpndType_GPRPair && opright.optype == OpndType_GPRPair); + + reg = outputReg ? outputReg : ALLOC_GPR(); + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + + emitpcode(PC_OR, reg, opleft.reg, opright.reg); + emitpcode(PC_OR, regHi, opleft.regHi, opright.regHi); + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_AND(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *left; + ENode *right; + short reg; + short regHi; + Operand opleft; + Operand opright; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + } else { + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + } + + CError_ASSERT(7354, opleft.optype == OpndType_GPRPair && opright.optype == OpndType_GPRPair); + + reg = outputReg ? outputReg : ALLOC_GPR(); + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + + emitpcode(PC_AND, reg, opleft.reg, opright.reg); + emitpcode(PC_AND, regHi, opleft.regHi, opright.regHi); + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +int I8_getbit(UInt64 val) { + switch (val) { + case 0: return -1; + case 1ULL << 0: return 0; + case 1ULL << 1: return 1; + case 1ULL << 2: return 2; + case 1ULL << 3: return 3; + case 1ULL << 4: return 4; + case 1ULL << 5: return 5; + case 1ULL << 6: return 6; + case 1ULL << 7: return 7; + case 1ULL << 8: return 8; + case 1ULL << 9: return 9; + case 1ULL << 10: return 10; + case 1ULL << 11: return 11; + case 1ULL << 12: return 12; + case 1ULL << 13: return 13; + case 1ULL << 14: return 14; + case 1ULL << 15: return 15; + case 1ULL << 16: return 16; + case 1ULL << 17: return 17; + case 1ULL << 18: return 18; + case 1ULL << 19: return 19; + case 1ULL << 20: return 20; + case 1ULL << 21: return 21; + case 1ULL << 22: return 22; + case 1ULL << 23: return 23; + case 1ULL << 24: return 24; + case 1ULL << 25: return 25; + case 1ULL << 26: return 26; + case 1ULL << 27: return 27; + case 1ULL << 28: return 28; + case 1ULL << 29: return 29; + case 1ULL << 30: return 30; + case 1ULL << 31: return 31; + case 1ULL << 32: return 32; + case 1ULL << 33: return 33; + case 1ULL << 34: return 34; + case 1ULL << 35: return 35; + case 1ULL << 36: return 36; + case 1ULL << 37: return 37; + case 1ULL << 38: return 38; + case 1ULL << 39: return 39; + case 1ULL << 40: return 40; + case 1ULL << 41: return 41; + case 1ULL << 42: return 42; + case 1ULL << 43: return 43; + case 1ULL << 44: return 44; + case 1ULL << 45: return 45; + case 1ULL << 46: return 46; + case 1ULL << 47: return 47; + case 1ULL << 48: return 48; + case 1ULL << 49: return 49; + case 1ULL << 50: return 50; + case 1ULL << 51: return 51; + case 1ULL << 52: return 52; + case 1ULL << 53: return 53; + case 1ULL << 54: return 54; + case 1ULL << 55: return 55; + case 1ULL << 56: return 56; + case 1ULL << 57: return 57; + case 1ULL << 58: return 58; + case 1ULL << 59: return 59; + case 1ULL << 60: return 60; + case 1ULL << 61: return 61; + case 1ULL << 62: return 62; + case 1ULL << 63: return 63; + default: return -2; + } +} + +int I8_log2n(UInt64 val) { + int bit = I8_getbit(val); + return (bit > 0 && bit < 63) ? bit : 0; +} + +void I8_ShiftLeftImmediate(Operand opnd, SInt32 value, int is_unsigned, SInt32 size, short reg, short regHi) { + if (opnd.reg == reg || opnd.regHi == regHi || opnd.reg == regHi || opnd.regHi == reg) + CError_FATAL(7703); + + if (value < 32) { + emitpcode(PC_RLWINM, reg, opnd.reg, value, 0, 31 - value); + if (size > 4) { + emitpcode(PC_RLWINM, regHi, opnd.regHi, value, 0, 31 - value); + emitpcode(PC_RLWIMI, regHi, opnd.reg, value, 32 - value, 31); + } else { + emitpcode(PC_RLWINM, regHi, opnd.reg, value, 32 - value, 31); + } + } else if (value <= 63) { + if (value == 32) + emitpcode(PC_MR, regHi, opnd.reg); + else + emitpcode(PC_RLWINM, regHi, opnd.reg, value - 32, 0, 63 - value); + emitpcode(PC_LI, reg, 0); + } else { + CError_FATAL(7732); + } +} + +void I8_ShiftRightImmediate(Operand opnd, SInt32 value, int is_unsigned, short reg, short regHi, int unk) { + short tmpreg1; + short tmpreg2; + short tmpreg3; + short tmpreg4; + + if (opnd.reg == reg || opnd.regHi == regHi || opnd.reg == regHi || opnd.regHi == reg) + CError_FATAL(7756); + + if (value < 32) { + emitpcode(PC_RLWINM, reg, opnd.reg, 32 - value, 0, 31); + emitpcode(PC_RLWIMI, reg, opnd.regHi, 32 - value, 0, value - 1); + if (is_unsigned) { + emitpcode(PC_RLWINM, regHi, opnd.regHi, 32 - value, value, 31); + } else if (unk) { + tmpreg1 = ALLOC_GPR(); + emitpcode(PC_MR, tmpreg1, opnd.regHi); + emitpcode(PC_RLWIMI, tmpreg1, opnd.reg, 0, 31 - (value - 1), 31); + emitpcode(PC_SRAWI, regHi, tmpreg1, value); + } else { + emitpcode(PC_SRAWI, regHi, opnd.regHi, value); + } + } else if (value == 32) { + if (is_unsigned) { + emitpcode(PC_MR, reg, opnd.regHi); + emitpcode(PC_LI, regHi, 0); + } else if (unk) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + emitpcode(PC_NEG, tmpreg1, opnd.reg); + emitpcode(PC_OR, tmpreg2, tmpreg1, opnd.reg); + emitpcode(PC_RLWINM, tmpreg3, tmpreg2, 1, 31, 31); + emitpcode(PC_RLWIMI, tmpreg3, opnd.regHi, 0, 0, 0); + emitpcode(PC_MR, reg, opnd.regHi); + emitpcode(PC_SRAWI, regHi, value, 31); + } else { + emitpcode(PC_MR, reg, opnd.regHi); + emitpcode(PC_SRAWI, regHi, opnd.regHi, 31); + } + } else if (value <= 63) { + if (is_unsigned) { + emitpcode(PC_RLWINM, reg, opnd.regHi, 64 - value, value - 32, 31); + emitpcode(PC_LI, regHi, 0); + } else if (unk) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + tmpreg4 = ALLOC_GPR(); + emitpcode(PC_NEG, tmpreg1, opnd.reg); + emitpcode(PC_OR, tmpreg2, tmpreg1, opnd.reg); + emitpcode(PC_RLWINM, tmpreg3, tmpreg2, 1, 31, 31); + emitpcode(PC_OR, tmpreg4, opnd.regHi, tmpreg3); + emitpcode(PC_SRAWI, regHi, opnd.regHi, 31); + emitpcode(PC_SRAWI, reg, tmpreg4, value - 32); + } else { + emitpcode(PC_SRAWI, reg, opnd.regHi, value - 32); + emitpcode(PC_SRAWI, regHi, opnd.regHi, 31); + } + } else { + CError_FATAL(7866); + } +} + +void I8_gen_MUL(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + int is_uns; + ENode *left; + ENode *right; + short reg; + short regHi; + short tmpreg1; + short tmpreg2; + short tmpreg3; + short tmpreg4; + SInt32 skipleft; + SInt32 skipright; + Operand opleft; + Operand opright; + SInt64 leftval; + SInt64 rightval; + int shift; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + skipleft = GetSizeSkip(left); + skipright = GetSizeSkip(right); + + if (ENODE_IS(right, EINTCONST) && ENODE_IS(left, EINTCONST)) + CError_FATAL(7900); + + if (ENODE_IS(left, EINTCONST)) + leftval = left->data.intval.lo + (((SInt64) ((skipleft < 8) ? 0 : left->data.intval.hi)) << 32); + if (ENODE_IS(right, EINTCONST)) + rightval = right->data.intval.lo + (((SInt64) ((skipright < 8) ? 0 : right->data.intval.hi)) << 32); + + if (right->hascall) { + GEN_NODE(right, &opright); + if (TYPE_IS_8BYTES(right->rtype)) + coerce_to_register_pair(&opright, right->rtype, 0, 0); + else + ENSURE_GPR(&opright, right->rtype, 0); + + if (!(ENODE_IS(left, EINTCONST) && I8_log2n(leftval) > 0)) { + GEN_NODE(left, &opleft); + if (TYPE_IS_8BYTES(left->rtype)) + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + else + ENSURE_GPR(&opleft, left->rtype, 0); + } + } else { + if (!(ENODE_IS(left, EINTCONST) && I8_log2n(leftval) > 0)) { + GEN_NODE(left, &opleft); + if (TYPE_IS_8BYTES(left->rtype)) + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + else + ENSURE_GPR(&opleft, left->rtype, 0); + } + + if (!(ENODE_IS(right, EINTCONST) && I8_log2n(rightval) > 0)) { + GEN_NODE(right, &opright); + if (TYPE_IS_8BYTES(right->rtype)) + coerce_to_register_pair(&opright, right->rtype, 0, 0); + else + ENSURE_GPR(&opright, right->rtype, 0); + } + } + + is_uns = is_unsigned(expr->rtype) != 0; + + if (skipleft < skipright) { + Operand tmpop; + SInt64 tmp64; + SInt32 tmp; + + expr->data.diadic.left = right; + expr->data.diadic.right = left; + left = expr->data.diadic.left; + right = expr->data.diadic.right; + + tmpop = opright; + opright = opleft; + opleft = tmpop; + + tmp64 = leftval; + leftval = rightval; + rightval = tmp64; + + tmp = skipleft; + skipleft = skipright; + skipright = tmp; + } + + reg = ALLOC_GPR(); + regHi = ALLOC_GPR(); + + if (ENODE_IS(left, EINTCONST) && (shift = I8_log2n(leftval)) > 0) { + I8_ShiftLeftImmediate(opright, shift, is_uns, skipright, reg, regHi); + } else if (ENODE_IS(right, EINTCONST) && (shift = I8_log2n(rightval)) > 0) { + I8_ShiftLeftImmediate(opleft, shift, is_uns, skipleft, reg, regHi); + } else { + switch (skipleft + skipright) { + case 1 + 1: + case 1 + 2: + case 2 + 2: + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo)) { + emitpcode(PC_MULLI, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo)) { + emitpcode(PC_MULLI, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + } else { + emitpcode(PC_MULLW, reg, opleft.reg, opright.reg); + } + if (is_uns) + emitpcode(PC_LI, regHi, 0); + else + emitpcode(PC_SRAWI, regHi, reg, 31); + break; + case 1 + 4: + case 2 + 4: + case 4 + 4: + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo)) { + emitpcode(PC_MULLI, reg, opright.reg, LOW_PART(left->data.intval.lo)); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo)) { + emitpcode(PC_MULLI, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + } else { + emitpcode(PC_MULLW, reg, opleft.reg, opright.reg); + } + if (is_uns) + emitpcode(PC_MULHWU, regHi, opleft.reg, opright.reg); + else + emitpcode(PC_MULHW, regHi, opleft.reg, opright.reg); + break; + case 1 + 8: + case 2 + 8: + case 4 + 8: + CError_ASSERT(8097, skipleft == 8); + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo) && left->data.intval.hi == 0) { + emitpcode(PC_MULLI, reg, opright.reg, LOW_PART(left->data.intval.lo)); + if (is_uns) + emitpcode(PC_MULHWU, regHi, opright.reg, opleft.reg); + else + emitpcode(PC_MULHW, regHi, opright.reg, opleft.reg); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo) && right->data.intval.hi == 0) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + if (is_uns) { + emitpcode(PC_MULHWU, tmpreg2, opleft.reg, opright.reg); + emitpcode(PC_MULLI, tmpreg1, opleft.regHi, LOW_PART(right->data.intval.lo)); + emitpcode(PC_MULLI, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + emitpcode(PC_ADD, regHi, tmpreg2, tmpreg1); + } else { + tmpreg3 = ALLOC_GPR(); + tmpreg4 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpreg4, opright.reg, 31); + emitpcode(PC_MULHWU, tmpreg2, opleft.reg, opright.reg); + emitpcode(PC_MULLI, tmpreg1, opleft.regHi, LOW_PART(right->data.intval.lo)); + emitpcode(PC_MULLI, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + emitpcode(PC_MULLW, tmpreg3, opleft.reg, tmpreg4); + emitpcode(PC_ADD, regHi, tmpreg2, tmpreg1); + emitpcode(PC_ADD, regHi, regHi, tmpreg3); + } + } else { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + if (is_uns) { + emitpcode(PC_MULHWU, tmpreg2, opleft.reg, opright.reg); + emitpcode(PC_MULLW, tmpreg1, opleft.regHi, opright.reg); + emitpcode(PC_MULLW, reg, opleft.reg, opright.reg); + emitpcode(PC_ADD, regHi, tmpreg2, tmpreg1); + } else { + tmpreg3 = ALLOC_GPR(); + tmpreg4 = ALLOC_GPR(); + emitpcode(PC_SRAWI, tmpreg4, opright.reg, 31); + emitpcode(PC_MULHWU, tmpreg2, opleft.reg, opright.reg); + emitpcode(PC_MULLW, tmpreg1, opleft.regHi, opright.reg); + emitpcode(PC_MULLW, reg, opleft.reg, opright.reg); + emitpcode(PC_MULLW, tmpreg3, opleft.reg, tmpreg4); + emitpcode(PC_ADD, regHi, tmpreg2, tmpreg1); + emitpcode(PC_ADD, regHi, regHi, tmpreg3); + } + } + break; + case 8 + 8: + if (ENODE_IS(left, EINTCONST) && FITS_IN_SHORT2(left->data.intval.lo) && left->data.intval.hi == 0) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + emitpcode(PC_MULHWU, tmpreg1, opright.reg, opleft.reg); + emitpcode(PC_MULLW, tmpreg2, opright.reg, opleft.regHi); + emitpcode(PC_MULLI, reg, opright.reg, LOW_PART(left->data.intval.lo)); + emitpcode(PC_ADD, regHi, tmpreg1, tmpreg2); + } else if (ENODE_IS(right, EINTCONST) && FITS_IN_SHORT2(right->data.intval.lo) && right->data.intval.hi == 0) { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + emitpcode(PC_MULHWU, tmpreg2, opleft.reg, opright.reg); + emitpcode(PC_MULLW, tmpreg1, opleft.regHi, opright.reg); + emitpcode(PC_MULLI, reg, opleft.reg, LOW_PART(right->data.intval.lo)); + emitpcode(PC_ADD, regHi, tmpreg2, tmpreg1); + } else { + tmpreg1 = ALLOC_GPR(); + tmpreg2 = ALLOC_GPR(); + tmpreg3 = ALLOC_GPR(); + tmpreg4 = ALLOC_GPR(); + emitpcode(PC_MULHWU, tmpreg2, opleft.reg, opright.reg); + emitpcode(PC_MULLW, tmpreg1, opleft.regHi, opright.reg); + emitpcode(PC_ADD, tmpreg3, tmpreg2, tmpreg1); + emitpcode(PC_MULLW, tmpreg4, opleft.reg, opright.regHi); + emitpcode(PC_MULLW, reg, opleft.reg, opright.reg); + emitpcode(PC_ADD, regHi, tmpreg3, tmpreg4); + } + break; + default: + CError_FATAL(8218); + } + } + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_BINNOT(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + Operand op; + int reg; + int regHi; + + inner = expr->data.monadic; + memclrw(&op, sizeof(Operand)); + GEN_NODE(inner, &op); + coerce_to_register_pair(&op, inner->rtype, 0, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + emitpcode(PC_NOR, reg, op.reg, op.reg); + emitpcode(PC_NOR, regHi, op.regHi, op.regHi); + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_MONMIN(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + Operand op; + int reg; + int regHi; + + inner = expr->data.monadic; + memclrw(&op, sizeof(Operand)); + GEN_NODE(inner, &op); + coerce_to_register_pair(&op, inner->rtype, 0, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + emitpcode(PC_SUBFIC, reg, op.reg, 0); + emitpcode(PC_SUBFZE, regHi, op.regHi); + + output->optype = OpndType_GPRPair; + output->reg = reg; + output->regHi = regHi; +} + +void I8_gen_ASS(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *type; + ENode *left; + ENode *right; + Operand opleft; + Operand opright; + VarInfo *vi; + + type = expr->rtype; + if (ENODE_IS(expr, ECONDASS)) { + left = expr->data.cond.expr1; + if (ENODE_IS(left, EINDIRECT)) + left = left->data.monadic; + else + CError_FATAL(8328); + right = expr->data.cond.expr2; + } else { + left = expr->data.diadic.left; + right = expr->data.diadic.right; + } + + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + if (ENODE_IS(left, EOBJREF) && OBJECT_REG(left->data.objref)) { + vi = Registers_GetVarInfo(left->data.objref); + GEN_NODE_TO_REG(right, vi->reg, vi->regHi, &opright); + if (vi->rclass != RegClass_GPR) { + CError_FATAL(8348); + } else { + coerce_to_register_pair(&opright, type, vi->reg, vi->regHi); + *output = opright; + } + return; + } + + if (TYPE_FITS_IN_REGISTER(type)) { + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + + if (ENODE_IS(left, EBITFIELD)) { + CError_FATAL(8376); + } else { + GEN_NODE(left, &opleft); + indirect(&opleft, left); + store_pair(opright.reg, opright.regHi, &opleft, type); + } + + output->optype = OpndType_GPRPair; + output->reg = opright.reg; + output->regHi = opright.regHi; + } +} + +void I8_gen_POSTINCDEC(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + Type *type; + int flag; + int reg; + int regHi; + Operand op1; + Operand op2; + Operand op3; + + inner = expr->data.monadic->data.monadic; + type = expr->rtype; + flag = 0; + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + + if (ENODE_IS(inner, EOBJREF) && (reg = OBJECT_REG(inner->data.objref))) { + regHi = Registers_GetVarInfo(inner->data.objref)->regHi; + output->optype = OpndType_GPRPair; + output->reg = (outputReg && outputReg != reg && outputReg != regHi) ? outputReg : ALLOC_GPR(); + output->regHi = (outputRegHi && outputRegHi != regHi && outputRegHi != reg) ? outputRegHi : ALLOC_GPR(); + emitpcode(PC_MR, output->reg, reg); + emitpcode(PC_MR, output->regHi, regHi); + if (ENODE_IS(expr, EPOSTINC)) { + emitpcode(PC_ADDIC, reg, reg, 1); + emitpcode(PC_ADDME, regHi, regHi); + } else { + emitpcode(PC_ADDIC, reg, reg, -1); + emitpcode(PC_ADDZE, regHi, regHi); + } + return; + } + + CError_ASSERT(8446, !ENODE_IS(inner, EBITFIELD)); + + GEN_NODE(inner, &op1); + indirect(&op1, inner); + op2 = op1; + coerce_to_register_pair(&op2, type, 0, 0); + + output->optype = OpndType_GPRPair; + output->reg = ALLOC_GPR(); + output->regHi = ALLOC_GPR(); + + emitpcode(PC_MR, output->reg, op2.reg); + emitpcode(PC_MR, output->regHi, op2.regHi); + + reg = ALLOC_GPR(); + regHi = ALLOC_GPR(); + + if (ENODE_IS(expr, EPOSTINC)) { + emitpcode(PC_ADDIC, reg, op2.reg, 1); + emitpcode(PC_ADDZE, regHi, op2.regHi); + } else { + emitpcode(PC_ADDIC, reg, op2.reg, -1); + emitpcode(PC_ADDME, regHi, op2.regHi); + } + store_pair(reg, regHi, &op1, type); +} + +void I8_gen_INDIRECT(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + ENode *inner; + VarInfo *vi; + + inner = expr->data.monadic; + if (ENODE_IS(inner, EOBJREF) && OBJECT_REG(inner->data.objref)) { + vi = Registers_GetVarInfo(inner->data.objref); + switch (vi->rclass) { + case RegClass_GPR: + output->optype = OpndType_GPRPair; + break; + case RegClass_FPR: + output->optype = OpndType_FPR; + break; + default: + CError_FATAL(8511); + } + + output->reg = vi->reg; + output->regHi = vi->regHi; + output->object = NULL; + return; + } + + if (ENODE_IS(inner, EBITFIELD)) { + CError_FATAL(8529); + return; + } + + GEN_NODE(inner, output); + indirect(output, inner); +} + +void I8_gen_condition(ENode *cond, Operand *output, int write_to_gpr) { + ENode *left; + ENode *right; + Operand opleft; + Operand opright; + Operand tmpop; + int reg1; + int reg2; + int reg3; + int reg4; + int reg5; + + left = cond->data.diadic.left; + right = cond->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + memclrw(&tmpop, sizeof(Operand)); + + if (right->hascall) { + GEN_NODE(right, &opright); + if (right->rtype->size < 4) + extend32(&opright, right->rtype, 0); + ENSURE_GPR(&opright, right->rtype, 0); + + if (right->rtype->size < 8) { + short tmp = ALLOC_GPR(); + if (is_unsigned(right->rtype)) + load_immediate(tmp, 0); + else + emitpcode(PC_SRAWI, tmp, opright.reg, 31); + opright.optype = OpndType_GPRPair; + opright.regHi = tmp; + } + + GEN_NODE(left, &opleft); + if (left->rtype->size < 4) + extend32(&opleft, left->rtype, 0); + ENSURE_GPR(&opleft, left->rtype, 0); + + if (left->rtype->size < 8) { + short tmp = ALLOC_GPR(); + // this looks like a bug??? surely this should check left->rtype + if (is_unsigned(right->rtype)) + load_immediate(tmp, 0); + else + emitpcode(PC_SRAWI, tmp, opleft.reg, 31); + opleft.optype = OpndType_GPRPair; + opleft.regHi = tmp; + } + } else { + GEN_NODE(left, &opleft); + ENSURE_GPR(&opleft, left->rtype, 0); + if (left->rtype->size < 4) + extend32(&opleft, left->rtype, 0); + + if (left->rtype->size < 8) { + short tmp = ALLOC_GPR(); + if (is_unsigned(right->rtype)) + load_immediate(tmp, 0); + else + emitpcode(PC_SRAWI, tmp, opleft.reg, 31); + opleft.optype = OpndType_GPRPair; + opleft.regHi = tmp; + } + + GEN_NODE(right, &opright); + if (right->rtype->size < 4) + extend32(&opright, right->rtype, 0); + ENSURE_GPR(&opright, right->rtype, 0); + + if (right->rtype->size < 8) { + short tmp = ALLOC_GPR(); + if (is_unsigned(right->rtype)) + load_immediate(tmp, 0); + else + emitpcode(PC_SRAWI, tmp, opright.reg, 31); + opright.optype = OpndType_GPRPair; + opright.regHi = tmp; + } + } + + CError_ASSERT(8704, opleft.optype == OpndType_GPRPair && opright.optype == OpndType_GPRPair); + + switch (cond->type) { + case EEQU: + case ENOTEQU: + reg1 = ALLOC_GPR(); + reg2 = ALLOC_GPR(); + emitpcode(PC_XOR, reg1, opleft.reg, opright.reg); + emitpcode(PC_XOR, reg2, opleft.regHi, opright.regHi); + emitpcode(PC_OR, reg2, reg1, reg2); + if (write_to_gpr) { + if (ENODE_IS(cond, EEQU)) { + emitpcode(PC_CNTLZW, reg2, reg2); + emitpcode(PC_RLWINM, reg2, reg2, 27, 5, 31); + } else { + emitpcode(PC_ADDIC, reg1, reg2, -1); + emitpcode(PC_SUBFE, reg2, reg1, reg2); + } + output->optype = OpndType_GPR; + output->reg = reg2; + output->regHi = 0; + } else { + emitpcode(PC_CMPI, 0, reg2, 0); + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = cond->type; + } + break; + case EGREATER: + tmpop = opleft; + opleft = opright; + opright = tmpop; + case ELESS: + reg1 = ALLOC_GPR(); + reg2 = ALLOC_GPR(); + reg3 = ALLOC_GPR(); + if (left->rtype != TYPE(&stunsignedlonglong) && right->rtype != TYPE(&stunsignedlonglong)) { + emitpcode(PC_XORIS, reg1, opleft.regHi, 0x8000); + emitpcode(PC_XORIS, reg2, opright.regHi, 0x8000); + reg4 = reg1; + reg5 = reg2; + } else { + reg4 = opleft.regHi; + reg5 = opright.regHi; + } + emitpcode(PC_SUBFC, reg3, opright.reg, opleft.reg); + emitpcode(PC_SUBFE, reg2, reg5, reg4); + emitpcode(PC_SUBFE, reg2, reg1, reg1); + emitpcode(PC_NEG, reg2, reg2); + if (write_to_gpr) { + output->optype = OpndType_GPR; + output->reg = reg2; + output->regHi = 0; + } else { + emitpcode(PC_CMPI, 0, reg2, 0); + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = ENOTEQU; + } + break; + case ELESSEQU: + tmpop = opleft; + opleft = opright; + opright = tmpop; + case EGREATEREQU: + reg1 = ALLOC_GPR(); + reg2 = ALLOC_GPR(); + reg3 = ALLOC_GPR(); + if (left->rtype != TYPE(&stunsignedlonglong) && right->rtype != TYPE(&stunsignedlonglong)) { + emitpcode(PC_XORIS, reg1, opleft.regHi, 0x8000); + emitpcode(PC_XORIS, reg2, opright.regHi, 0x8000); + reg4 = reg1; + reg5 = reg2; + } else { + reg4 = opleft.regHi; + reg5 = opright.regHi; + } + emitpcode(PC_SUBFC, reg3, opright.reg, opleft.reg); + emitpcode(PC_SUBFE, reg2, reg5, reg4); + emitpcode(PC_SUBFE, reg2, reg1, reg1); + emitpcode(PC_NEG, reg2, reg2); + if (write_to_gpr) { + emitpcode(PC_SUBFIC, reg2, reg2, 1); + output->optype = OpndType_GPR; + output->reg = reg2; + output->regHi = 0; + } else { + emitpcode(PC_CMPI, 0, reg2, 0); + output->optype = OpndType_CRField; + output->reg = 0; + output->regOffset = EEQU; + } + break; + default: + CError_FATAL(8814); + } +} + +void I8_gen_SHL_SHR(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + static UInt32 used_regs[RegClassMax] = {0, 0, 0, 0, (1 << 3) | (1 << 4) | (1 << 5)}; + + ENode *left; + ENode *right; + Operand opleft; + Operand opright; + int is_uns; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + + output->optype = OpndType_GPRPair; + output->reg = ALLOC_GPR(); + output->regHi = ALLOC_GPR(); + + is_uns = is_unsigned(expr->rtype) != 0; + + if (ENODE_IS(right, EINTCONST)) { + if (ENODE_IS(expr, ESHL)) { + I8_ShiftLeftImmediate(opleft, right->data.intval.lo, is_uns, expr->rtype->size, output->reg, output->regHi); + } else { + I8_ShiftRightImmediate(opleft, right->data.intval.lo, is_uns, output->reg, output->regHi, 0); + } + return; + } + + GEN_NODE(right, &opright); + ENSURE_GPR(&opright, right->rtype, 0); + if (opright.optype == OpndType_GPRPair) { + opright.regHi = 0; + opright.optype = OpndType_GPR; + } + + CError_ASSERT(8890, opleft.optype == OpndType_GPRPair && opright.optype == OpndType_GPR); + + if (opleft.regHi != high_reg) + emitpcode(PC_MR, high_reg, opleft.regHi); + if (opleft.reg != low_reg) + emitpcode(PC_MR, low_reg, opleft.reg); + if (opright.reg != 5) + emitpcode(PC_MR, 5, opright.reg); + + if (ENODE_IS(expr, ESHR)) { + if (is_unsigned(left->rtype)) + branch_subroutine(rt_shr2u, 0, used_regs); + else + branch_subroutine(rt_shr2i, 0, used_regs); + } else if (ENODE_IS(expr, ESHL)) { + branch_subroutine(rt_shl2i, 0, used_regs); + } else { + CError_FATAL(8909); + } + + emitpcode(PC_MR, output->reg, low_reg); + emitpcode(PC_MR, output->regHi, high_reg); +} + +void I8_gen_DIV_MOD(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + static UInt32 used_regs[RegClassMax] = {0, 0, 0, 0, (1 << 3) | (1 << 4) | (1 << 5) | (1 << 6)}; + + int is_uns; + ENode *left; + ENode *right; + Operand opleft; + Operand opright; + SInt64 constval; + int shift; + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + memclrw(&opleft, sizeof(Operand)); + memclrw(&opright, sizeof(Operand)); + + GEN_NODE(left, &opleft); + coerce_to_register_pair(&opleft, left->rtype, 0, 0); + + output->optype = OpndType_GPRPair; + output->reg = ALLOC_GPR(); + output->regHi = ALLOC_GPR(); + + is_uns = is_unsigned(expr->rtype) != 0; + + if (ENODE_IS(right, EINTCONST)) + constval = (((SInt64) right->data.intval.hi) << 32) + right->data.intval.lo; + if (ENODE_IS(right, EINTCONST) && ((shift = I8_log2n(constval)) > 0)) { + CError_ASSERT(8976, opleft.optype == OpndType_GPRPair); + if (ENODE_IS(expr, EDIV)) { + I8_ShiftRightImmediate(opleft, shift, is_uns, output->reg, output->regHi, 1); + if (!is_uns) { + emitpcode(PC_ADDZE, output->reg, output->reg); + emitpcode(PC_ADDZE, output->regHi, output->regHi); + } + } else { + if (is_uns) { + if (shift < 32) { + emitpcode(PC_LI, output->regHi, 0); + emitpcode(PC_RLWINM, output->reg, opleft.reg, 0, 32 - shift, 31); + } else if (shift == 32) { + emitpcode(PC_LI, output->regHi, 0); + emitpcode(PC_MR, output->reg, opleft.reg); + } else if (shift <= 63) { + emitpcode(PC_RLWINM, output->regHi, opleft.regHi, 0, 32 - (shift - 32), 31); + emitpcode(PC_MR, output->reg, opleft.reg); + } else { + CError_FATAL(9018); + } + } else { + short tmpreg1 = ALLOC_GPR(); + short tmpreg2 = ALLOC_GPR(); + I8_ShiftRightImmediate(opleft, shift, is_uns, output->reg, output->regHi, 1); + emitpcode(PC_ADDZE, output->reg, output->reg); + emitpcode(PC_ADDZE, output->regHi, output->regHi); + I8_ShiftLeftImmediate(*output, shift, is_uns, expr->rtype->size, tmpreg1, tmpreg2); + emitpcode(PC_SUBFC, output->reg, tmpreg1, opleft.reg); + emitpcode(PC_SUBFE, output->regHi, tmpreg2, opleft.regHi); + } + } + return; + } + + GEN_NODE(right, &opright); + coerce_to_register_pair(&opright, right->rtype, 0, 0); + + CError_ASSERT(9048, opleft.optype == OpndType_GPRPair && opright.optype == OpndType_GPRPair); + + if (opleft.regHi != high_reg) + emitpcode(PC_MR, high_reg, opleft.regHi); + if (opleft.reg != low_reg) + emitpcode(PC_MR, low_reg, opleft.reg); + if (opright.regHi != high_reg2) + emitpcode(PC_MR, high_reg2, opright.regHi); + if (opright.reg != low_reg2) + emitpcode(PC_MR, low_reg2, opright.reg); + + if (ENODE_IS(expr, EDIV)) { + if (is_unsigned(left->rtype) || is_unsigned(right->rtype)) + branch_subroutine(rt_div2u, 0, used_regs); + else + branch_subroutine(rt_div2i, 0, used_regs); + } else if (ENODE_IS(expr, EMODULO)) { + if (is_unsigned(left->rtype) || is_unsigned(right->rtype)) + branch_subroutine(rt_mod2u, 0, used_regs); + else + branch_subroutine(rt_mod2i, 0, used_regs); + } else { + CError_FATAL(9074); + } + + emitpcode(PC_MR, output->reg, low_reg); + emitpcode(PC_MR, output->regHi, high_reg); +} + +void I8_gen_TYPCON(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + Type *dsttype; + ENode *inner; + Type *srctype; + short regHi; + short reg; + + static UInt32 used_regs[RegClassMax] = {0, 0, 0, 0, (1 << 3) | (1 << 4)}; + static UInt32 used_regs_f1[RegClassMax] = {0, 0, 0, (1 << 1), 0}; + + inner = expr->data.monadic; + srctype = inner->rtype; + dsttype = expr->rtype; + + if (IS_TYPE_VOID(dsttype)) { + GEN_NODE(inner, output); + if (ENODE_IS(inner, EINDIRECT) && (output->flags & OpndFlags_Volatile)) + coerce_to_register_pair(output, inner->rtype, 0, 0); + output->optype = OpndType_Absolute; + output->immediate = 0; + return; + } + + if (IS_TYPE_INT_OR_ENUM(srctype)) { + if (IS_TYPE_FLOAT(dsttype)) { + GEN_NODE(inner, output); + coerce_to_register_pair(output, srctype, 0, 0); + if (output->regHi != high_reg) + emitpcode(PC_MR, high_reg, output->regHi); + if (output->reg != low_reg) + emitpcode(PC_MR, low_reg, output->reg); + + if (is_unsigned(srctype)) { + branch_subroutine( + (dsttype->size == 4) ? rt_cvt_ull_flt : rt_cvt_ull_dbl, + 0, + used_regs); + } else { + branch_subroutine( + (dsttype->size == 4) ? rt_cvt_sll_flt : rt_cvt_sll_dbl, + 0, + used_regs); + } + + output->optype = OpndType_FPR; + output->reg = ALLOC_FPR(); + emitpcode(PC_FMR, output->reg, 1); + return; + } + + if (srctype->size < dsttype->size) { + CError_ASSERT(9171, TYPE_IS_8BYTES(dsttype)); + + GEN_NODE(inner, output); + if (srctype->size < 4 && + !ENODE_IS_INDIRECT_TO(inner, EBITFIELD) && + !((ENODE_IS_ASSIGN(inner) || ENODE_IS_RANGE(inner, EPOSTINC, EPREDEC)) && ENODE_IS(inner->data.monadic->data.monadic, EBITFIELD)) + ) { + extend32(output, srctype, outputReg); + } + extend64(output, srctype, outputReg, outputRegHi); + } else { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + if (dsttype->size < srctype->size) { + coerce_to_register_pair(output, srctype, outputReg, outputRegHi); + output->optype = OpndType_GPR; + output->regHi = 0; + } + } + return; + } + + if (IS_TYPE_POINTER(srctype)) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + CError_ASSERT(9200, TYPE_IS_8BYTES(expr->rtype)); + GEN_NODE_TO_REG(inner, outputReg, 0, output); + + regHi = outputRegHi ? outputRegHi : ALLOC_GPR(); + if (regHi == output->reg) { + reg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_MR, reg, output->reg); + output->reg = reg; + } + if (is_unsigned(inner->rtype)) + load_immediate(regHi, 0); + else + emitpcode(PC_SRAWI, regHi, output->reg, 31); + output->optype = OpndType_GPRPair; + output->regHi = regHi; + return; + } + + if (IS_TYPE_FLOAT(srctype)) { + if (IS_TYPE_FLOAT(dsttype)) { + CError_FATAL(9222); + return; + } + + GEN_NODE(inner, output); + ENSURE_FPR(output, srctype, 0); + if (output->reg != 1) + emitpcode(PC_FMR, 1, output->reg); + + branch_subroutine(rt_cvt_dbl_usll, 0, used_regs_f1); + + output->optype = OpndType_GPRPair; + output->reg = ALLOC_GPR(); + output->regHi = ALLOC_GPR(); + emitpcode(PC_MR, output->reg, low_reg); + emitpcode(PC_MR, output->regHi, high_reg); + return; + } + + if (IS_TYPE_STRUCT(srctype)) { + GEN_NODE_TO_REG(inner, outputReg, 0, output); + + if (TYPE_IS_8BYTES(expr->rtype) && dsttype->size == srctype->size) { + coerce_to_register_pair(output, srctype, outputReg, outputRegHi); + } else { + CError_FATAL(9256); + } + return; + } + + CError_FATAL(9261); +} + +void gen_VECTOR128CONST(ENode *expr, short outputReg, short outputRegHi, Operand *output) { + int gpr; + int vr; + COVCResult result; + + vr = outputReg ? outputReg : ALLOC_VR(); + if (!canoptimizevectorconst(&expr->data.vector128val, expr->rtype, &result)) + CError_FATAL(9282); + + if (result.op1 != -1) { + emitpcode(result.op1, vr, result.arg); + output->optype = OpndType_VR; + output->reg = vr; + return; + } + + if (result.op2 != -1) { + gpr = ALLOC_GPR(); + emitpcode(PC_LI, gpr, result.arg); + emitpcode(result.op2, vr, 0, gpr); + output->optype = OpndType_VR; + output->reg = vr; + return; + } + + CError_FATAL(9298); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Intrinsics.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Intrinsics.c new file mode 100644 index 0000000..49334b8 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Intrinsics.c @@ -0,0 +1,4894 @@ +#include "compiler/Intrinsics.h" +#include "compiler/CError.h" +#include "compiler/CExpr.h" +#include "compiler/CInt64.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CScope.h" +#include "compiler/CompilerTools.h" +#include "compiler/FunctionCalls.h" +#include "compiler/InstrSelection.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/PPCError.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/StructMoves.h" +#include "compiler/objects.h" +#include "compiler/scopes.h" +#include "compiler/types.h" + +Object *__memcpy_object; +static Object *intrinsics[311]; +int VectorConditions; // unused? +static Object *cur_intrinsic_object; + +static TypePointer stvectorunsignedchar_ptr = {TYPEPOINTER, 4, TYPE(&stvectorunsignedchar), 0}; +static TypePointer stvectorsignedchar_ptr = {TYPEPOINTER, 4, TYPE(&stvectorsignedchar), 0}; +static TypePointer stvectorboolchar_ptr = {TYPEPOINTER, 4, TYPE(&stvectorboolchar), 0}; +static TypePointer stvectorunsignedshort_ptr = {TYPEPOINTER, 4, TYPE(&stvectorunsignedshort), 0}; +static TypePointer stvectorsignedshort_ptr = {TYPEPOINTER, 4, TYPE(&stvectorsignedshort), 0}; +static TypePointer stvectorboolshort_ptr = {TYPEPOINTER, 4, TYPE(&stvectorboolshort), 0}; +static TypePointer stvectorunsignedlong_ptr = {TYPEPOINTER, 4, TYPE(&stvectorunsignedlong), 0}; +static TypePointer stvectorsignedlong_ptr = {TYPEPOINTER, 4, TYPE(&stvectorsignedlong), 0}; +static TypePointer stvectorboollong_ptr = {TYPEPOINTER, 4, TYPE(&stvectorboollong), 0}; +static TypePointer stvectorfloat_ptr = {TYPEPOINTER, 4, TYPE(&stvectorfloat), 0}; +static TypePointer stvectorpixel_ptr = {TYPEPOINTER, 4, TYPE(&stvectorpixel), 0}; +static TypePointer stunsignedchar_ptr = {TYPEPOINTER, 4, TYPE(&stunsignedchar), 0}; +static TypePointer stsignedchar_ptr = {TYPEPOINTER, 4, TYPE(&stsignedchar), 0}; +static TypePointer stunsignedshort_ptr = {TYPEPOINTER, 4, TYPE(&stunsignedshort), 0}; +static TypePointer stsignedshort_ptr = {TYPEPOINTER, 4, TYPE(&stsignedshort), 0}; +static TypePointer stunsignedlong_ptr = {TYPEPOINTER, 4, TYPE(&stunsignedlong), 0}; +static TypePointer stsignedlong_ptr = {TYPEPOINTER, 4, TYPE(&stsignedlong), 0}; +static TypePointer stunsignedint_ptr = {TYPEPOINTER, 4, TYPE(&stunsignedint), 0}; +static TypePointer stsignedint_ptr = {TYPEPOINTER, 4, TYPE(&stsignedint), 0}; +static TypePointer stfloat_ptr = {TYPEPOINTER, 4, TYPE(&stfloat), 0}; + +// Verify1VectorArg2Ops +typedef struct TypeTable22 { + Type *rtype; + Type *arg1; + int opcode1; + int opcode2; +} TypeTable22; + +// VerifyNoVectorArgs +typedef struct TypeTable11 { + Type *rtype; + int opcode; +} TypeTable11; + +// Verify1VectorArg +typedef struct TypeTable21 { + Type *rtype; + Type *arg1; + int opcode; +} TypeTable21; + +// Verify2VectorArgs +typedef struct TypeTable31 { + Type *rtype; + Type *arg1; + Type *arg2; + int opcode; +} TypeTable31; + +// Verify3VectorArgs +typedef struct TypeTable41 { + Type *rtype; + Type *arg1; + Type *arg2; + Type *arg3; + int opcode; +} TypeTable41; + +static TypeTable31 vector_add_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VADDUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VADDUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VADDUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VADDUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VADDUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VADDUBM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VADDUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VADDUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VADDUHM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VADDUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VADDUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VADDUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VADDUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VADDUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VADDUWM, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VADDFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_addc_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VADDCUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_adds_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VADDUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VADDUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VADDUBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VADDSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VADDSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VADDSBS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VADDUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VADDUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VADDUHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VADDSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VADDSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VADDSHS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VADDUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VADDUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VADDUWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VADDSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VADDSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VADDSWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_and_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VAND, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VAND, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VAND, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VAND, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VAND, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VAND, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VAND, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VAND, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VAND, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VAND, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VAND, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VAND, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VAND, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VAND, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VAND, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VAND, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VAND, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VAND, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VAND, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VAND, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VAND, + TYPE(&stvectorfloat), TYPE(&stvectorboollong), TYPE(&stvectorfloat), PC_VAND, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorboollong), PC_VAND, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VAND, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_andc_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VANDC, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VANDC, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VANDC, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VANDC, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VANDC, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VANDC, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VANDC, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VANDC, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VANDC, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VANDC, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VANDC, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VANDC, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VANDC, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VANDC, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VANDC, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VANDC, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VANDC, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VANDC, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VANDC, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VANDC, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VANDC, + TYPE(&stvectorfloat), TYPE(&stvectorboollong), TYPE(&stvectorfloat), PC_VANDC, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorboollong), PC_VANDC, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VANDC, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_avg_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VAVGUB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VAVGSB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VAVGUH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VAVGSH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VAVGUW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VAVGSW, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_ceil_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VRFIP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_cmpb_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPBFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_cmpeq_type_table[] = { + TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stvectorboollong), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPEQFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_cmpge_type_table[] = { + TYPE(&stvectorboollong), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_cmpgt_type_table[] = { + TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stvectorboollong), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_ctf_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), PC_VCFUX, + TYPE(&stvectorfloat), TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VCFSX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_cts_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorfloat), TYPE(&stsignedint), PC_VCTSXS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_ctu_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorfloat), TYPE(&stsignedint), PC_VCTUXS, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_dss_type_table[] = { + TYPE(&stvoid), TYPE(&stsignedint), PC_DSS, PC_B, + NULL, NULL, 0 +}; +static TypeTable11 vector_dssall_type_table[] = { + TYPE(&stvoid), PC_DSSALL, + NULL, 0 +}; +static TypeTable41 vector_datastream_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedchar_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorsignedchar_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorboolchar_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorunsignedshort_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorsignedshort_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorboolshort_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorpixel_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorunsignedlong_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorsignedlong_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorboollong_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stvectorfloat_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stunsignedchar_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stsignedchar_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stunsignedshort_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stsignedshort_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stunsignedint_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stsignedint_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stunsignedlong_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stsignedlong_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + TYPE(&stvoid), TYPE(&stfloat_ptr), TYPE(&stsignedint), TYPE(&stsignedint), PC_DST, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_expte_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VEXPTEFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_floor_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VRFIM, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_load_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stvectorunsignedchar_ptr), PC_LVX, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_LVX, + TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stvectorsignedchar_ptr), PC_LVX, + TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_LVX, + TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stvectorboolchar_ptr), PC_LVX, + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stvectorunsignedshort_ptr), PC_LVX, + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_LVX, + TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stvectorsignedshort_ptr), PC_LVX, + TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_LVX, + TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stvectorboolshort_ptr), PC_LVX, + TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stvectorpixel_ptr), PC_LVX, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stvectorunsignedlong_ptr), PC_LVX, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_LVX, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_LVX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stvectorsignedlong_ptr), PC_LVX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_LVX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_LVX, + TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stvectorboollong_ptr), PC_LVX, + TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stvectorfloat_ptr), PC_LVX, + TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_LVX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_loade_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_LVEBX, + TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_LVEBX, + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_LVEHX, + TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_LVEHX, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_LVEWX, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_LVEWX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_LVEWX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_LVEWX, + TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_LVEWX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_loadl_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stvectorunsignedchar_ptr), PC_LVXL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_LVXL, + TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stvectorsignedchar_ptr), PC_LVXL, + TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_LVXL, + TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stvectorboolchar_ptr), PC_LVXL, + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stvectorunsignedshort_ptr), PC_LVXL, + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_LVXL, + TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stvectorsignedshort_ptr), PC_LVXL, + TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_LVXL, + TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stvectorboolshort_ptr), PC_LVXL, + TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stvectorpixel_ptr), PC_LVXL, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stvectorunsignedlong_ptr), PC_LVXL, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_LVXL, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_LVXL, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stvectorsignedlong_ptr), PC_LVXL, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_LVXL, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_LVXL, + TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stvectorboollong_ptr), PC_LVXL, + TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stvectorfloat_ptr), PC_LVXL, + TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_LVXL, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_loge_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VLOGEFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_lvsl_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_LVSL, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_LVSL, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_lvsr_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_LVSR, + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_LVSR, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_madd_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMADDFP, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_madds_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMHADDSHS, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_max_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMAXUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VMAXUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VMAXUB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMAXSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VMAXSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VMAXSB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMAXUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VMAXUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VMAXUH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMAXSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VMAXSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VMAXSH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMAXUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VMAXUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VMAXUW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMAXSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VMAXSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VMAXSW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMAXFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_mergeh_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGHB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMRGHB, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VMRGHB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGHH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMRGHH, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VMRGHH, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VMRGHH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMRGHW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMRGHW, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VMRGHW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMRGHW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_mergel_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGLB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMRGLB, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VMRGLB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGLH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMRGLH, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VMRGLH, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VMRGLH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMRGLW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMRGLW, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VMRGLW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMRGLW, + NULL, NULL, NULL, 0 +}; +static TypeTable11 vector_mfvscr_type_table[] = { + TYPE(&stvectorunsignedshort), PC_MFVSCR, + NULL, 0 +}; +static TypeTable31 vector_min_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMINUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VMINUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VMINUB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMINSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VMINSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VMINSB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMINUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VMINUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VMINUH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMINSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VMINSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VMINSH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMINUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VMINUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VMINUW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMINSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VMINSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VMINSW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMINFP, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_mladd_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMLADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMLADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMLADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMLADDUHM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_mradds_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMHRADDSHS, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_msum_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedlong), PC_VMSUMUBM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), PC_VMSUMUHM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorsignedlong), PC_VMSUMMBM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), PC_VMSUMSHM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_msums_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), PC_VMSUMUHS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), PC_VMSUMSHS, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable21 vector_mtvscr_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedchar), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorsignedchar), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorboolchar), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorunsignedshort), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorsignedshort), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorboolshort), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorpixel), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorsignedlong), PC_MTVSCR, + TYPE(&stvoid), TYPE(&stvectorboollong), PC_MTVSCR, + NULL, NULL, 0 +}; +static TypeTable31 vector_mule_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMULEUB, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMULESB, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMULEUH, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMULESH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_mulo_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMULOUB, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMULOSB, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMULOUH, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMULOSH, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_nmsub_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VNMSUBFP, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_nor_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VNOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VNOR, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VNOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VNOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VNOR, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VNOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VNOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VNOR, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VNOR, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VNOR, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_or_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VOR, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VOR, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VOR, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VOR, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VOR, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VOR, + TYPE(&stvectorfloat), TYPE(&stvectorboollong), TYPE(&stvectorfloat), PC_VOR, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorboollong), PC_VOR, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VOR, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_pack_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VPKUHUM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VPKUHUM, + TYPE(&stvectorboolchar), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VPKUHUM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VPKUWUM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VPKUWUM, + TYPE(&stvectorboolshort), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VPKUWUM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_packpx_type_table[] = { + TYPE(&stvectorpixel), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VPKPX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_packs_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VPKUHUS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VPKSHSS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VPKUWUS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VPKSWSS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_packsu_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VPKUHUS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VPKSHUS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VPKUWUS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VPKSWUS, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_perm_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedchar), PC_VPERM, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorunsignedchar), PC_VPERM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_re_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VREFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_rl_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VRLB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VRLB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VRLH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VRLH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VRLW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VRLW, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_round_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VRFIN, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_rsqrte_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VRSQRTEFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable41 vector_sel_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSEL, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VSEL, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSEL, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VSEL, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSEL, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VSEL, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSEL, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VSEL, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSEL, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VSEL, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSEL, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VSEL, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSEL, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VSEL, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSEL, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VSEL, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSEL, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VSEL, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorunsignedlong), PC_VSEL, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorboollong), PC_VSEL, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sl_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSLB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSLB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSLH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSLH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSLW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSLW, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_sld_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VSLDOI, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stsignedint), PC_VSLDOI, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sll_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSL, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedchar), PC_VSL, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedshort), PC_VSL, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSL, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_slo_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedchar), PC_VSLO, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorunsignedchar), PC_VSLO, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorsignedchar), PC_VSLO, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_splat_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), PC_VSPLTB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stsignedint), PC_VSPLTB, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stsignedint), PC_VSPLTB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), PC_VSPLTW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VSPLTW, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stsignedint), PC_VSPLTW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stsignedint), PC_VSPLTW, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_splat_s8_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stsignedint), PC_VSPLTISB, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_splat_s16_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stsignedint), PC_VSPLTISH, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_splat_s32_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VSPLTISW, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_splat_u8_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), PC_VSPLTISB, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_splat_u16_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), PC_VSPLTISH, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_splat_u32_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), PC_VSPLTISW, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_sr_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSRB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSRB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSRH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSRH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSRW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSRW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sra_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSRAB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSRAB, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSRAH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSRAH, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSRAW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSRAW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_srl_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSR, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedchar), PC_VSR, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedshort), PC_VSR, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSR, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sro_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedchar), PC_VSRO, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorunsignedchar), PC_VSRO, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorsignedchar), PC_VSRO, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_st_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stvectorunsignedchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stvectorsignedchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stvectorunsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stvectorsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stvectorunsignedlong_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stvectorsignedlong_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stvectorfloat_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stvectorpixel_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stvectorboolchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stvectorboolshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stvectorboollong_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVX, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_ste_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVEBX, + TYPE(&stvoid), TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVEBX, + TYPE(&stvoid), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVEBX, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVEBX, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVEWX, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_stl_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stvectorunsignedchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stvectorsignedchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stvectorunsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stvectorsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stvectorunsignedlong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stvectorsignedlong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stvectorfloat_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stvectorpixel_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stvectorboolchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stvectorboolshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stvectorboollong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVXL, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVXL, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sub_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSUBUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VSUBUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSUBUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSUBUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VSUBUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VSUBUBM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSUBUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VSUBUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSUBUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VSUBUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VSUBUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VSUBUHM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSUBUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VSUBUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSUBUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUBUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VSUBUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VSUBUWM, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VSUBFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_subc_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSUBCUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_subs_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSUBUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VSUBUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSUBUBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSUBSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VSUBSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VSUBSBS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSUBUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VSUBUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSUBUHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VSUBSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VSUBSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VSUBSHS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSUBUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VSUBUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSUBUWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUBSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VSUBSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VSUBSWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sum4s_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedlong), PC_VSUM4UBS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedlong), PC_VSUM4SBS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), PC_VSUM4SHS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sum2s_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUM2SWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_sums_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUMSWS, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_trunc_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VRFIZ, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_unpack2sh_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGHB, + TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGHH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_unpack2sl_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGLB, + TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGLH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_unpack2uh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGHB, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGHH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_unpack2ul_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGLB, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGLH, + NULL, NULL, NULL, 0 +}; +static TypeTable21 vector_unpackh_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), PC_VUPKHSB, + TYPE(&stvectorboolshort), TYPE(&stvectorboolchar), PC_VUPKHSB, + TYPE(&stvectorunsignedlong), TYPE(&stvectorpixel), PC_VUPKHPX, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), PC_VUPKHSH, + TYPE(&stvectorboollong), TYPE(&stvectorboolshort), PC_VUPKHSH, + NULL, NULL, 0 +}; +static TypeTable21 vector_unpackl_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), PC_VUPKLSB, + TYPE(&stvectorboolshort), TYPE(&stvectorboolchar), PC_VUPKLSB, + TYPE(&stvectorunsignedlong), TYPE(&stvectorpixel), PC_VUPKLPX, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), PC_VUPKLSH, + TYPE(&stvectorboollong), TYPE(&stvectorboolshort), PC_VUPKLSH, + NULL, NULL, 0 +}; +static TypeTable31 vector_xor_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VXOR, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VXOR, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VXOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VXOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VXOR, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VXOR, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VXOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VXOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VXOR, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VXOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VXOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VXOR, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VXOR, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VXOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VXOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VXOR, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VXOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VXOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VXOR, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VXOR, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VXOR, + TYPE(&stvectorfloat), TYPE(&stvectorboollong), TYPE(&stvectorfloat), PC_VXOR, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorboollong), PC_VXOR, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VXOR, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_eq_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPEQFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_ge_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_gt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_in_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPBFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_le_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_lt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP +}; +static TypeTable22 vector_all_nan_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), PC_VCMPEQFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_all_ne_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPEQFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_nge_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_ngt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_nle_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_all_nlt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_all_numeric_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), PC_VCMPEQFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_any_eq_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPEQFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_ge_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_gt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_le_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_lt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_any_nan_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), PC_VCMPEQFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_any_ne_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VCMPEQUB, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VCMPEQUH, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VCMPEQUW, + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPEQFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_nge_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_ngt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_nle_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGEFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_any_nlt_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_any_numeric_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), PC_VCMPEQFP, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_any_out_type_table[] = { + TYPE(&stsignedint), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPBFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vaddubm_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VADDUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VADDUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VADDUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VADDUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VADDUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VADDUBM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vadduhm_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VADDUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VADDUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VADDUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VADDUHM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vadduwm_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VADDUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VADDUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VADDUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VADDUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VADDUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VADDUWM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vaddfp_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VADDFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vaddubs_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VADDUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VADDUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VADDUBS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vaddsbs_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VADDSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VADDSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VADDSBS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vadduhs_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VADDUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VADDUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VADDUHS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vaddshs_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VADDSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VADDSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VADDSHS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vadduws_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VADDUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VADDUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VADDUWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vaddsws_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VADDSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VADDSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VADDSWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vavgub_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VAVGUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vavgsb_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VAVGSB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vavguh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VAVGUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vavgsh_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VAVGSH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vavguw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VAVGUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vavgsw_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VAVGSW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpequb_type_table[] = { + TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPEQUB, + TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPEQUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpequh_type_table[] = { + TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPEQUH, + TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPEQUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpequw_type_table[] = { + TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPEQUW, + TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPEQUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpeqfp_type_table[] = { + TYPE(&stvectorboollong), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPEQFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtub_type_table[] = { + TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VCMPGTUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtsb_type_table[] = { + TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VCMPGTSB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtuh_type_table[] = { + TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VCMPGTUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtsh_type_table[] = { + TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VCMPGTSH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtuw_type_table[] = { + TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VCMPGTUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtsw_type_table[] = { + TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VCMPGTSW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcmpgtfp_type_table[] = { + TYPE(&stvectorboollong), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VCMPGTFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcfux_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), PC_VCFUX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vcfsx_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VCFSX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_lvebx_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_LVEBX, + TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_LVEBX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_lvehx_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_LVEHX, + TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_LVEHX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_lvewx_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_LVEWX, + TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_LVEWX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_LVEWX, + TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_LVEWX, + TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_LVEWX, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxub_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMAXUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VMAXUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VMAXUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxsb_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMAXSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VMAXSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VMAXSB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxuh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMAXUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VMAXUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VMAXUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxsh_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMAXSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VMAXSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VMAXSH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxuw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMAXUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VMAXUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VMAXUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxsw_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMAXSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VMAXSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VMAXSW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmaxfp_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMAXFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmrghb_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGHB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMRGHB, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VMRGHB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmrghh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGHH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMRGHH, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VMRGHH, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VMRGHH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmrghw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMRGHW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMRGHW, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VMRGHW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMRGHW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmrglb_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMRGLB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMRGLB, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), PC_VMRGLB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmrglh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMRGLH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMRGLH, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VMRGLH, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stvectorpixel), PC_VMRGLH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmrglw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMRGLW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMRGLW, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VMRGLW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMRGLW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminub_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMINUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VMINUB, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VMINUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminsb_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMINSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VMINSB, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VMINSB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminuh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMINUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VMINUH, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VMINUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminsh_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMINSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VMINSH, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VMINSH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminuw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VMINUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VMINUW, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VMINUW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminsw_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VMINSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VMINSW, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VMINSW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vminfp_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VMINFP, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_vmsumubm_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedlong), PC_VMSUMUBM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_vmsumuhm_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), PC_VMSUMUHM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_vmsummbm_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorsignedlong), PC_VMSUMMBM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_vmsumshm_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), PC_VMSUMSHM, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_vmsumuhs_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), PC_VMSUMUHS, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_vmsumshs_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), PC_VMSUMSHS, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmuleub_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMULEUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmulesb_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMULESB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmuleuh_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMULEUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmulesh_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMULESH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmuloub_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VMULOUB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmulosb_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VMULOSB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmulouh_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VMULOUH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vmulosh_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VMULOSH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkuhum_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VPKUHUM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VPKUHUM, + TYPE(&stvectorboolchar), TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), PC_VPKUHUM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkuwum_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VPKUWUM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VPKUWUM, + TYPE(&stvectorboolshort), TYPE(&stvectorboollong), TYPE(&stvectorboollong), PC_VPKUWUM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkuhus_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VPKUHUS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkshss_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VPKSHSS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkuwus_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VPKUWUS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkswss_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VPKSWSS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkshus_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VPKSHUS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vpkswus_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VPKSWUS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vrlb_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VRLB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VRLB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vrlh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VRLH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VRLH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vrlw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VRLW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VRLW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vslb_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSLB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSLB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vslh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSLH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSLH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vslw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSLW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSLW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vspltb_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), PC_VSPLTB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stsignedint), PC_VSPLTB, + TYPE(&stvectorboolchar), TYPE(&stvectorboolchar), TYPE(&stsignedint), PC_VSPLTB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsplth_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorboolshort), TYPE(&stvectorboolshort), TYPE(&stsignedint), PC_VSPLTH, + TYPE(&stvectorpixel), TYPE(&stvectorpixel), TYPE(&stsignedint), PC_VSPLTH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vspltw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), PC_VSPLTW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VSPLTW, + TYPE(&stvectorboollong), TYPE(&stvectorboollong), TYPE(&stsignedint), PC_VSPLTW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stsignedint), PC_VSPLTW, + NULL, NULL, NULL, 0 +}; +static TypeTable22 vector_vspltisb_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stsignedint), PC_VSPLTISB, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_vspltish_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stsignedint), PC_VSPLTISH, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_vspltisw_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stsignedint), PC_VSPLTISW, PC_B, + NULL, NULL, 0 +}; +static TypeTable31 vector_vsrb_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSRB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSRB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsrh_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSRH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSRH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsrw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSRW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSRW, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsrab_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSRAB, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorunsignedchar), PC_VSRAB, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsrah_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSRAH, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorunsignedshort), PC_VSRAH, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsraw_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSRAW, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorunsignedlong), PC_VSRAW, + NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_stvebx_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedchar), TYPE(&stsignedint), TYPE(&stunsignedchar_ptr), PC_STVEBX, + TYPE(&stvoid), TYPE(&stvectorsignedchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVEBX, + TYPE(&stvoid), TYPE(&stvectorboolchar), TYPE(&stsignedint), TYPE(&stsignedchar_ptr), PC_STVEBX, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_stvehx_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedshort), TYPE(&stsignedint), TYPE(&stunsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorsignedshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorboolshort), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVEHX, + TYPE(&stvoid), TYPE(&stvectorpixel), TYPE(&stsignedint), TYPE(&stsignedshort_ptr), PC_STVEHX, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable41 vector_stvewx_type_table[] = { + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorunsignedlong), TYPE(&stsignedint), TYPE(&stunsignedlong_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorsignedlong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorfloat), TYPE(&stsignedint), TYPE(&stfloat_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedint_ptr), PC_STVEWX, + TYPE(&stvoid), TYPE(&stvectorboollong), TYPE(&stsignedint), TYPE(&stsignedlong_ptr), PC_STVEWX, + NULL, NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsububm_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSUBUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VSUBUBM, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSUBUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSUBUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VSUBUBM, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VSUBUBM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubuhm_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSUBUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VSUBUHM, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSUBUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VSUBUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VSUBUHM, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VSUBUHM, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubuwm_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSUBUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VSUBUWM, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSUBUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUBUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VSUBUWM, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VSUBUWM, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VSUBFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubfp_type_table[] = { + TYPE(&stvectorfloat), TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VSUBFP, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsububs_type_table[] = { + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), PC_VSUBUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), PC_VSUBUBS, + TYPE(&stvectorunsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorunsignedchar), PC_VSUBUBS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubsbs_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSUBSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), PC_VSUBSBS, + TYPE(&stvectorsignedchar), TYPE(&stvectorboolchar), TYPE(&stvectorsignedchar), PC_VSUBSBS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubuhs_type_table[] = { + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), PC_VSUBUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), PC_VSUBUHS, + TYPE(&stvectorunsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorunsignedshort), PC_VSUBUHS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubshs_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VSUBSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), PC_VSUBSHS, + TYPE(&stvectorsignedshort), TYPE(&stvectorboolshort), TYPE(&stvectorsignedshort), PC_VSUBSHS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubuws_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), PC_VSUBUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), PC_VSUBUWS, + TYPE(&stvectorunsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorunsignedlong), PC_VSUBUWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsubsws_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUBSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), PC_VSUBSWS, + TYPE(&stvectorsignedlong), TYPE(&stvectorboollong), TYPE(&stvectorsignedlong), PC_VSUBSWS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsum4ubs_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorunsignedchar), TYPE(&stvectorunsignedlong), PC_VSUM4UBS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsum4sbs_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedchar), TYPE(&stvectorsignedlong), PC_VSUM4SBS, + NULL, NULL, NULL, 0 +}; +static TypeTable31 vector_vsum4shs_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), TYPE(&stvectorsignedlong), PC_VSUM4SHS, + NULL, NULL, NULL, 0 +}; +static TypeTable21 vector_vupkhsb_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), PC_VUPKHSB, + TYPE(&stvectorboolshort), TYPE(&stvectorboolchar), PC_VUPKHSB, + NULL, NULL, 0 +}; +static TypeTable21 vector_vupklsb_type_table[] = { + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedchar), PC_VUPKLSB, + TYPE(&stvectorboolshort), TYPE(&stvectorboolchar), PC_VUPKLSB, + NULL, NULL, 0 +}; +static TypeTable22 vector_vupkhpx_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorpixel), PC_VUPKHPX, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_vupklpx_type_table[] = { + TYPE(&stvectorunsignedlong), TYPE(&stvectorpixel), PC_VUPKLPX, PC_B, + NULL, NULL, 0 +}; +static TypeTable21 vector_vupkhsh_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), PC_VUPKHSH, + TYPE(&stvectorboollong), TYPE(&stvectorboolshort), PC_VUPKHSH, + NULL, NULL, 0 +}; +static TypeTable21 vector_vupklsh_type_table[] = { + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedshort), PC_VUPKLSH, + TYPE(&stvectorboollong), TYPE(&stvectorboolshort), PC_VUPKLSH, + NULL, NULL, 0 +}; +static TypeTable22 vector_abs_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSUBUBM, PC_VMAXSB, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VSUBUHM, PC_VMAXSH, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUBUWM, PC_VMAXSW, + TYPE(&stvectorfloat), TYPE(&stvectorfloat), PC_VANDC, PC_B, + NULL, NULL, 0 +}; +static TypeTable22 vector_abss_type_table[] = { + TYPE(&stvectorsignedchar), TYPE(&stvectorsignedchar), PC_VSUBSBS, PC_VMAXSB, + TYPE(&stvectorsignedshort), TYPE(&stvectorsignedshort), PC_VSUBSHS, PC_VMAXSH, + TYPE(&stvectorsignedlong), TYPE(&stvectorsignedlong), PC_VSUBSWS, PC_VMAXSW, + NULL, NULL, 0 +}; +static void *typeTable[] = { + vector_add_type_table, + vector_addc_type_table, + vector_adds_type_table, + vector_and_type_table, + vector_andc_type_table, + vector_avg_type_table, + vector_ceil_type_table, + vector_cmpb_type_table, + vector_cmpeq_type_table, + vector_cmpge_type_table, + vector_cmpge_type_table, + vector_cmpgt_type_table, + vector_cmpgt_type_table, + vector_ctf_type_table, + vector_cts_type_table, + vector_ctu_type_table, + vector_dss_type_table, + vector_dssall_type_table, + vector_datastream_type_table, + vector_datastream_type_table, + vector_datastream_type_table, + vector_datastream_type_table, + vector_expte_type_table, + vector_floor_type_table, + vector_load_type_table, + vector_loade_type_table, + vector_loadl_type_table, + vector_loge_type_table, + vector_lvsl_type_table, + vector_lvsr_type_table, + vector_madd_type_table, + vector_madds_type_table, + vector_max_type_table, + vector_mergeh_type_table, + vector_mergel_type_table, + vector_mfvscr_type_table, + vector_min_type_table, + vector_mladd_type_table, + vector_mradds_type_table, + vector_msum_type_table, + vector_msums_type_table, + vector_mtvscr_type_table, + vector_mule_type_table, + vector_mulo_type_table, + vector_nmsub_type_table, + vector_nor_type_table, + vector_or_type_table, + vector_pack_type_table, + vector_packpx_type_table, + vector_packs_type_table, + vector_packsu_type_table, + vector_perm_type_table, + vector_re_type_table, + vector_rl_type_table, + vector_round_type_table, + vector_rsqrte_type_table, + vector_sel_type_table, + vector_sl_type_table, + vector_sld_type_table, + vector_sll_type_table, + vector_slo_type_table, + vector_splat_type_table, + vector_splat_s8_type_table, + vector_splat_s16_type_table, + vector_splat_s32_type_table, + vector_splat_u8_type_table, + vector_splat_u16_type_table, + vector_splat_u32_type_table, + vector_sr_type_table, + vector_sra_type_table, + vector_srl_type_table, + vector_sro_type_table, + vector_st_type_table, + vector_ste_type_table, + vector_stl_type_table, + vector_sub_type_table, + vector_subc_type_table, + vector_subs_type_table, + vector_sum4s_type_table, + vector_sum2s_type_table, + vector_sums_type_table, + vector_trunc_type_table, + vector_unpack2sh_type_table, + vector_unpack2sl_type_table, + vector_unpack2uh_type_table, + vector_unpack2ul_type_table, + vector_unpackh_type_table, + vector_unpackl_type_table, + vector_xor_type_table, + vector_all_eq_type_table, + vector_all_ge_type_table, + vector_all_gt_type_table, + vector_all_in_type_table, + vector_all_le_type_table, + vector_all_lt_type_table, + vector_all_nan_type_table, + vector_all_ne_type_table, + vector_all_nge_type_table, + vector_all_ngt_type_table, + vector_all_nle_type_table, + vector_all_nlt_type_table, + vector_all_numeric_type_table, + vector_any_eq_type_table, + vector_any_ge_type_table, + vector_any_gt_type_table, + vector_any_le_type_table, + vector_any_lt_type_table, + vector_any_nan_type_table, + vector_any_ne_type_table, + vector_any_nge_type_table, + vector_any_ngt_type_table, + vector_any_nle_type_table, + vector_any_nlt_type_table, + vector_any_numeric_type_table, + vector_any_out_type_table, + vector_vaddubm_type_table, + vector_vadduhm_type_table, + vector_vadduwm_type_table, + vector_vaddfp_type_table, + vector_addc_type_table, + vector_vaddubs_type_table, + vector_vaddsbs_type_table, + vector_vadduhs_type_table, + vector_vaddshs_type_table, + vector_vadduws_type_table, + vector_vaddsws_type_table, + vector_and_type_table, + vector_andc_type_table, + vector_vavgub_type_table, + vector_vavgsb_type_table, + vector_vavguh_type_table, + vector_vavgsh_type_table, + vector_vavguw_type_table, + vector_vavgsw_type_table, + vector_ceil_type_table, + vector_cmpb_type_table, + vector_vcmpequb_type_table, + vector_vcmpequh_type_table, + vector_vcmpequw_type_table, + vector_vcmpeqfp_type_table, + vector_cmpge_type_table, + vector_vcmpgtub_type_table, + vector_vcmpgtsb_type_table, + vector_vcmpgtuh_type_table, + vector_vcmpgtsh_type_table, + vector_vcmpgtuw_type_table, + vector_vcmpgtsw_type_table, + vector_vcmpgtfp_type_table, + vector_vcfux_type_table, + vector_vcfsx_type_table, + vector_cts_type_table, + vector_ctu_type_table, + vector_expte_type_table, + vector_floor_type_table, + vector_load_type_table, + vector_lvebx_type_table, + vector_lvehx_type_table, + vector_lvewx_type_table, + vector_loadl_type_table, + vector_loge_type_table, + vector_madd_type_table, + vector_madds_type_table, + vector_vmaxub_type_table, + vector_vmaxsb_type_table, + vector_vmaxuh_type_table, + vector_vmaxsh_type_table, + vector_vmaxuw_type_table, + vector_vmaxsw_type_table, + vector_vmaxfp_type_table, + vector_vmrghb_type_table, + vector_vmrghh_type_table, + vector_vmrghw_type_table, + vector_vmrglb_type_table, + vector_vmrglh_type_table, + vector_vmrglw_type_table, + vector_vminub_type_table, + vector_vminsb_type_table, + vector_vminuh_type_table, + vector_vminsh_type_table, + vector_vminuw_type_table, + vector_vminsw_type_table, + vector_vminfp_type_table, + vector_mladd_type_table, + vector_mradds_type_table, + vector_vmsumubm_type_table, + vector_vmsumuhm_type_table, + vector_vmsummbm_type_table, + vector_vmsumshm_type_table, + vector_vmsumuhs_type_table, + vector_vmsumshs_type_table, + vector_vmuleub_type_table, + vector_vmulesb_type_table, + vector_vmuleuh_type_table, + vector_vmulesh_type_table, + vector_vmuloub_type_table, + vector_vmulosb_type_table, + vector_vmulouh_type_table, + vector_vmulosh_type_table, + vector_nmsub_type_table, + vector_nor_type_table, + vector_or_type_table, + vector_vpkuhum_type_table, + vector_vpkuwum_type_table, + vector_packpx_type_table, + vector_vpkuhus_type_table, + vector_vpkshss_type_table, + vector_vpkuwus_type_table, + vector_vpkswss_type_table, + vector_vpkshus_type_table, + vector_vpkswus_type_table, + vector_perm_type_table, + vector_re_type_table, + vector_vrlb_type_table, + vector_vrlh_type_table, + vector_vrlw_type_table, + vector_round_type_table, + vector_rsqrte_type_table, + vector_sel_type_table, + vector_vslb_type_table, + vector_vslh_type_table, + vector_vslw_type_table, + vector_sld_type_table, + vector_sll_type_table, + vector_slo_type_table, + vector_vspltb_type_table, + vector_vsplth_type_table, + vector_vspltw_type_table, + vector_vspltisb_type_table, + vector_vspltish_type_table, + vector_vspltisw_type_table, + vector_vsrb_type_table, + vector_vsrh_type_table, + vector_vsrw_type_table, + vector_vsrab_type_table, + vector_vsrah_type_table, + vector_vsraw_type_table, + vector_srl_type_table, + vector_sro_type_table, + vector_st_type_table, + vector_stvebx_type_table, + vector_stvehx_type_table, + vector_stvewx_type_table, + vector_stl_type_table, + vector_vsububm_type_table, + vector_vsubuhm_type_table, + vector_vsubuwm_type_table, + vector_vsubfp_type_table, + vector_subc_type_table, + vector_vsububs_type_table, + vector_vsubsbs_type_table, + vector_vsubuhs_type_table, + vector_vsubshs_type_table, + vector_vsubuws_type_table, + vector_vsubsws_type_table, + vector_vsum4ubs_type_table, + vector_vsum4sbs_type_table, + vector_vsum4shs_type_table, + vector_sum2s_type_table, + vector_sums_type_table, + vector_trunc_type_table, + vector_vupkhsb_type_table, + vector_vupklsb_type_table, + vector_vupkhpx_type_table, + vector_vupklpx_type_table, + vector_vupkhsh_type_table, + vector_vupklsh_type_table, + vector_xor_type_table, + vector_abs_type_table, + vector_abss_type_table, + NULL +}; + +int is_intrinsic_function_call(ENode *funccall) { + ENode *funcref = funccall->data.funccall.funcref; + return + ENODE_IS(funcref, EOBJREF) && + funcref->data.objref->datatype == DFUNC && + (TYPE_FUNC(funcref->data.objref->type)->flags & FUNC_INTRINSIC); +} + +static void abs_intrinsic(ENode *expr, short outputReg, Operand *output) { + int reg1; + int reg2; + Operand op; + + memclrw(&op, sizeof(op)); + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + + reg1 = ALLOC_GPR(); + emitpcode(PC_SRAWI, reg1, op.reg, 31); + + reg2 = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_XOR, reg2, reg1, op.reg); + emitpcode(PC_SUBF, reg2, reg1, reg2); + + output->optype = OpndType_GPR; + output->reg = reg2; +} + +static void setflm_intrinsic(ENode *expr, short outputReg, Operand *output, int flag) { + int reg; + Operand op; + + memclrw(&op, sizeof(op)); + GEN_NODE_TO_FPR(expr, &op, expr->rtype, 0); + + if (!flag) { + output->optype = OpndType_FPR; + reg = (outputReg && outputReg != op.reg) ? outputReg : ALLOC_FPR(); + emitpcode(PC_MFFS, output->reg = reg); + } + + emitpcode(PC_MTFSF, 255, op.reg); +} + +static void alloca_intrinsic(ENode *expr, short outputReg, Operand *output) { + int reg; + Operand op; + + memclrw(&op, sizeof(op)); + + if (ENODE_IS(expr, EINTCONST)) { + reg = outputReg ? outputReg : ALLOC_GPR(); + allocate_dynamic_stack_space( + 1, reg, ALLOC_GPR(), CInt64_GetULong(&expr->data.intval)); + } else { + GEN_NODE_TO_GPR(expr, &op, expr->rtype, 0); + reg = outputReg ? outputReg : ALLOC_GPR(); + + emitpcode(PC_NEG, reg, op.reg); + emitpcode(PC_RLWINM, reg, reg, 0, 0, get_alloca_alignment()); + allocate_dynamic_stack_space(0, reg, ALLOC_GPR(), 0); + } + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void load_bytereversed_intrinsic(Opcode opcode, ENode *expr1, ENode *expr2, short outputReg, Operand *output) { + int reg; + Operand op1; + Operand op2; + + reg = outputReg ? outputReg : ALLOC_GPR(); + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + + if (ENODE_IS(expr2, EINTCONST) && expr2->data.intval.lo == 0) { + GEN_NODE(expr1, &op1); + if (op1.optype == OpndType_GPR_Indexed) { + emitpcode(opcode, reg, op1.reg, op1.regOffset); + } else { + ENSURE_GPR(&op1, expr1->rtype, 0); + emitpcode(opcode, reg, 0, op1.reg); + } + } else { + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_GPR(expr2, &op2, expr2->rtype, 0); + emitpcode(opcode, reg, op1.reg, op2.reg); + } + + setpcodeflags(fSideEffects | ((expr1->flags | expr2->flags | output->flags) & (fIsConst | fIsVolatile))); + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void store_bytereversed_intrinsic(Opcode opcode, ENode *expr1, ENode *expr2, ENode *expr3) { + Operand op1; + Operand op2; + Operand op3; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + memclrw(&op3, sizeof(op3)); + + if (ENODE_IS(expr3, EINTCONST) && expr3->data.intval.lo == 0) { + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE(expr2, &op2); + if (op2.optype == OpndType_GPR_Indexed) { + emitpcode(opcode, op1.reg, op2.reg, op2.regOffset); + } else { + ENSURE_GPR(&op2, expr2->rtype, 0); + emitpcode(opcode, op1.reg, 0, op2.reg); + } + } else { + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_GPR(expr2, &op2, expr2->rtype, 0); + GEN_NODE_TO_GPR(expr3, &op3, expr3->rtype, 0); + emitpcode(opcode, op1.reg, op2.reg, op3.reg); + } + + setpcodeflags(fSideEffects | ((op1.flags | op2.flags | op3.flags) & (fIsConst | fIsVolatile))); +} + +static void data_cache_block_intrinsic(Opcode opcode, ENode *expr1, ENode *expr2) { + Operand op1; + Operand op2; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + + if (ENODE_IS(expr2, EINTCONST) && expr2->data.intval.lo == 0) { + GEN_NODE(expr1, &op1); + if (op1.optype == OpndType_GPR_Indexed) { + emitpcode(opcode, op1.reg, op1.regOffset); + } else { + ENSURE_GPR(&op1, expr1->rtype, 0); + emitpcode(opcode, 0, op1.reg); + } + } else { + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_GPR(expr2, &op2, expr2->rtype, 0); + emitpcode(opcode, op1.reg, op2.reg); + } +} + +static void memcpy_intrinsic(ENode *destexpr, ENode *srcexpr, SInt32 size, Boolean ignored, Operand *output) { + UInt32 qual; + Operand destOp; + Operand srcOp; + + qual = 0; + + memclrw(&destOp, sizeof(destOp)); + memclrw(&srcOp, sizeof(srcOp)); + + GEN_NODE(srcexpr, &srcOp); + indirect(&srcOp, srcexpr); + + GEN_NODE(destexpr, output); + destOp = *output; + indirect(&destOp, destexpr); + + if (destOp.object) + qual = destOp.object->qual; + + move_block(&destOp, &srcOp, size, destexpr->rtype ? CMach_AllocationAlignment(destexpr->rtype, qual) : 1); +} + +static SInt32 checkconstintarg(ENode *expr, char *name, SInt32 min, SInt32 max, int argnum) { + SInt32 value; + + if (!ENODE_IS(expr, EINTCONST)) + PPCError_ErrorTerm(PPCErrorStr210, name, 3, max, argnum); + + value = CInt64_GetULong(&expr->data.intval); + + if (value < min) { + PPCError_Warning(PPCErrorStr211, name, argnum, value, min, max, min); + return min; + } + + if (value > max) { + PPCError_Warning(PPCErrorStr211, name, argnum, value, min, max, value & max); + value = value & max; + } + + return value; +} + +static void rlwimi_intrinsic(ENode *expr1, ENode *expr2, ENode *expr3, ENode *expr4, ENode *expr5, short outputReg, Operand *output) { + SInt32 arg3; + SInt32 arg4; + SInt32 arg5; + Operand op1; + Operand op2; + int reg; + char *name = "__rlwimi"; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + + arg3 = checkconstintarg(expr3, name, 0, 31, 3); + arg4 = checkconstintarg(expr4, name, 0, 31, 4); + arg5 = checkconstintarg(expr5, name, 0, 31, 5); + + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + + GEN_NODE(expr2, &op2); + if (copts.optimizationlevel > 1) { + reg = ALLOC_GPR(); + emitpcode(PC_MR, reg, op1.reg); + op1.reg = reg; + } + ENSURE_GPR(&op2, expr2->rtype, 0); + + emitpcode(PC_RLWIMI, op1.reg, op2.reg, arg3, arg4, arg5); + + output->optype = OpndType_GPR; + output->reg = op1.reg; +} + +static void rlwinm_intrinsic(ENode *expr1, ENode *expr2, ENode *expr3, ENode *expr4, short outputReg, Operand *output) { + char *name = "__rlwinm"; + short reg; + short arg2; + short arg3; + short arg4; + Operand op1; + + memclrw(&op1, sizeof(op1)); + arg2 = checkconstintarg(expr2, name, 0, 31, 2); + arg3 = checkconstintarg(expr3, name, 0, 31, 3); + arg4 = checkconstintarg(expr4, name, 0, 31, 4); + + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWINM, reg, op1.reg, arg2, arg3, arg4); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static void rlwnm_intrinsic(ENode *expr1, ENode *expr2, ENode *expr3, ENode *expr4, short outputReg, Operand *output) { + short reg; + short arg3; + short arg4; + char *name = "__rlwnm"; + Operand op1; + Operand op2; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + arg3 = checkconstintarg(expr3, name, 0, 31, 3); + arg4 = checkconstintarg(expr4, name, 0, 31, 4); + + GEN_NODE_TO_GPR(expr1, &op1, expr1->rtype, 0); + GEN_NODE_TO_GPR(expr2, &op2, expr2->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_GPR(); + emitpcode(PC_RLWNM, reg, op1.reg, op2.reg, arg3, arg4); + + output->optype = OpndType_GPR; + output->reg = reg; +} + +static Boolean promotable_types_are_equal(Type *a, Type *b) { + if (a == b) + return 1; + + if (a == TYPE(&stsignedint)) { + if ( + b == TYPE(&stunsignedint) || + b == TYPE(&stsignedchar) || + b == TYPE(&stunsignedchar) || + b == TYPE(&stsignedshort) || + b == TYPE(&stunsignedshort) || + b == TYPE(&stsignedlong) || + b == TYPE(&stunsignedlong) || + b == TYPE(&stbool) + ) + return 1; + } + + return 0; +} + +static int Intrinsics_VerifyParameterCount(int wantedCount, ENodeList *args, HashNameNode *name) { + ENodeList *scan; + int count; + + for (scan = args, count = 0; scan; scan = scan->next) + count++; + + if (count != wantedCount) { + PPCError_Error(PPCErrorStr103, name->name, count, wantedCount); + return 0; + } + + return 1; +} + +static ENode *Intrinsics_CreateIntrinsicFunc(Object *func, ENodeList *args, Type *rtype) { + TypeFunc *tfunc; + ENodeList *scan; + ENode *expr; + + tfunc = TYPE_FUNC(func->type); + CError_ASSERT(3741, IS_TYPE_FUNC(tfunc)); + + for (scan = args; scan; scan = scan->next) { + if (IS_TYPE_ARRAY(scan->node->rtype)) + scan->node = CExpr_PointerGeneration(scan->node); + } + + expr = lalloc(sizeof(ENode)); + expr->type = EFUNCCALL; + expr->cost = 4; + expr->rtype = rtype; + expr->ignored = 0; + expr->flags = tfunc->qual & ENODE_FLAG_QUALS; + expr->data.funccall.funcref = create_objectrefnode(func); + expr->data.funccall.args = args; + expr->data.funccall.functype = tfunc; + return CExpr_AdjustFunctionCall(expr); +} + +static Type *Intrinsics_Verify1VectorArg2Ops(Intrinsics id, ENodeList *args, HashNameNode *name) { + ENode *arg1; + TypeTable22 *table; + Type *typeA; + Type *typeB; + Type *rtype; + + arg1 = args->node; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB = arg1->rtype; + typeA = table->arg1; + if (IS_TYPE_POINTER(typeA) && IS_TYPE_POINTER(typeB)) { + typeA = TPTR_TARGET(typeA); + typeB = TPTR_TARGET(typeB); + } + if (promotable_types_are_equal(typeA, typeB)) + break; + } + + rtype = table->rtype; + if (!rtype) { + PPCError_Error(PPCErrorStr104, name->name, name->name, arg1->rtype, 0); + rtype = NULL; + } + return rtype; +} + +static Type *Intrinsics_VerifyNoVectorArgs(Intrinsics id, HashNameNode *name) { + TypeTable11 *table = typeTable[id - Intrinsic_042]; + return table->rtype; +} + +static Type *Intrinsics_Verify1VectorArg(Intrinsics id, ENodeList *args, HashNameNode *name) { + ENode *arg1; + TypeTable21 *table; + Type *typeA; + Type *typeB; + Type *rtype; + + arg1 = args->node; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB = arg1->rtype; + typeA = table->arg1; + if (IS_TYPE_POINTER(typeA) && IS_TYPE_POINTER(typeB)) { + typeA = TPTR_TARGET(typeA); + typeB = TPTR_TARGET(typeB); + } + if (promotable_types_are_equal(typeA, typeB)) + break; + } + + switch (id) { + case Intrinsic_104: + case Intrinsic_105: + case Intrinsic_106: + case Intrinsic_107: + case Intrinsic_108: + case Intrinsic_109: + case Intrinsic_269: + case Intrinsic_270: + case Intrinsic_271: + if (ENODE_IS(arg1, EINTCONST)) { + SInt32 val = arg1->data.intval.lo; + if (val > 15 || val < -16) { + PPCError_Error(PPCErrorStr108, name->name, name->name, 5); + return NULL; + } + } else { + PPCError_Error(PPCErrorStr108, name->name, name->name, 5); + return NULL; + } + break; + case Intrinsic_058: + if (ENODE_IS(arg1, EINTCONST)) { + SInt32 val = arg1->data.intval.lo; + if (val > 3 || val < 0) { + PPCError_Error(PPCErrorStr108, name->name, name->name, 2); + return NULL; + } + } else { + PPCError_Error(PPCErrorStr108, name->name, name->name, 2); + return NULL; + } + break; + } + + rtype = table->rtype; + if (!rtype) { + PPCError_Error(PPCErrorStr104, name->name, name->name, arg1->rtype, 0); + rtype = NULL; + } + return rtype; +} + +static Type *Intrinsics_Verify2VectorArgs(Intrinsics id, ENodeList *args, HashNameNode *name) { + ENode *arg1; + ENode *arg2; + TypeTable31 *table; + Type *typeB1; + Type *typeB2; + Type *typeA1; + Type *typeA2; + + arg1 = args->node; + arg2 = args->next->node; + table = typeTable[id - Intrinsic_042]; + + switch (id) { + case Intrinsic_055: + case Intrinsic_056: + case Intrinsic_057: + case Intrinsic_103: + case Intrinsic_190: + case Intrinsic_191: + case Intrinsic_192: + case Intrinsic_193: + case Intrinsic_266: + case Intrinsic_267: + case Intrinsic_268: + if (ENODE_IS(arg2, EINTCONST)) { + if (arg2->data.intval.lo > 31 || arg2->data.intval.hi < 0) { + PPCError_Error(PPCErrorStr108, name->name, name->name, 5); + return NULL; + } + } else { + PPCError_Error(PPCErrorStr108, name->name, name->name, 5); + return NULL; + } + break; + } + + for (; table->rtype; table++) { + typeB1 = arg1->rtype; + typeB2 = arg2->rtype; + typeA1 = table->arg1; + typeA2 = table->arg2; + if (IS_TYPE_POINTER(typeA1) && IS_TYPE_POINTER(typeB1)) { + typeA1 = TPTR_TARGET(typeA1); + typeB1 = TPTR_TARGET(typeB1); + } + if (IS_TYPE_POINTER(typeA2) && IS_TYPE_POINTER(typeB2)) { + typeA2 = TPTR_TARGET(typeA2); + typeB2 = TPTR_TARGET(typeB2); + } + if (promotable_types_are_equal(typeA1, typeB1) && + promotable_types_are_equal(typeA2, typeB2)) + break; + } + + if (!table->rtype) { + PPCError_Error(PPCErrorStr105, name->name, name->name, arg1->rtype, 0, arg2->rtype, 0); + return NULL; + } + + switch (id) { + case Intrinsic_066: + case Intrinsic_067: + case Intrinsic_068: + if (arg2->flags & ENODE_FLAG_VOLATILE) + PPCError_Warning(PPCErrorStr178, name->name); + } + + return table->rtype; +} + +static Type *Intrinsics_Verify3VectorArgs(Intrinsics id, ENodeList *args, HashNameNode *name) { + ENode *arg1; + ENode *arg2; + ENode *arg3; + TypeTable41 *table; + Type *typeB1; + Type *typeB2; + Type *typeB3; + Type *typeA1; + Type *typeA2; + Type *typeA3; + + arg1 = args->node; + arg2 = args->next->node; + arg3 = args->next->next->node; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB1 = arg1->rtype; + typeB2 = arg2->rtype; + typeB3 = arg3->rtype; + typeA1 = table->arg1; + typeA2 = table->arg2; + typeA3 = table->arg3; + if (IS_TYPE_POINTER(typeA1) && IS_TYPE_POINTER(typeB1)) { + typeA1 = TPTR_TARGET(typeA1); + typeB1 = TPTR_TARGET(typeB1); + } + if (IS_TYPE_POINTER(typeA2) && IS_TYPE_POINTER(typeB2)) { + typeA2 = TPTR_TARGET(typeA2); + typeB2 = TPTR_TARGET(typeB2); + } + if (IS_TYPE_POINTER(typeA3) && IS_TYPE_POINTER(typeB3)) { + typeA3 = TPTR_TARGET(typeA3); + typeB3 = TPTR_TARGET(typeB3); + } + if (promotable_types_are_equal(typeA1, typeB1) && + promotable_types_are_equal(typeA2, typeB2) && + promotable_types_are_equal(typeA3, typeB3)) + break; + } + + switch (id) { + case Intrinsic_060: + case Intrinsic_061: + case Intrinsic_062: + case Intrinsic_063: + if (ENODE_IS(arg3, EINTCONST)) { + SInt32 val = arg3->data.intval.lo; + if (val > 3 || val < 0) { + PPCError_Error(PPCErrorStr108, name->name, name->name, 2); + return NULL; + } + } else { + PPCError_Error(PPCErrorStr108, name->name, name->name, 2); + return NULL; + } + break; + case Intrinsic_100: + case Intrinsic_263: + if (ENODE_IS(arg3, EINTCONST)) { + if (arg3->data.intval.lo > 15 || arg3->data.intval.hi < 0) { + PPCError_Error(PPCErrorStr108, name->name, name->name, 4); + return NULL; + } + } else { + PPCError_Error(PPCErrorStr108, name->name, name->name, 4); + return NULL; + } + break; + } + + if (!table->rtype) { + PPCError_Error(PPCErrorStr106, name->name, name->name, arg1->rtype, 0, arg2->rtype, 0, arg3->rtype, 0); + return NULL; + } + + switch (id) { + case Intrinsic_114: + case Intrinsic_115: + case Intrinsic_116: + if (arg3->flags & ENODE_FLAG_VOLATILE) + PPCError_Warning(PPCErrorStr178, name->name); + } + + return table->rtype; +} + +static Opcode Intrinsics_FindOpcodeNoArgs(Intrinsics id, ENode *funccall) { + TypeTable11 *table = typeTable[id - Intrinsic_042]; + return table->opcode; +} + +static Opcode Intrinsics_FindOpcode1Arg(Intrinsics id, ENode *funccall, ENode *arg1) { + TypeTable21 *table; + Type *typeA; + Type *typeB; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB = arg1->rtype; + typeA = table->arg1; + if (IS_TYPE_POINTER(typeA) && IS_TYPE_POINTER(typeB)) { + typeA = TPTR_TARGET(typeA); + typeB = TPTR_TARGET(typeB); + } + if (promotable_types_are_equal(typeA, typeB)) + break; + } + + CError_ASSERT(4105, table->rtype); + return table->opcode; +} + +static Opcode Intrinsics_FindOpcode2Args(Intrinsics id, ENode *funccall, ENode *arg1, ENode *arg2) { + TypeTable31 *table; + Type *typeB1; + Type *typeB2; + Type *typeA1; + Type *typeA2; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB1 = arg1->rtype; + typeB2 = arg2->rtype; + typeA1 = table->arg1; + typeA2 = table->arg2; + if (IS_TYPE_POINTER(typeA1) && IS_TYPE_POINTER(typeB1)) { + typeA1 = TPTR_TARGET(typeA1); + typeB1 = TPTR_TARGET(typeB1); + } + if (IS_TYPE_POINTER(typeA2) && IS_TYPE_POINTER(typeB2)) { + typeA2 = TPTR_TARGET(typeA2); + typeB2 = TPTR_TARGET(typeB2); + } + if (promotable_types_are_equal(typeA1, typeB1) && + promotable_types_are_equal(typeA2, typeB2)) + break; + } + + CError_ASSERT(4144, table->rtype); + return table->opcode; +} + +static Opcode Intrinsics_FindOpcode3Args(Intrinsics id, ENode *funccall, ENode *arg1, ENode *arg2, ENode *arg3) { + TypeTable41 *table; + Type *typeB1; + Type *typeB2; + Type *typeB3; + Type *typeA1; + Type *typeA2; + Type *typeA3; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB1 = arg1->rtype; + typeB2 = arg2->rtype; + typeB3 = arg3->rtype; + typeA1 = table->arg1; + typeA2 = table->arg2; + typeA3 = table->arg3; + if (IS_TYPE_POINTER(typeA1) && IS_TYPE_POINTER(typeB1)) { + typeA1 = TPTR_TARGET(typeA1); + typeB1 = TPTR_TARGET(typeB1); + } + if (IS_TYPE_POINTER(typeA2) && IS_TYPE_POINTER(typeB2)) { + typeA2 = TPTR_TARGET(typeA2); + typeB2 = TPTR_TARGET(typeB2); + } + if (IS_TYPE_POINTER(typeA3) && IS_TYPE_POINTER(typeB3)) { + typeA3 = TPTR_TARGET(typeA3); + typeB3 = TPTR_TARGET(typeB3); + } + if (promotable_types_are_equal(typeA1, typeB1) && + promotable_types_are_equal(typeA2, typeB2) && + promotable_types_are_equal(typeA3, typeB3)) + break; + } + + CError_ASSERT(4191, table->rtype); + return table->opcode; +} + +static void vector_intrinsic_no_args(Opcode opcode) { + emitpcode(opcode); +} + +static void vector_intrinsic_mfvscr(short outputReg, Opcode opcode, Operand *output) { + short reg; + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_1arg(ENode *arg1, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + short reg; + + memclrw(&op1, sizeof(op1)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.reg); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_splats(ENode *arg1, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + short reg; + + memclrw(&op1, sizeof(op1)); + CError_ASSERT(4253, ENODE_IS(arg1, EINTCONST)); + GEN_NODE(arg1, &op1); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.immediate); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_splatu8(ENode *arg1, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + short reg; + + memclrw(&op1, sizeof(op1)); + CError_ASSERT(4277, ENODE_IS(arg1, EINTCONST)); + GEN_NODE(arg1, &op1); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.immediate); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_splatu16(ENode *arg1, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + short reg; + + memclrw(&op1, sizeof(op1)); + CError_ASSERT(4301, ENODE_IS(arg1, EINTCONST)); + GEN_NODE(arg1, &op1); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.immediate); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_splatu32(ENode *arg1, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + short reg; + + memclrw(&op1, sizeof(op1)); + CError_ASSERT(4325, ENODE_IS(arg1, EINTCONST)); + GEN_NODE(arg1, &op1); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.immediate); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_dss(ENode *arg1) { + Operand op1; + + memclrw(&op1, sizeof(op1)); + CError_ASSERT(4348, ENODE_IS(arg1, EINTCONST)); + GEN_NODE(arg1, &op1); + + emitpcode(PC_DSS, op1.immediate, 0); +} + +static void vector_intrinsic_2args(ENode *arg1, ENode *arg2, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + Operand op2; + short reg; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + GEN_NODE_TO_VR(arg2, &op2, arg2->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.reg, op2.reg); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_2args1const(ENode *arg1, ENode *arg2, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + Operand op2; + short reg; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + CError_ASSERT(4393, ENODE_IS(arg2, EINTCONST)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + GEN_NODE(arg2, &op2); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.reg, op2.immediate); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_3args(ENode *arg1, ENode *arg2, ENode *arg3, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + Operand op2; + Operand op3; + short reg; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + memclrw(&op3, sizeof(op3)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + GEN_NODE_TO_VR(arg2, &op2, arg2->rtype, 0); + GEN_NODE_TO_VR(arg3, &op3, arg3->rtype, 0); + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.reg, op2.reg, op3.reg); + + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_datastream(ENode *arg1, ENode *arg2, ENode *arg3, Opcode opcode) { + Operand op1; + Operand op2; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + CError_ASSERT(4445, ENODE_IS(arg3, EINTCONST)); + GEN_NODE_TO_GPR(arg1, &op1, arg1->rtype, 0); + GEN_NODE_TO_GPR(arg2, &op2, arg2->rtype, 0); + + switch (opcode) { + case PC_DST: + case PC_DSTST: + emitpcode(opcode, op1.reg, op2.reg, arg3->data.intval.lo, 0); + break; + case PC_DSTT: + case PC_DSTSTT: + emitpcode(opcode, op1.reg, op2.reg, arg3->data.intval.lo); + break; + default: + CError_FATAL(4463); + } +} + +static void vector_intrinsic_sld(ENode *arg1, ENode *arg2, ENode *arg3, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + Operand op2; + short reg; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + CError_ASSERT(4479, ENODE_IS(arg3, EINTCONST)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + GEN_NODE_TO_VR(arg2, &op2, arg2->rtype, 0); + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.reg, op2.reg, arg3->data.intval.lo); + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_load(ENode *arg1, ENode *arg2, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + Operand op2; + short reg; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + + GEN_NODE(arg1, &op1); + if (op1.optype == OpndType_Absolute && op1.immediate == 0) { + op1.optype = OpndType_GPR; + op1.reg = 0; + } else { + ENSURE_GPR(&op1, arg1->rtype, 0); + } + + GEN_NODE(arg2, &op2); + if (op2.optype == OpndType_Absolute && op2.immediate == 0 && op1.reg != 0) { + op2 = op1; + op1.optype = OpndType_GPR; + op1.reg = 0; + } else { + ENSURE_GPR(&op2, arg2->rtype, 0); + } + + reg = outputReg ? outputReg : ALLOC_VR(); + + emitpcode(opcode, reg, op1.reg, op2.reg); + output->optype = OpndType_VR; + output->reg = reg; +} + +static void vector_intrinsic_store(ENode *arg1, ENode *arg2, ENode *arg3, short outputReg, Operand *output, Opcode opcode) { + Operand op1; + Operand op2; + Operand op3; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + memclrw(&op3, sizeof(op3)); + + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + + GEN_NODE(arg2, &op2); + if (op2.optype == OpndType_Absolute && op2.immediate == 0) { + op2.optype = OpndType_GPR; + op2.reg = 0; + } else { + ENSURE_GPR(&op2, arg2->rtype, 0); + } + + GEN_NODE(arg3, &op3); + if (op3.optype == OpndType_Absolute && op3.immediate == 0 && op2.reg != 0) { + op3 = op2; + op2.optype = OpndType_GPR; + op2.reg = 0; + } else { + ENSURE_GPR(&op3, arg3->rtype, 0); + } + + emitpcode(opcode, op1.reg, op2.reg, op3.reg); + output->optype = OpndType_VR; + output->reg = op1.reg; +} + +static void vector_intrinsic_abs(Intrinsics id, ENode *funccall, ENode *arg1, short outputReg, Operand *output) { + TypeTable22 *table; + Type *typeA; + Type *typeB; + short reg1; + short reg2; + short reg3; + Operand op1; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB = arg1->rtype; + typeA = table->arg1; + if (IS_TYPE_POINTER(typeA) && IS_TYPE_POINTER(typeB)) { + typeA = TPTR_TARGET(typeA); + typeB = TPTR_TARGET(typeB); + } + if (promotable_types_are_equal(typeA, typeB)) + break; + } + + CError_ASSERT(4617, table->rtype); + + reg1 = ALLOC_VR(); + reg2 = ALLOC_VR(); + reg3 = outputReg ? outputReg : ALLOC_VR(); + + memclrw(&op1, sizeof(op1)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + + if (arg1->rtype == TYPE(&stvectorfloat)) { + emitpcode(PC_VSPLTISW, reg1, -1); + emitpcode(PC_VSLW, reg2, reg1, reg1); + emitpcode(table->opcode1, reg3, op1.reg, reg2); + } else { + emitpcode(PC_VSPLTISB, reg1, 0); + emitpcode(table->opcode1, reg2, reg1, op1.reg); + emitpcode(table->opcode2, reg3, op1.reg, reg2); + } + + output->optype = OpndType_VR; + output->reg = reg3; +} + +static void vector_intrinsic_abss(Intrinsics id, ENode *funccall, ENode *arg1, short outputReg, Operand *output) { + TypeTable22 *table; + Type *typeA; + Type *typeB; + short reg1; + short reg2; + short reg3; + Operand op1; + + for (table = typeTable[id - Intrinsic_042]; table->rtype; table++) { + typeB = arg1->rtype; + typeA = table->arg1; + if (IS_TYPE_POINTER(typeA) && IS_TYPE_POINTER(typeB)) { + typeA = TPTR_TARGET(typeA); + typeB = TPTR_TARGET(typeB); + } + if (promotable_types_are_equal(typeA, typeB)) + break; + } + + CError_ASSERT(4683, table->rtype); + + reg1 = ALLOC_VR(); + reg2 = ALLOC_VR(); + reg3 = outputReg ? outputReg : ALLOC_VR(); + + memclrw(&op1, sizeof(op1)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + + emitpcode(PC_VSPLTISB, reg1, 0); + emitpcode(table->opcode1, reg2, reg1, op1.reg); + emitpcode(table->opcode2, reg3, op1.reg, reg2); + + output->optype = OpndType_VR; + output->reg = reg3; +} + +static void vector_intrinsic_mtvscr(ENode *arg1, Operand *output, Opcode opcode) { + Operand op1; + + memclrw(&op1, sizeof(op1)); + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + emitpcode(opcode, op1.reg); +} + +static void vector_predicate_2args(ENode *arg1, ENode *arg2, short outputReg, Operand *output, Opcode opcode, Intrinsics id) { + Operand op1; + Operand op2; + short reg1; + short reg2; + short reg3; + Opcode cond; + + memclrw(&op1, sizeof(op1)); + memclrw(&op2, sizeof(op2)); + + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + GEN_NODE_TO_VR(arg2, &op2, arg2->rtype, 0); + + reg1 = ALLOC_VR(); + reg2 = op1.reg; + reg3 = op2.reg; + if ( + ((id == Intrinsic_132 || id == Intrinsic_145) && arg1->rtype != TYPE(&stvectorfloat)) || + ((id == Intrinsic_135 || id == Intrinsic_147) && arg1->rtype == TYPE(&stvectorfloat)) || + id == Intrinsic_136 || + id == Intrinsic_148 || + id == Intrinsic_153 || + id == Intrinsic_141 || + id == Intrinsic_142 || + id == Intrinsic_154 + ) + { + reg3 = op1.reg; + reg2 = op2.reg; + } + emitpcode(opcode, reg1, reg2, reg3); + pcsetrecordbit(pclastblock->lastPCode); + + if (arg1->rtype == TYPE(&stvectorfloat)) { + switch (id) { + case Intrinsic_131: + case Intrinsic_132: + case Intrinsic_133: + case Intrinsic_135: + case Intrinsic_136: + cond = ELESS; + break; + case Intrinsic_150: + case Intrinsic_151: + case Intrinsic_152: + case Intrinsic_153: + case Intrinsic_154: + cond = EGREATEREQU; + break; + case Intrinsic_134: + case Intrinsic_138: + case Intrinsic_139: + case Intrinsic_140: + case Intrinsic_141: + case Intrinsic_142: + cond = EEQU; + break; + case Intrinsic_144: + case Intrinsic_145: + case Intrinsic_146: + case Intrinsic_147: + case Intrinsic_148: + case Intrinsic_156: + cond = ENOTEQU; + break; + default: + CError_FATAL(4805); + } + } else { + switch (id) { + case Intrinsic_131: + case Intrinsic_133: + case Intrinsic_136: + cond = ELESS; + break; + case Intrinsic_144: + case Intrinsic_146: + case Intrinsic_148: + cond = ENOTEQU; + break; + case Intrinsic_132: + case Intrinsic_135: + case Intrinsic_138: + cond = EEQU; + break; + case Intrinsic_145: + case Intrinsic_147: + case Intrinsic_150: + cond = EGREATEREQU; + break; + default: + CError_FATAL(4834); + } + } + + output->optype = OpndType_CRField; + output->reg = 6; + output->regOffset = cond; +} + +static void vector_predicate_1arg(ENode *arg1, short outputReg, Operand *output, Opcode opcode, Intrinsics id) { + Operand op1; + short reg; + Opcode cond; + + memclrw(&op1, sizeof(op1)); + + GEN_NODE_TO_VR(arg1, &op1, arg1->rtype, 0); + + reg = ALLOC_VR(); + emitpcode(opcode, reg, op1.reg, op1.reg); + pcsetrecordbit(pclastblock->lastPCode); + + switch (id) { + case Intrinsic_143: + cond = ELESS; + break; + case Intrinsic_149: + cond = EGREATEREQU; + break; + case Intrinsic_137: + cond = EEQU; + break; + case Intrinsic_155: + cond = ENOTEQU; + break; + default: + CError_FATAL(4878); + } + + output->optype = OpndType_CRField; + output->reg = 6; + output->regOffset = cond; +} + +ENode *Intrinsics_HandleIntrinsicCall(Object *func, ENodeList *args) { + ENode *callexpr; + Type *rtype; + Intrinsics id; + + callexpr = NULL; + + if (copts.altivec_model) { + id = func->u.func.u.intrinsicid; + switch (id) { + case Intrinsic_060: + case Intrinsic_061: + case Intrinsic_062: + case Intrinsic_063: + case Intrinsic_072: + case Intrinsic_073: + case Intrinsic_079: + case Intrinsic_080: + case Intrinsic_081: + case Intrinsic_082: + case Intrinsic_086: + case Intrinsic_093: + case Intrinsic_098: + case Intrinsic_100: + case Intrinsic_114: + case Intrinsic_115: + case Intrinsic_116: + case Intrinsic_202: + case Intrinsic_203: + case Intrinsic_224: + case Intrinsic_225: + case Intrinsic_226: + case Intrinsic_227: + case Intrinsic_228: + case Intrinsic_229: + case Intrinsic_230: + case Intrinsic_231: + case Intrinsic_240: + case Intrinsic_252: + case Intrinsic_259: + case Intrinsic_263: + case Intrinsic_280: + case Intrinsic_281: + case Intrinsic_282: + case Intrinsic_283: + case Intrinsic_284: + if (Intrinsics_VerifyParameterCount(3, args, func->name)) { + if ((rtype = Intrinsics_Verify3VectorArgs(id, args, func->name))) + callexpr = Intrinsics_CreateIntrinsicFunc(func, args, rtype); + } + break; + case Intrinsic_042: + case Intrinsic_043: + case Intrinsic_044: + case Intrinsic_045: + case Intrinsic_046: + case Intrinsic_047: + case Intrinsic_049: + case Intrinsic_050: + case Intrinsic_051: + case Intrinsic_052: + case Intrinsic_053: + case Intrinsic_054: + case Intrinsic_055: + case Intrinsic_056: + case Intrinsic_057: + case Intrinsic_066: + case Intrinsic_067: + case Intrinsic_068: + case Intrinsic_070: + case Intrinsic_071: + case Intrinsic_074: + case Intrinsic_075: + case Intrinsic_076: + case Intrinsic_078: + case Intrinsic_084: + case Intrinsic_085: + case Intrinsic_087: + case Intrinsic_088: + case Intrinsic_089: + case Intrinsic_090: + case Intrinsic_091: + case Intrinsic_092: + case Intrinsic_095: + case Intrinsic_099: + case Intrinsic_101: + case Intrinsic_102: + case Intrinsic_103: + case Intrinsic_110: + case Intrinsic_111: + case Intrinsic_112: + case Intrinsic_113: + case Intrinsic_117: + case Intrinsic_118: + case Intrinsic_119: + case Intrinsic_120: + case Intrinsic_121: + case Intrinsic_122: + case Intrinsic_124: + case Intrinsic_125: + case Intrinsic_126: + case Intrinsic_127: + case Intrinsic_130: + case Intrinsic_131: + case Intrinsic_132: + case Intrinsic_133: + case Intrinsic_134: + case Intrinsic_135: + case Intrinsic_136: + case Intrinsic_138: + case Intrinsic_139: + case Intrinsic_140: + case Intrinsic_141: + case Intrinsic_142: + case Intrinsic_144: + case Intrinsic_145: + case Intrinsic_146: + case Intrinsic_147: + case Intrinsic_148: + case Intrinsic_150: + case Intrinsic_151: + case Intrinsic_152: + case Intrinsic_153: + case Intrinsic_154: + case Intrinsic_156: + case Intrinsic_157: + case Intrinsic_158: + case Intrinsic_159: + case Intrinsic_160: + case Intrinsic_161: + case Intrinsic_162: + case Intrinsic_163: + case Intrinsic_164: + case Intrinsic_165: + case Intrinsic_166: + case Intrinsic_167: + case Intrinsic_168: + case Intrinsic_169: + case Intrinsic_170: + case Intrinsic_171: + case Intrinsic_172: + case Intrinsic_173: + case Intrinsic_174: + case Intrinsic_175: + case Intrinsic_177: + case Intrinsic_178: + case Intrinsic_179: + case Intrinsic_180: + case Intrinsic_181: + case Intrinsic_182: + case Intrinsic_183: + case Intrinsic_184: + case Intrinsic_185: + case Intrinsic_186: + case Intrinsic_187: + case Intrinsic_188: + case Intrinsic_189: + case Intrinsic_190: + case Intrinsic_191: + case Intrinsic_192: + case Intrinsic_193: + case Intrinsic_196: + case Intrinsic_197: + case Intrinsic_198: + case Intrinsic_199: + case Intrinsic_200: + case Intrinsic_204: + case Intrinsic_205: + case Intrinsic_206: + case Intrinsic_207: + case Intrinsic_208: + case Intrinsic_209: + case Intrinsic_210: + case Intrinsic_211: + case Intrinsic_212: + case Intrinsic_213: + case Intrinsic_214: + case Intrinsic_215: + case Intrinsic_216: + case Intrinsic_217: + case Intrinsic_218: + case Intrinsic_219: + case Intrinsic_220: + case Intrinsic_221: + case Intrinsic_222: + case Intrinsic_223: + case Intrinsic_232: + case Intrinsic_233: + case Intrinsic_234: + case Intrinsic_235: + case Intrinsic_236: + case Intrinsic_237: + case Intrinsic_238: + case Intrinsic_239: + case Intrinsic_241: + case Intrinsic_242: + case Intrinsic_243: + case Intrinsic_244: + case Intrinsic_245: + case Intrinsic_246: + case Intrinsic_247: + case Intrinsic_248: + case Intrinsic_249: + case Intrinsic_250: + case Intrinsic_251: + case Intrinsic_254: + case Intrinsic_255: + case Intrinsic_256: + case Intrinsic_260: + case Intrinsic_261: + case Intrinsic_262: + case Intrinsic_264: + case Intrinsic_265: + case Intrinsic_266: + case Intrinsic_267: + case Intrinsic_268: + case Intrinsic_272: + case Intrinsic_273: + case Intrinsic_274: + case Intrinsic_275: + case Intrinsic_276: + case Intrinsic_277: + case Intrinsic_278: + case Intrinsic_279: + case Intrinsic_285: + case Intrinsic_286: + case Intrinsic_287: + case Intrinsic_288: + case Intrinsic_289: + case Intrinsic_290: + case Intrinsic_291: + case Intrinsic_292: + case Intrinsic_293: + case Intrinsic_294: + case Intrinsic_295: + case Intrinsic_296: + case Intrinsic_297: + case Intrinsic_298: + case Intrinsic_299: + case Intrinsic_300: + case Intrinsic_308: + if (Intrinsics_VerifyParameterCount(2, args, func->name)) { + if ((rtype = Intrinsics_Verify2VectorArgs(id, args, func->name))) + callexpr = Intrinsics_CreateIntrinsicFunc(func, args, rtype); + } + break; + case Intrinsic_048: + case Intrinsic_058: + case Intrinsic_064: + case Intrinsic_065: + case Intrinsic_069: + case Intrinsic_083: + case Intrinsic_094: + case Intrinsic_096: + case Intrinsic_097: + case Intrinsic_104: + case Intrinsic_105: + case Intrinsic_106: + case Intrinsic_107: + case Intrinsic_108: + case Intrinsic_109: + case Intrinsic_123: + case Intrinsic_128: + case Intrinsic_129: + case Intrinsic_137: + case Intrinsic_143: + case Intrinsic_149: + case Intrinsic_155: + case Intrinsic_176: + case Intrinsic_194: + case Intrinsic_195: + case Intrinsic_201: + case Intrinsic_253: + case Intrinsic_257: + case Intrinsic_258: + case Intrinsic_269: + case Intrinsic_270: + case Intrinsic_271: + case Intrinsic_301: + case Intrinsic_302: + case Intrinsic_303: + case Intrinsic_304: + case Intrinsic_305: + case Intrinsic_306: + case Intrinsic_307: + if (Intrinsics_VerifyParameterCount(1, args, func->name)) { + if ((rtype = Intrinsics_Verify1VectorArg(id, args, func->name))) + callexpr = Intrinsics_CreateIntrinsicFunc(func, args, rtype); + } + break; + case Intrinsic_059: + case Intrinsic_077: + if (Intrinsics_VerifyParameterCount(0, args, func->name)) { + if ((rtype = Intrinsics_VerifyNoVectorArgs(id, func->name))) + callexpr = Intrinsics_CreateIntrinsicFunc(func, args, rtype); + } + break; + case Intrinsic_309: + case Intrinsic_310: + if (Intrinsics_VerifyParameterCount(1, args, func->name)) { + if ((rtype = Intrinsics_Verify1VectorArg2Ops(id, args, func->name))) + callexpr = Intrinsics_CreateIntrinsicFunc(func, args, rtype); + } + break; + } + } + + return callexpr; +} + +void call_intrinsic_function(ENode *funccall, short outputReg, Operand *output) { + ENodeList *args; + Object *object; + Intrinsics id; + Opcode op; + short reg; + + static Opcode opcode[MaxIntrinsics] = { + /* Intrinsic_000 */ PC_EIEIO, + /* Intrinsic_001 */ PC_SYNC, + /* Intrinsic_002 */ PC_ISYNC, + 0, + 0, + /* Intrinsic_005 */ PC_FABS, + /* Intrinsic_006 */ PC_FNABS, + 0, + 0, + /* Intrinsic_009 */ PC_CNTLZW, + /* Intrinsic_010 */ PC_LHBRX, + /* Intrinsic_011 */ PC_LWBRX, + /* Intrinsic_012 */ PC_STHBRX, + /* Intrinsic_013 */ PC_STWBRX, + /* Intrinsic_014 */ PC_DCBF, + /* Intrinsic_015 */ PC_DCBT, + /* Intrinsic_016 */ PC_DCBST, + /* Intrinsic_017 */ PC_DCBTST, + /* Intrinsic_018 */ PC_DCBZ, + /* Intrinsic_019 */ PC_MULHW, + /* Intrinsic_020 */ PC_MULHWU, + /* Intrinsic_021 */ PC_DIVW, + /* Intrinsic_022 */ PC_DIVWU, + /* Intrinsic_023 */ PC_FMADD, + /* Intrinsic_024 */ PC_FMSUB, + /* Intrinsic_025 */ PC_FNMADD, + /* Intrinsic_026 */ PC_FNMSUB, + /* Intrinsic_027 */ PC_FMADDS, + /* Intrinsic_028 */ PC_FMSUBS, + /* Intrinsic_029 */ PC_FNMADDS, + /* Intrinsic_030 */ PC_FNMSUBS, + /* Intrinsic_031 */ PC_MFFS, + /* Intrinsic_032 */ PC_FRES, + /* Intrinsic_033 */ PC_FRSQRTE, + /* Intrinsic_004 */ PC_FSEL, + 0, + 0, + /* Intrinsic_037 */ PC_RLWIMI, + /* Intrinsic_038 */ PC_RLWINM, + /* Intrinsic_039 */ PC_RLWNM, + /* Intrinsic_040 */ PC_FABS, + /* Intrinsic_041 */ PC_FNABS + }; + + args = funccall->data.funccall.args; + object = funccall->data.funccall.funcref->data.objref; + id = object->u.func.u.intrinsicid; + cur_intrinsic_object = object; + + switch (id) { + case Intrinsic_000: + case Intrinsic_001: + case Intrinsic_002: + appendpcode(pclastblock, makepcode(opcode[id])); + output->optype = OpndType_Absolute; + break; + case Intrinsic_003: + case Intrinsic_004: + abs_intrinsic(args->node, outputReg, output); + break; + case Intrinsic_005: + case Intrinsic_006: + case Intrinsic_032: + case Intrinsic_033: + case Intrinsic_040: + case Intrinsic_041: + fp_unary_operator(opcode[id], args->node, outputReg, output); + break; + case Intrinsic_007: + setflm_intrinsic(args->node, outputReg, output, funccall->ignored); + break; + case Intrinsic_008: + alloca_intrinsic(args->node, outputReg, output); + break; + case Intrinsic_009: + unary_operator(PC_CNTLZW, args->node, outputReg, output); + break; + case Intrinsic_010: + case Intrinsic_011: + load_bytereversed_intrinsic(opcode[id], args->node, args->next->node, outputReg, output); + break; + case Intrinsic_012: + case Intrinsic_013: + store_bytereversed_intrinsic(opcode[id], args->node, args->next->node, args->next->next->node); + output->optype = OpndType_Absolute; + break; + case Intrinsic_014: + case Intrinsic_015: + case Intrinsic_016: + case Intrinsic_017: + case Intrinsic_018: + data_cache_block_intrinsic(opcode[id], args->node, args->next->node); + output->optype = OpndType_Absolute; + break; + case Intrinsic_019: + case Intrinsic_020: + case Intrinsic_021: + case Intrinsic_022: + binary_operator(opcode[id], args->node, args->next->node, outputReg, output); + break; + case Intrinsic_023: + case Intrinsic_024: + case Intrinsic_025: + case Intrinsic_026: + case Intrinsic_027: + case Intrinsic_028: + case Intrinsic_029: + case Intrinsic_030: + case Intrinsic_034: + fp_multiply_add(opcode[id], + args->node, args->next->node, args->next->next->node, + outputReg, output); + break; + case Intrinsic_031: + reg = outputReg ? outputReg : ALLOC_FPR(); + emitpcode(PC_MFFS, output->reg = reg); + output->optype = OpndType_FPR; + break; + case Intrinsic_035: + call_function(funccall, output); + break; + case Intrinsic_036: + if (ENODE_IS(args->next->next->node, EINTCONST)) + memcpy_intrinsic( + args->node, args->next->node, args->next->next->node->data.intval.lo, + funccall->ignored, output); + else + call_function(funccall, output); + break; + case Intrinsic_037: + rlwimi_intrinsic( + args->node, + args->next->node, + args->next->next->node, + args->next->next->next->node, + args->next->next->next->next->node, + outputReg, output); + break; + case Intrinsic_038: + rlwinm_intrinsic( + args->node, + args->next->node, + args->next->next->node, + args->next->next->next->node, + outputReg, output); + break; + case Intrinsic_039: + rlwnm_intrinsic( + args->node, + args->next->node, + args->next->next->node, + args->next->next->next->node, + outputReg, output); + break; + case Intrinsic_072: + case Intrinsic_073: + case Intrinsic_079: + case Intrinsic_080: + case Intrinsic_081: + case Intrinsic_082: + case Intrinsic_086: + case Intrinsic_093: + case Intrinsic_098: + case Intrinsic_202: + case Intrinsic_203: + case Intrinsic_224: + case Intrinsic_225: + case Intrinsic_226: + case Intrinsic_227: + case Intrinsic_228: + case Intrinsic_229: + case Intrinsic_230: + case Intrinsic_231: + case Intrinsic_240: + case Intrinsic_252: + case Intrinsic_259: + op = Intrinsics_FindOpcode3Args(id, funccall, args->node, args->next->node, args->next->next->node); + vector_intrinsic_3args(args->node, args->next->node, args->next->next->node, outputReg, output, op); + break; + case Intrinsic_100: + case Intrinsic_263: + op = Intrinsics_FindOpcode3Args(id, funccall, args->node, args->next->node, args->next->next->node); + vector_intrinsic_sld(args->node, args->next->node, args->next->next->node, outputReg, output, op); + break; + case Intrinsic_042: + case Intrinsic_043: + case Intrinsic_044: + case Intrinsic_045: + case Intrinsic_046: + case Intrinsic_047: + case Intrinsic_049: + case Intrinsic_050: + case Intrinsic_051: + case Intrinsic_053: + case Intrinsic_074: + case Intrinsic_075: + case Intrinsic_076: + case Intrinsic_078: + case Intrinsic_084: + case Intrinsic_085: + case Intrinsic_087: + case Intrinsic_088: + case Intrinsic_089: + case Intrinsic_090: + case Intrinsic_091: + case Intrinsic_092: + case Intrinsic_095: + case Intrinsic_099: + case Intrinsic_101: + case Intrinsic_102: + case Intrinsic_110: + case Intrinsic_111: + case Intrinsic_112: + case Intrinsic_113: + case Intrinsic_117: + case Intrinsic_118: + case Intrinsic_119: + case Intrinsic_120: + case Intrinsic_121: + case Intrinsic_122: + case Intrinsic_124: + case Intrinsic_125: + case Intrinsic_126: + case Intrinsic_127: + case Intrinsic_130: + case Intrinsic_157: + case Intrinsic_158: + case Intrinsic_159: + case Intrinsic_160: + case Intrinsic_161: + case Intrinsic_162: + case Intrinsic_163: + case Intrinsic_164: + case Intrinsic_165: + case Intrinsic_166: + case Intrinsic_167: + case Intrinsic_168: + case Intrinsic_169: + case Intrinsic_170: + case Intrinsic_171: + case Intrinsic_172: + case Intrinsic_173: + case Intrinsic_174: + case Intrinsic_175: + case Intrinsic_177: + case Intrinsic_178: + case Intrinsic_179: + case Intrinsic_180: + case Intrinsic_181: + case Intrinsic_182: + case Intrinsic_183: + case Intrinsic_184: + case Intrinsic_185: + case Intrinsic_186: + case Intrinsic_187: + case Intrinsic_188: + case Intrinsic_189: + case Intrinsic_204: + case Intrinsic_205: + case Intrinsic_206: + case Intrinsic_207: + case Intrinsic_208: + case Intrinsic_209: + case Intrinsic_210: + case Intrinsic_211: + case Intrinsic_212: + case Intrinsic_213: + case Intrinsic_214: + case Intrinsic_215: + case Intrinsic_216: + case Intrinsic_217: + case Intrinsic_218: + case Intrinsic_219: + case Intrinsic_220: + case Intrinsic_221: + case Intrinsic_222: + case Intrinsic_223: + case Intrinsic_232: + case Intrinsic_233: + case Intrinsic_234: + case Intrinsic_235: + case Intrinsic_236: + case Intrinsic_237: + case Intrinsic_238: + case Intrinsic_239: + case Intrinsic_241: + case Intrinsic_242: + case Intrinsic_243: + case Intrinsic_244: + case Intrinsic_245: + case Intrinsic_246: + case Intrinsic_247: + case Intrinsic_248: + case Intrinsic_249: + case Intrinsic_250: + case Intrinsic_251: + case Intrinsic_254: + case Intrinsic_255: + case Intrinsic_256: + case Intrinsic_260: + case Intrinsic_261: + case Intrinsic_262: + case Intrinsic_264: + case Intrinsic_265: + case Intrinsic_272: + case Intrinsic_273: + case Intrinsic_274: + case Intrinsic_275: + case Intrinsic_276: + case Intrinsic_277: + case Intrinsic_278: + case Intrinsic_279: + case Intrinsic_285: + case Intrinsic_286: + case Intrinsic_287: + case Intrinsic_288: + case Intrinsic_289: + case Intrinsic_290: + case Intrinsic_291: + case Intrinsic_292: + case Intrinsic_293: + case Intrinsic_294: + case Intrinsic_295: + case Intrinsic_296: + case Intrinsic_297: + case Intrinsic_298: + case Intrinsic_299: + case Intrinsic_300: + case Intrinsic_308: + op = Intrinsics_FindOpcode2Args(id, funccall, args->node, args->next->node); + vector_intrinsic_2args(args->node, args->next->node, outputReg, output, op); + break; + case Intrinsic_048: + case Intrinsic_064: + case Intrinsic_065: + case Intrinsic_069: + case Intrinsic_094: + case Intrinsic_096: + case Intrinsic_097: + case Intrinsic_123: + case Intrinsic_128: + case Intrinsic_129: + case Intrinsic_176: + case Intrinsic_194: + case Intrinsic_195: + case Intrinsic_201: + case Intrinsic_253: + case Intrinsic_257: + case Intrinsic_258: + case Intrinsic_301: + case Intrinsic_302: + case Intrinsic_303: + case Intrinsic_304: + case Intrinsic_305: + case Intrinsic_306: + case Intrinsic_307: + op = Intrinsics_FindOpcode1Arg(id, funccall, args->node); + vector_intrinsic_1arg(args->node, outputReg, output, op); + break; + case Intrinsic_055: + case Intrinsic_056: + case Intrinsic_057: + case Intrinsic_103: + case Intrinsic_190: + case Intrinsic_191: + case Intrinsic_192: + case Intrinsic_193: + case Intrinsic_266: + case Intrinsic_267: + case Intrinsic_268: + op = Intrinsics_FindOpcode2Args(id, funccall, args->node, args->next->node); + vector_intrinsic_2args1const(args->node, args->next->node, outputReg, output, op); + break; + case Intrinsic_104: + case Intrinsic_105: + case Intrinsic_106: + case Intrinsic_269: + case Intrinsic_270: + case Intrinsic_271: + op = Intrinsics_FindOpcode1Arg(id, funccall, args->node); + vector_intrinsic_splats(args->node, outputReg, output, op); + break; + case Intrinsic_107: + op = Intrinsics_FindOpcode1Arg(id, funccall, args->node); + vector_intrinsic_splatu8(args->node, outputReg, output, op); + break; + case Intrinsic_108: + op = Intrinsics_FindOpcode1Arg(id, funccall, args->node); + vector_intrinsic_splatu16(args->node, outputReg, output, op); + break; + case Intrinsic_109: + op = Intrinsics_FindOpcode1Arg(id, funccall, args->node); + vector_intrinsic_splatu32(args->node, outputReg, output, op); + break; + case Intrinsic_083: + vector_intrinsic_mtvscr(args->node, output, PC_MTVSCR); + break; + case Intrinsic_077: + op = Intrinsics_FindOpcodeNoArgs(id, funccall); + vector_intrinsic_mfvscr(outputReg, op, output); + break; + case Intrinsic_058: + vector_intrinsic_dss(args->node); + break; + case Intrinsic_059: + op = Intrinsics_FindOpcodeNoArgs(id, funccall); + vector_intrinsic_no_args(op); + break; + case Intrinsic_060: + case Intrinsic_061: + case Intrinsic_062: + case Intrinsic_063: + switch (id) { + case Intrinsic_060: + op = PC_DST; + break; + case Intrinsic_061: + op = PC_DSTST; + break; + case Intrinsic_062: + op = PC_DSTSTT; + break; + case Intrinsic_063: + op = PC_DSTT; + break; + } + vector_intrinsic_datastream(args->node, args->next->node, args->next->next->node, op); + break; + case Intrinsic_066: + case Intrinsic_067: + case Intrinsic_068: + case Intrinsic_070: + case Intrinsic_071: + case Intrinsic_196: + case Intrinsic_197: + case Intrinsic_198: + case Intrinsic_199: + case Intrinsic_200: + op = Intrinsics_FindOpcode2Args(id, funccall, args->node, args->next->node); + vector_intrinsic_load(args->node, args->next->node, outputReg, output, op); + break; + case Intrinsic_114: + case Intrinsic_115: + case Intrinsic_116: + case Intrinsic_280: + case Intrinsic_281: + case Intrinsic_282: + case Intrinsic_283: + case Intrinsic_284: + op = Intrinsics_FindOpcode3Args(id, funccall, args->node, args->next->node, args->next->next->node); + vector_intrinsic_store(args->node, args->next->node, args->next->next->node, outputReg, output, op); + break; + case Intrinsic_131: + case Intrinsic_132: + case Intrinsic_133: + case Intrinsic_134: + case Intrinsic_135: + case Intrinsic_136: + case Intrinsic_138: + case Intrinsic_139: + case Intrinsic_140: + case Intrinsic_141: + case Intrinsic_142: + case Intrinsic_144: + case Intrinsic_145: + case Intrinsic_146: + case Intrinsic_147: + case Intrinsic_148: + case Intrinsic_150: + case Intrinsic_151: + case Intrinsic_152: + case Intrinsic_153: + case Intrinsic_154: + case Intrinsic_156: + op = Intrinsics_FindOpcode2Args(id, funccall, args->node, args->next->node); + vector_predicate_2args(args->node, args->next->node, outputReg, output, op, id); + break; + case Intrinsic_137: + case Intrinsic_143: + case Intrinsic_149: + case Intrinsic_155: + op = Intrinsics_FindOpcode1Arg(id, funccall, args->node); + vector_predicate_1arg(args->node, outputReg, output, op, id); + break; + case Intrinsic_309: + vector_intrinsic_abs(id, funccall, args->node, outputReg, output); + break; + case Intrinsic_310: + vector_intrinsic_abss(id, funccall, args->node, outputReg, output); + break; + case Intrinsic_052: + case Intrinsic_054: + op = Intrinsics_FindOpcode2Args(id, funccall, args->node, args->next->node); + vector_intrinsic_2args(args->node, args->next->node, outputReg, output, op); + break; + default: + CError_FATAL(6152); + } +} + +void Intrinsics_SetupRuntimeObjects(void) { + static TypePointer char_ptr = {TYPEPOINTER, 4, TYPE(&stchar), 0}; + Boolean savecpp; + int i; + + savecpp = copts.cplusplus; + copts.cplusplus = 0; + + for (i = 0; i < MaxIntrinsics; i++) + intrinsics[i] = NULL; + + intrinsics[Intrinsic_000] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__eieio"), 0, 0); + intrinsics[Intrinsic_001] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__sync"), 0, 0); + intrinsics[Intrinsic_002] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__isync"), 0, 0); + intrinsics[Intrinsic_003] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__abs"), 0, 1, &stsignedint); + intrinsics[Intrinsic_004] = CParser_NewRTFunc(TYPE(&stsignedlong), GetHashNameNodeExport("__labs"), 0, 1, &stsignedlong); + intrinsics[Intrinsic_005] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fabs"), 0, 1, &stdouble); + intrinsics[Intrinsic_006] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fnabs"), 0, 1, &stdouble); + intrinsics[Intrinsic_007] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__setflm"), 0, 1, &stdouble); + intrinsics[Intrinsic_033] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__frsqrte"), 0, 1, &stdouble); + intrinsics[Intrinsic_008] = CParser_NewRTFunc(TYPE(&void_ptr), GetHashNameNodeExport("__alloca"), 0, 1, &stunsignedint); + intrinsics[Intrinsic_009] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__cntlzw"), 0, 1, &stunsignedint); + intrinsics[Intrinsic_010] = CParser_NewRTFunc(TYPE(&stunsignedint), GetHashNameNodeExport("__lhbrx"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_011] = CParser_NewRTFunc(TYPE(&stunsignedint), GetHashNameNodeExport("__lwbrx"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_012] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__sthbrx"), 0, 3, &stunsignedshort, &void_ptr, &stsignedint); + intrinsics[Intrinsic_013] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__stwbrx"), 0, 3, &stunsignedint, &void_ptr, &stsignedint); + intrinsics[Intrinsic_014] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__dcbf"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_015] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__dcbt"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_016] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__dcbst"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_017] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__dcbtst"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_018] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("__dcbz"), 0, 2, &void_ptr, &stsignedint); + intrinsics[Intrinsic_019] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__mulhw"), 0, 2, &stsignedint, &stsignedint); + intrinsics[Intrinsic_020] = CParser_NewRTFunc(TYPE(&stunsignedint), GetHashNameNodeExport("__mulhwu"), 0, 2, &stunsignedint, &stunsignedint); + intrinsics[Intrinsic_021] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__divw"), 0, 2, &stsignedint, &stsignedint); + intrinsics[Intrinsic_022] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__divwu"), 0, 2, &stsignedint, &stsignedint); + intrinsics[Intrinsic_023] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fmadd"), 0, 3, &stdouble, &stdouble, &stdouble); + intrinsics[Intrinsic_024] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fmsub"), 0, 3, &stdouble, &stdouble, &stdouble); + intrinsics[Intrinsic_025] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fnmadd"), 0, 3, &stdouble, &stdouble, &stdouble); + intrinsics[Intrinsic_026] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fnmsub"), 0, 3, &stdouble, &stdouble, &stdouble); + intrinsics[Intrinsic_034] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__fsel"), 0, 3, &stdouble, &stdouble, &stdouble); + intrinsics[Intrinsic_027] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fmadds"), 0, 3, &stfloat, &stfloat, &stfloat); + intrinsics[Intrinsic_028] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fmsubs"), 0, 3, &stfloat, &stfloat, &stfloat); + intrinsics[Intrinsic_029] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fnmadds"), 0, 3, &stfloat, &stfloat, &stfloat); + intrinsics[Intrinsic_030] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fnmsubs"), 0, 3, &stfloat, &stfloat, &stfloat); + intrinsics[Intrinsic_031] = CParser_NewRTFunc(TYPE(&stdouble), GetHashNameNodeExport("__mffs"), 0, 0); + intrinsics[Intrinsic_032] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fres"), 0, 1, &stfloat); + intrinsics[Intrinsic_040] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fabsf"), 0, 1, &stfloat); + intrinsics[Intrinsic_041] = CParser_NewRTFunc(TYPE(&stfloat), GetHashNameNodeExport("__fnabsf"), 0, 1, &stfloat); + intrinsics[Intrinsic_035] = CParser_NewRTFunc(TYPE(&char_ptr), GetHashNameNodeExport("__strcpy"), 0, 2, &char_ptr, &char_ptr); + TYPE_FUNC(intrinsics[Intrinsic_035]->type)->args->next->qual |= Q_CONST; + intrinsics[Intrinsic_036] = CParser_NewRTFunc(TYPE(&void_ptr), GetHashNameNodeExport("__memcpy"), 0, 3, &void_ptr, &void_ptr, &stunsignedlong); + __memcpy_object = intrinsics[Intrinsic_036]; + TYPE_FUNC(intrinsics[Intrinsic_036]->type)->args->next->qual |= Q_CONST; + intrinsics[Intrinsic_037] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__rlwimi"), 0, 5, &stsignedint, &stsignedint, &stsignedint, &stsignedint, &stsignedint); + intrinsics[Intrinsic_038] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__rlwinm"), 0, 4, &stsignedint, &stsignedint, &stsignedint, &stsignedint); + intrinsics[Intrinsic_039] = CParser_NewRTFunc(TYPE(&stsignedint), GetHashNameNodeExport("__rlwnm"), 0, 4, &stsignedint, &stsignedint, &stsignedint, &stsignedint); + intrinsics[Intrinsic_042] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_add"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_043] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_addc"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_044] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_adds"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_045] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_and"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_046] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_andc"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_047] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_avg"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_048] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_ceil"), 0, 1, &stvector); + intrinsics[Intrinsic_049] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cmpb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_050] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cmpeq"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_051] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cmpge"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_052] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cmple"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_053] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cmpgt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_054] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cmplt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_055] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_ctf"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_056] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_cts"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_057] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_ctu"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_058] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("vec_dss"), 0, 1, &stsignedint); + intrinsics[Intrinsic_059] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("vec_dssall"), 0, 0); + intrinsics[Intrinsic_060] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("vec_dst"), 0, 3, &stvector, &stsignedint, &stsignedint); + intrinsics[Intrinsic_061] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("vec_dstst"), 0, 3, &stvector, &stsignedint, &stsignedint); + intrinsics[Intrinsic_062] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("vec_dststt"), 0, 3, &stvector, &stsignedint, &stsignedint); + intrinsics[Intrinsic_063] = CParser_NewRTFunc(&stvoid, GetHashNameNodeExport("vec_dstt"), 0, 3, &stvector, &stsignedint, &stsignedint); + intrinsics[Intrinsic_064] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_expte"), 0, 1, &stvector); + intrinsics[Intrinsic_065] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_floor"), 0, 1, &stvector); + intrinsics[Intrinsic_066] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_ld"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_067] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lde"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_068] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_ldl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_069] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_loge"), 0, 1, &stvector); + intrinsics[Intrinsic_070] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvsl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_071] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvsr"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_072] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_madd"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_073] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_madds"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_074] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_max"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_075] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mergeh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_076] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mergel"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_077] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mfvscr"), 0, 0); + intrinsics[Intrinsic_078] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_min"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_079] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mladd"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_080] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mradds"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_081] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_msum"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_082] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_msums"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_083] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mtvscr"), 0, 1, &stvector); + intrinsics[Intrinsic_084] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mule"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_085] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_mulo"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_086] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_nmsub"), 0, 2, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_087] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_nor"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_088] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_or"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_089] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_pack"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_090] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_packpx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_091] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_packs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_092] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_packsu"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_093] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_perm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_094] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_re"), 0, 1, &stvector); + intrinsics[Intrinsic_095] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_rl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_096] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_round"), 0, 1, &stvector); + intrinsics[Intrinsic_097] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_rsqrte"), 0, 1, &stvector); + intrinsics[Intrinsic_098] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sel"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_099] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_100] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sld"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_101] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sll"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_102] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_slo"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_103] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_104] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat_s8"), 0, 1, &stvector); + intrinsics[Intrinsic_105] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat_s16"), 0, 1, &stvector); + intrinsics[Intrinsic_106] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat_s32"), 0, 1, &stvector); + intrinsics[Intrinsic_107] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat_u8"), 0, 1, &stvector); + intrinsics[Intrinsic_108] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat_u16"), 0, 1, &stvector); + intrinsics[Intrinsic_109] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_splat_u32"), 0, 1, &stvector); + intrinsics[Intrinsic_110] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sr"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_111] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sra"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_112] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_srl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_113] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sro"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_114] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_st"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_115] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_ste"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_116] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_stl"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_117] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_118] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_subc"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_119] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_subs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_120] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sum4s"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_121] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sum2s"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_122] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_sums"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_123] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_trunc"), 0, 1, &stvector); + intrinsics[Intrinsic_124] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_unpack2sh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_125] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_unpack2sl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_126] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_unpack2uh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_127] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_unpack2ul"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_128] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_unpackh"), 0, 1, &stvector); + intrinsics[Intrinsic_129] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_unpackl"), 0, 1, &stvector); + intrinsics[Intrinsic_130] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_xor"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_131] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_eq"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_132] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_ge"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_133] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_gt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_134] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_in"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_135] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_le"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_136] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_lt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_137] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_nan"), 0, 1, &stvector); + intrinsics[Intrinsic_138] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_ne"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_139] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_nge"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_140] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_ngt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_141] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_nle"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_142] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_nlt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_143] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_all_numeric"), 0, 1, &stvector); + intrinsics[Intrinsic_144] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_eq"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_145] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_ge"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_146] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_gt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_147] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_le"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_148] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_lt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_149] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_nan"), 0, 1, &stvector); + intrinsics[Intrinsic_150] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_ne"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_151] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_nge"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_152] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_ngt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_153] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_nle"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_154] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_nlt"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_155] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_numeric"), 0, 1, &stvector); + intrinsics[Intrinsic_156] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_any_out"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_157] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddubm"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_158] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vadduhm"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_159] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vadduwm"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_160] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_161] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddcuw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_162] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddubs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_163] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddsbs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_164] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vadduhs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_165] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddshs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_166] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vadduws"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_167] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vaddsws"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_168] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vand"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_169] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vandc"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_170] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vavgub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_171] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vavgsb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_172] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vavguh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_173] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vavgsh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_174] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vavguw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_175] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vavgsw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_176] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrfip"), 0, 1, &stvector); + intrinsics[Intrinsic_177] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpbfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_178] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpequb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_179] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpequh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_180] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpequw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_181] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpeqfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_182] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgefp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_183] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_184] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtsb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_185] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtuh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_186] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtsh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_187] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtuw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_188] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtsw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_189] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcmpgtfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_190] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcfux"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_191] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vcfsx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_192] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vctsxs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_193] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vctuxs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_194] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vexptefp"), 0, 1, &stvector); + intrinsics[Intrinsic_195] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrfim"), 0, 1, &stvector); + intrinsics[Intrinsic_196] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_197] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvebx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_198] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvehx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_199] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvewx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_200] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_lvxl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_201] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vlogefp"), 0, 1, &stvector); + intrinsics[Intrinsic_202] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaddfp"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_203] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmhaddshs"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_204] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_205] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxsb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_206] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxuh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_207] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxsh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_208] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxuw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_209] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxsw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_210] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmaxfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_211] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmrghb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_212] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmrghh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_213] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmrghw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_214] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmrglb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_215] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmrglh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_216] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmrglw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_217] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_218] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminsb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_219] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminuh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_220] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminsh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_221] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminuw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_222] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminsw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_223] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vminfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_224] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmladduhm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_225] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmhraddshs"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_226] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmsumubm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_227] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmsumuhm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_228] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmsummbm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_229] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmsumshm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_230] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmsumuhs"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_231] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmsumshs"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_232] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmuleub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_233] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmulesb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_234] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmuleuh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_235] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmulesh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_236] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmuloub"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_237] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmulosb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_238] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmulouh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_239] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vmulosh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_240] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vnmsubfp"), 0, 2, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_241] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vnor"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_242] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vor"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_243] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkuhum"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_244] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkuwum"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_245] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkpx"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_246] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkuhus"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_247] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkshss"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_248] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkuwus"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_249] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkswss"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_250] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkshus"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_251] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vpkswus"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_252] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vperm"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_253] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrefp"), 0, 1, &stvector); + intrinsics[Intrinsic_254] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrlb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_255] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrlh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_256] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrlw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_257] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrfin"), 0, 1, &stvector); + intrinsics[Intrinsic_258] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrsqrtefp"), 0, 1, &stvector); + intrinsics[Intrinsic_259] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsel"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_260] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vslb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_261] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vslh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_262] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vslw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_263] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsldoi"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_264] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsl"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_265] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vslo"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_266] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vspltb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_267] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsplth"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_268] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vspltw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_269] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vspltisb"), 0, 1, &stvector); + intrinsics[Intrinsic_270] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vspltish"), 0, 1, &stvector); + intrinsics[Intrinsic_271] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vspltisw"), 0, 1, &stvector); + intrinsics[Intrinsic_272] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsrb"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_273] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsrh"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_274] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsrw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_275] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsrab"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_276] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsrah"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_277] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsraw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_278] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsr"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_279] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsro"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_280] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_stvx"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_281] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_stvebx"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_282] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_stvehx"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_283] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_stvewx"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_284] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_stvxl"), 0, 3, &stvector, &stvector, &stvector); + intrinsics[Intrinsic_285] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsububm"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_286] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubuhm"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_287] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubuwm"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_288] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubfp"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_289] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubcuw"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_290] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsububs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_291] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubsbs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_292] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubuhs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_293] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubshs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_294] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubuws"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_295] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsubsws"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_296] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsum4ubs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_297] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsum4sbs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_298] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsum4shs"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_299] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsum2sws"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_300] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vsumsws"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_301] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vrfiz"), 0, 1, &stvector); + intrinsics[Intrinsic_302] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vupkhsb"), 0, 1, &stvector); + intrinsics[Intrinsic_303] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vupklsb"), 0, 1, &stvector); + intrinsics[Intrinsic_304] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vupkhpx"), 0, 1, &stvector); + intrinsics[Intrinsic_305] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vupklpx"), 0, 1, &stvector); + intrinsics[Intrinsic_306] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vupkhsh"), 0, 1, &stvector); + intrinsics[Intrinsic_307] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vupklsh"), 0, 1, &stvector); + intrinsics[Intrinsic_308] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_vxor"), 0, 2, &stvector, &stvector); + intrinsics[Intrinsic_309] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_abs"), 0, 1, &stvector); + intrinsics[Intrinsic_310] = CParser_NewRTFunc(TYPE(&stvector), GetHashNameNodeExport("vec_abss"), 0, 1, &stvector); + + for (i = 0; i < MaxIntrinsics; i++) { + CError_ASSERT(6600, intrinsics[i]); + intrinsics[i]->u.func.u.intrinsicid = i; + TYPE_FUNC(intrinsics[i]->type)->flags |= FUNC_INTRINSIC; + CScope_AddGlobalObject(intrinsics[i]); + } + + copts.cplusplus = savecpp; +} + +static Object *CheckRuntimeObject(char *name) { + NameSpaceName *nsname = CScope_FindNameSpaceName(cscope_root, GetHashNameNodeExport(name)); + + if (!nsname) + return NULL; + + if (nsname->first.object && nsname->first.next == NULL) + return OBJECT(nsname->first.object); + + return NULL; +} + +Boolean Intrinsics_ReInitRuntimeObjects(Boolean flag) { + if (flag) { + if (!(intrinsics[Intrinsic_000] = CheckRuntimeObject("__eieio"))) return 0; + if (!(intrinsics[Intrinsic_001] = CheckRuntimeObject("__sync"))) return 0; + if (!(intrinsics[Intrinsic_002] = CheckRuntimeObject("__isync"))) return 0; + if (!(intrinsics[Intrinsic_003] = CheckRuntimeObject("__abs"))) return 0; + if (!(intrinsics[Intrinsic_004] = CheckRuntimeObject("__labs"))) return 0; + if (!(intrinsics[Intrinsic_005] = CheckRuntimeObject("__fabs"))) return 0; + if (!(intrinsics[Intrinsic_006] = CheckRuntimeObject("__fnabs"))) return 0; + if (!(intrinsics[Intrinsic_007] = CheckRuntimeObject("__setflm"))) return 0; + if (!(intrinsics[Intrinsic_033] = CheckRuntimeObject("__frsqrte"))) return 0; + if (!(intrinsics[Intrinsic_008] = CheckRuntimeObject("__alloca"))) return 0; + if (!(intrinsics[Intrinsic_009] = CheckRuntimeObject("__cntlzw"))) return 0; + if (!(intrinsics[Intrinsic_010] = CheckRuntimeObject("__lhbrx"))) return 0; + if (!(intrinsics[Intrinsic_011] = CheckRuntimeObject("__lwbrx"))) return 0; + if (!(intrinsics[Intrinsic_012] = CheckRuntimeObject("__sthbrx"))) return 0; + if (!(intrinsics[Intrinsic_013] = CheckRuntimeObject("__stwbrx"))) return 0; + if (!(intrinsics[Intrinsic_014] = CheckRuntimeObject("__dcbf"))) return 0; + if (!(intrinsics[Intrinsic_015] = CheckRuntimeObject("__dcbt"))) return 0; + if (!(intrinsics[Intrinsic_016] = CheckRuntimeObject("__dcbst"))) return 0; + if (!(intrinsics[Intrinsic_017] = CheckRuntimeObject("__dcbtst"))) return 0; + if (!(intrinsics[Intrinsic_018] = CheckRuntimeObject("__dcbz"))) return 0; + if (!(intrinsics[Intrinsic_019] = CheckRuntimeObject("__mulhw"))) return 0; + if (!(intrinsics[Intrinsic_020] = CheckRuntimeObject("__mulhwu"))) return 0; + if (!(intrinsics[Intrinsic_021] = CheckRuntimeObject("__divw"))) return 0; + if (!(intrinsics[Intrinsic_022] = CheckRuntimeObject("__divwu"))) return 0; + if (!(intrinsics[Intrinsic_023] = CheckRuntimeObject("__fmadd"))) return 0; + if (!(intrinsics[Intrinsic_024] = CheckRuntimeObject("__fmsub"))) return 0; + if (!(intrinsics[Intrinsic_025] = CheckRuntimeObject("__fnmadd"))) return 0; + if (!(intrinsics[Intrinsic_026] = CheckRuntimeObject("__fnmsub"))) return 0; + if (!(intrinsics[Intrinsic_034] = CheckRuntimeObject("__fsel"))) return 0; + if (!(intrinsics[Intrinsic_027] = CheckRuntimeObject("__fmadds"))) return 0; + if (!(intrinsics[Intrinsic_028] = CheckRuntimeObject("__fmsubs"))) return 0; + if (!(intrinsics[Intrinsic_029] = CheckRuntimeObject("__fnmadds"))) return 0; + if (!(intrinsics[Intrinsic_030] = CheckRuntimeObject("__fnmsubs"))) return 0; + if (!(intrinsics[Intrinsic_031] = CheckRuntimeObject("__mffs"))) return 0; + if (!(intrinsics[Intrinsic_032] = CheckRuntimeObject("__fres"))) return 0; + if (!(intrinsics[Intrinsic_040] = CheckRuntimeObject("__fabsf"))) return 0; + if (!(intrinsics[Intrinsic_041] = CheckRuntimeObject("__fnabsf"))) return 0; + if (!(intrinsics[Intrinsic_035] = CheckRuntimeObject("__strcpy"))) return 0; + if (!(intrinsics[Intrinsic_037] = CheckRuntimeObject("__rlwimi"))) return 0; + if (!(intrinsics[Intrinsic_038] = CheckRuntimeObject("__rlwinm"))) return 0; + if (!(intrinsics[Intrinsic_039] = CheckRuntimeObject("__rlwnm"))) return 0; + if (!(intrinsics[Intrinsic_042] = CheckRuntimeObject("vec_add"))) return 0; + if (!(intrinsics[Intrinsic_043] = CheckRuntimeObject("vec_addc"))) return 0; + if (!(intrinsics[Intrinsic_044] = CheckRuntimeObject("vec_adds"))) return 0; + if (!(intrinsics[Intrinsic_045] = CheckRuntimeObject("vec_and"))) return 0; + if (!(intrinsics[Intrinsic_046] = CheckRuntimeObject("vec_andc"))) return 0; + if (!(intrinsics[Intrinsic_047] = CheckRuntimeObject("vec_avg"))) return 0; + if (!(intrinsics[Intrinsic_048] = CheckRuntimeObject("vec_ceil"))) return 0; + if (!(intrinsics[Intrinsic_049] = CheckRuntimeObject("vec_cmpb"))) return 0; + if (!(intrinsics[Intrinsic_050] = CheckRuntimeObject("vec_cmpeq"))) return 0; + if (!(intrinsics[Intrinsic_051] = CheckRuntimeObject("vec_cmpge"))) return 0; + if (!(intrinsics[Intrinsic_052] = CheckRuntimeObject("vec_cmple"))) return 0; + if (!(intrinsics[Intrinsic_053] = CheckRuntimeObject("vec_cmpgt"))) return 0; + if (!(intrinsics[Intrinsic_054] = CheckRuntimeObject("vec_cmplt"))) return 0; + if (!(intrinsics[Intrinsic_055] = CheckRuntimeObject("vec_ctf"))) return 0; + if (!(intrinsics[Intrinsic_056] = CheckRuntimeObject("vec_cts"))) return 0; + if (!(intrinsics[Intrinsic_057] = CheckRuntimeObject("vec_ctu"))) return 0; + if (!(intrinsics[Intrinsic_064] = CheckRuntimeObject("vec_expte"))) return 0; + if (!(intrinsics[Intrinsic_065] = CheckRuntimeObject("vec_floor"))) return 0; + if (!(intrinsics[Intrinsic_066] = CheckRuntimeObject("vec_ld"))) return 0; + if (!(intrinsics[Intrinsic_067] = CheckRuntimeObject("vec_lde"))) return 0; + if (!(intrinsics[Intrinsic_068] = CheckRuntimeObject("vec_ldl"))) return 0; + if (!(intrinsics[Intrinsic_069] = CheckRuntimeObject("vec_loge"))) return 0; + if (!(intrinsics[Intrinsic_070] = CheckRuntimeObject("vec_lvsl"))) return 0; + if (!(intrinsics[Intrinsic_071] = CheckRuntimeObject("vec_lvsr"))) return 0; + if (!(intrinsics[Intrinsic_072] = CheckRuntimeObject("vec_madd"))) return 0; + if (!(intrinsics[Intrinsic_073] = CheckRuntimeObject("vec_madds"))) return 0; + if (!(intrinsics[Intrinsic_074] = CheckRuntimeObject("vec_max"))) return 0; + if (!(intrinsics[Intrinsic_075] = CheckRuntimeObject("vec_mergeh"))) return 0; + if (!(intrinsics[Intrinsic_076] = CheckRuntimeObject("vec_mergel"))) return 0; + if (!(intrinsics[Intrinsic_077] = CheckRuntimeObject("vec_mfvscr"))) return 0; + if (!(intrinsics[Intrinsic_078] = CheckRuntimeObject("vec_min"))) return 0; + if (!(intrinsics[Intrinsic_079] = CheckRuntimeObject("vec_mladd"))) return 0; + if (!(intrinsics[Intrinsic_080] = CheckRuntimeObject("vec_mradds"))) return 0; + if (!(intrinsics[Intrinsic_081] = CheckRuntimeObject("vec_msum"))) return 0; + if (!(intrinsics[Intrinsic_082] = CheckRuntimeObject("vec_msums"))) return 0; + if (!(intrinsics[Intrinsic_083] = CheckRuntimeObject("vec_mtvscr"))) return 0; + if (!(intrinsics[Intrinsic_084] = CheckRuntimeObject("vec_mule"))) return 0; + if (!(intrinsics[Intrinsic_085] = CheckRuntimeObject("vec_mulo"))) return 0; + if (!(intrinsics[Intrinsic_086] = CheckRuntimeObject("vec_nmsub"))) return 0; + if (!(intrinsics[Intrinsic_087] = CheckRuntimeObject("vec_nor"))) return 0; + if (!(intrinsics[Intrinsic_088] = CheckRuntimeObject("vec_or"))) return 0; + if (!(intrinsics[Intrinsic_089] = CheckRuntimeObject("vec_pack"))) return 0; + if (!(intrinsics[Intrinsic_090] = CheckRuntimeObject("vec_packpx"))) return 0; + if (!(intrinsics[Intrinsic_091] = CheckRuntimeObject("vec_packs"))) return 0; + if (!(intrinsics[Intrinsic_092] = CheckRuntimeObject("vec_packsu"))) return 0; + if (!(intrinsics[Intrinsic_093] = CheckRuntimeObject("vec_perm"))) return 0; + if (!(intrinsics[Intrinsic_094] = CheckRuntimeObject("vec_re"))) return 0; + if (!(intrinsics[Intrinsic_095] = CheckRuntimeObject("vec_rl"))) return 0; + if (!(intrinsics[Intrinsic_096] = CheckRuntimeObject("vec_round"))) return 0; + if (!(intrinsics[Intrinsic_097] = CheckRuntimeObject("vec_rsqrte"))) return 0; + if (!(intrinsics[Intrinsic_098] = CheckRuntimeObject("vec_sel"))) return 0; + if (!(intrinsics[Intrinsic_099] = CheckRuntimeObject("vec_sl"))) return 0; + if (!(intrinsics[Intrinsic_100] = CheckRuntimeObject("vec_sld"))) return 0; + if (!(intrinsics[Intrinsic_101] = CheckRuntimeObject("vec_sll"))) return 0; + if (!(intrinsics[Intrinsic_102] = CheckRuntimeObject("vec_slo"))) return 0; + if (!(intrinsics[Intrinsic_103] = CheckRuntimeObject("vec_splat"))) return 0; + if (!(intrinsics[Intrinsic_104] = CheckRuntimeObject("vec_splat_s8"))) return 0; + if (!(intrinsics[Intrinsic_105] = CheckRuntimeObject("vec_splat_s16"))) return 0; + if (!(intrinsics[Intrinsic_106] = CheckRuntimeObject("vec_splat_s32"))) return 0; + if (!(intrinsics[Intrinsic_107] = CheckRuntimeObject("vec_splat_u8"))) return 0; + if (!(intrinsics[Intrinsic_108] = CheckRuntimeObject("vec_splat_u16"))) return 0; + if (!(intrinsics[Intrinsic_109] = CheckRuntimeObject("vec_splat_u32"))) return 0; + if (!(intrinsics[Intrinsic_110] = CheckRuntimeObject("vec_sr"))) return 0; + if (!(intrinsics[Intrinsic_111] = CheckRuntimeObject("vec_sra"))) return 0; + if (!(intrinsics[Intrinsic_112] = CheckRuntimeObject("vec_srl"))) return 0; + if (!(intrinsics[Intrinsic_113] = CheckRuntimeObject("vec_sro"))) return 0; + if (!(intrinsics[Intrinsic_114] = CheckRuntimeObject("vec_st"))) return 0; + if (!(intrinsics[Intrinsic_115] = CheckRuntimeObject("vec_ste"))) return 0; + if (!(intrinsics[Intrinsic_116] = CheckRuntimeObject("vec_stl"))) return 0; + if (!(intrinsics[Intrinsic_117] = CheckRuntimeObject("vec_sub"))) return 0; + if (!(intrinsics[Intrinsic_118] = CheckRuntimeObject("vec_subc"))) return 0; + if (!(intrinsics[Intrinsic_119] = CheckRuntimeObject("vec_subs"))) return 0; + if (!(intrinsics[Intrinsic_120] = CheckRuntimeObject("vec_sum4s"))) return 0; + if (!(intrinsics[Intrinsic_121] = CheckRuntimeObject("vec_sum2s"))) return 0; + if (!(intrinsics[Intrinsic_122] = CheckRuntimeObject("vec_sums"))) return 0; + if (!(intrinsics[Intrinsic_123] = CheckRuntimeObject("vec_trunc"))) return 0; + if (!(intrinsics[Intrinsic_124] = CheckRuntimeObject("vec_unpack2sh"))) return 0; + if (!(intrinsics[Intrinsic_125] = CheckRuntimeObject("vec_unpack2sl"))) return 0; + if (!(intrinsics[Intrinsic_126] = CheckRuntimeObject("vec_unpack2uh"))) return 0; + if (!(intrinsics[Intrinsic_127] = CheckRuntimeObject("vec_unpack2ul"))) return 0; + if (!(intrinsics[Intrinsic_128] = CheckRuntimeObject("vec_unpackh"))) return 0; + if (!(intrinsics[Intrinsic_129] = CheckRuntimeObject("vec_unpackl"))) return 0; + if (!(intrinsics[Intrinsic_130] = CheckRuntimeObject("vec_xor"))) return 0; + if (!(intrinsics[Intrinsic_131] = CheckRuntimeObject("vec_all_eq"))) return 0; + if (!(intrinsics[Intrinsic_132] = CheckRuntimeObject("vec_all_ge"))) return 0; + if (!(intrinsics[Intrinsic_133] = CheckRuntimeObject("vec_all_gt"))) return 0; + if (!(intrinsics[Intrinsic_134] = CheckRuntimeObject("vec_all_in"))) return 0; + if (!(intrinsics[Intrinsic_135] = CheckRuntimeObject("vec_all_le"))) return 0; + if (!(intrinsics[Intrinsic_136] = CheckRuntimeObject("vec_all_lt"))) return 0; + if (!(intrinsics[Intrinsic_137] = CheckRuntimeObject("vec_all_nan"))) return 0; + if (!(intrinsics[Intrinsic_138] = CheckRuntimeObject("vec_all_ne"))) return 0; + if (!(intrinsics[Intrinsic_139] = CheckRuntimeObject("vec_all_nge"))) return 0; + if (!(intrinsics[Intrinsic_140] = CheckRuntimeObject("vec_all_ngt"))) return 0; + if (!(intrinsics[Intrinsic_141] = CheckRuntimeObject("vec_all_nle"))) return 0; + if (!(intrinsics[Intrinsic_142] = CheckRuntimeObject("vec_all_nlt"))) return 0; + if (!(intrinsics[Intrinsic_143] = CheckRuntimeObject("vec_all_numeric"))) return 0; + if (!(intrinsics[Intrinsic_144] = CheckRuntimeObject("vec_any_eq"))) return 0; + if (!(intrinsics[Intrinsic_145] = CheckRuntimeObject("vec_any_ge"))) return 0; + if (!(intrinsics[Intrinsic_146] = CheckRuntimeObject("vec_any_gt"))) return 0; + if (!(intrinsics[Intrinsic_147] = CheckRuntimeObject("vec_any_le"))) return 0; + if (!(intrinsics[Intrinsic_148] = CheckRuntimeObject("vec_any_lt"))) return 0; + if (!(intrinsics[Intrinsic_149] = CheckRuntimeObject("vec_any_nan"))) return 0; + if (!(intrinsics[Intrinsic_150] = CheckRuntimeObject("vec_any_ne"))) return 0; + if (!(intrinsics[Intrinsic_151] = CheckRuntimeObject("vec_any_nge"))) return 0; + if (!(intrinsics[Intrinsic_152] = CheckRuntimeObject("vec_any_ngt"))) return 0; + if (!(intrinsics[Intrinsic_153] = CheckRuntimeObject("vec_any_nle"))) return 0; + if (!(intrinsics[Intrinsic_154] = CheckRuntimeObject("vec_any_nlt"))) return 0; + if (!(intrinsics[Intrinsic_155] = CheckRuntimeObject("vec_any_numeric"))) return 0; + if (!(intrinsics[Intrinsic_156] = CheckRuntimeObject("vec_any_out"))) return 0; + if (!(intrinsics[Intrinsic_157] = CheckRuntimeObject("vec_vaddubm"))) return 0; + if (!(intrinsics[Intrinsic_158] = CheckRuntimeObject("vec_vadduhm"))) return 0; + if (!(intrinsics[Intrinsic_159] = CheckRuntimeObject("vec_vadduwm"))) return 0; + if (!(intrinsics[Intrinsic_160] = CheckRuntimeObject("vec_vaddfp"))) return 0; + if (!(intrinsics[Intrinsic_161] = CheckRuntimeObject("vec_vaddcuw"))) return 0; + if (!(intrinsics[Intrinsic_162] = CheckRuntimeObject("vec_vaddubs"))) return 0; + if (!(intrinsics[Intrinsic_163] = CheckRuntimeObject("vec_vaddubs"))) return 0; + if (!(intrinsics[Intrinsic_164] = CheckRuntimeObject("vec_vadduhs"))) return 0; + if (!(intrinsics[Intrinsic_165] = CheckRuntimeObject("vec_vadduhs"))) return 0; + if (!(intrinsics[Intrinsic_166] = CheckRuntimeObject("vec_vadduws"))) return 0; + if (!(intrinsics[Intrinsic_167] = CheckRuntimeObject("vec_vadduws"))) return 0; + if (!(intrinsics[Intrinsic_168] = CheckRuntimeObject("vec_vand"))) return 0; + if (!(intrinsics[Intrinsic_169] = CheckRuntimeObject("vec_vandc"))) return 0; + if (!(intrinsics[Intrinsic_170] = CheckRuntimeObject("vec_vavgub"))) return 0; + if (!(intrinsics[Intrinsic_171] = CheckRuntimeObject("vec_vavgsb"))) return 0; + if (!(intrinsics[Intrinsic_172] = CheckRuntimeObject("vec_vavguh"))) return 0; + if (!(intrinsics[Intrinsic_173] = CheckRuntimeObject("vec_vavgsh"))) return 0; + if (!(intrinsics[Intrinsic_174] = CheckRuntimeObject("vec_vavguw"))) return 0; + if (!(intrinsics[Intrinsic_175] = CheckRuntimeObject("vec_vavgsw"))) return 0; + if (!(intrinsics[Intrinsic_176] = CheckRuntimeObject("vec_vrfip"))) return 0; + if (!(intrinsics[Intrinsic_177] = CheckRuntimeObject("vec_vcmpbfp"))) return 0; + if (!(intrinsics[Intrinsic_178] = CheckRuntimeObject("vec_vcmpequb"))) return 0; + if (!(intrinsics[Intrinsic_179] = CheckRuntimeObject("vec_vcmpequh"))) return 0; + if (!(intrinsics[Intrinsic_180] = CheckRuntimeObject("vec_vcmpequw"))) return 0; + if (!(intrinsics[Intrinsic_181] = CheckRuntimeObject("vec_vcmpeqfp"))) return 0; + if (!(intrinsics[Intrinsic_182] = CheckRuntimeObject("vec_vcmpgefp"))) return 0; + if (!(intrinsics[Intrinsic_183] = CheckRuntimeObject("vec_vcmpgtub"))) return 0; + if (!(intrinsics[Intrinsic_184] = CheckRuntimeObject("vec_vcmpgtsb"))) return 0; + if (!(intrinsics[Intrinsic_185] = CheckRuntimeObject("vec_vcmpgtuh"))) return 0; + if (!(intrinsics[Intrinsic_186] = CheckRuntimeObject("vec_vcmpgtsh"))) return 0; + if (!(intrinsics[Intrinsic_187] = CheckRuntimeObject("vec_vcmpgtuw"))) return 0; + if (!(intrinsics[Intrinsic_188] = CheckRuntimeObject("vec_vcmpgtsw"))) return 0; + if (!(intrinsics[Intrinsic_189] = CheckRuntimeObject("vec_vcmpgtfp"))) return 0; + if (!(intrinsics[Intrinsic_190] = CheckRuntimeObject("vec_vcfux"))) return 0; + if (!(intrinsics[Intrinsic_191] = CheckRuntimeObject("vec_vcfsx"))) return 0; + if (!(intrinsics[Intrinsic_192] = CheckRuntimeObject("vec_vctsxs"))) return 0; + if (!(intrinsics[Intrinsic_193] = CheckRuntimeObject("vec_vctuxs"))) return 0; + if (!(intrinsics[Intrinsic_194] = CheckRuntimeObject("vec_vexptefp"))) return 0; + if (!(intrinsics[Intrinsic_195] = CheckRuntimeObject("vec_vrfim"))) return 0; + if (!(intrinsics[Intrinsic_196] = CheckRuntimeObject("vec_lvx"))) return 0; + if (!(intrinsics[Intrinsic_197] = CheckRuntimeObject("vec_lvebx"))) return 0; + if (!(intrinsics[Intrinsic_198] = CheckRuntimeObject("vec_lvehx"))) return 0; + if (!(intrinsics[Intrinsic_199] = CheckRuntimeObject("vec_lvewx"))) return 0; + if (!(intrinsics[Intrinsic_200] = CheckRuntimeObject("vec_lvxl"))) return 0; + if (!(intrinsics[Intrinsic_201] = CheckRuntimeObject("vec_vlogefp"))) return 0; + if (!(intrinsics[Intrinsic_202] = CheckRuntimeObject("vec_vmaddfp"))) return 0; + if (!(intrinsics[Intrinsic_203] = CheckRuntimeObject("vec_vmhaddshs"))) return 0; + if (!(intrinsics[Intrinsic_204] = CheckRuntimeObject("vec_vmaxub"))) return 0; + if (!(intrinsics[Intrinsic_205] = CheckRuntimeObject("vec_vmaxsb"))) return 0; + if (!(intrinsics[Intrinsic_206] = CheckRuntimeObject("vec_vmaxuh"))) return 0; + if (!(intrinsics[Intrinsic_207] = CheckRuntimeObject("vec_vmaxsh"))) return 0; + if (!(intrinsics[Intrinsic_208] = CheckRuntimeObject("vec_vmaxuw"))) return 0; + if (!(intrinsics[Intrinsic_209] = CheckRuntimeObject("vec_vmaxsw"))) return 0; + if (!(intrinsics[Intrinsic_210] = CheckRuntimeObject("vec_vmaxfp"))) return 0; + if (!(intrinsics[Intrinsic_211] = CheckRuntimeObject("vec_vmrghb"))) return 0; + if (!(intrinsics[Intrinsic_212] = CheckRuntimeObject("vec_vmrghh"))) return 0; + if (!(intrinsics[Intrinsic_213] = CheckRuntimeObject("vec_vmrghw"))) return 0; + if (!(intrinsics[Intrinsic_214] = CheckRuntimeObject("vec_vmrglb"))) return 0; + if (!(intrinsics[Intrinsic_215] = CheckRuntimeObject("vec_vmrglh"))) return 0; + if (!(intrinsics[Intrinsic_216] = CheckRuntimeObject("vec_vmrglw"))) return 0; + if (!(intrinsics[Intrinsic_204] = CheckRuntimeObject("vec_vminub"))) return 0; + if (!(intrinsics[Intrinsic_205] = CheckRuntimeObject("vec_vminsb"))) return 0; + if (!(intrinsics[Intrinsic_206] = CheckRuntimeObject("vec_vminuh"))) return 0; + if (!(intrinsics[Intrinsic_207] = CheckRuntimeObject("vec_vminsh"))) return 0; + if (!(intrinsics[Intrinsic_208] = CheckRuntimeObject("vec_vminuw"))) return 0; + if (!(intrinsics[Intrinsic_209] = CheckRuntimeObject("vec_vminsw"))) return 0; + if (!(intrinsics[Intrinsic_210] = CheckRuntimeObject("vec_vminfp"))) return 0; + if (!(intrinsics[Intrinsic_224] = CheckRuntimeObject("vec_vmladduhm"))) return 0; + if (!(intrinsics[Intrinsic_225] = CheckRuntimeObject("vec_vmhraddshs"))) return 0; + if (!(intrinsics[Intrinsic_226] = CheckRuntimeObject("vec_vmsumubm"))) return 0; + if (!(intrinsics[Intrinsic_227] = CheckRuntimeObject("vec_vmsumuhm"))) return 0; + if (!(intrinsics[Intrinsic_228] = CheckRuntimeObject("vec_vmsummbm"))) return 0; + if (!(intrinsics[Intrinsic_229] = CheckRuntimeObject("vec_vmsumshm"))) return 0; + if (!(intrinsics[Intrinsic_230] = CheckRuntimeObject("vec_vmsumuhs"))) return 0; + if (!(intrinsics[Intrinsic_231] = CheckRuntimeObject("vec_vmsumshs"))) return 0; + if (!(intrinsics[Intrinsic_232] = CheckRuntimeObject("vec_vmuleub"))) return 0; + if (!(intrinsics[Intrinsic_233] = CheckRuntimeObject("vec_vmulesb"))) return 0; + if (!(intrinsics[Intrinsic_234] = CheckRuntimeObject("vec_vmuleuh"))) return 0; + if (!(intrinsics[Intrinsic_235] = CheckRuntimeObject("vec_vmulesh"))) return 0; + if (!(intrinsics[Intrinsic_236] = CheckRuntimeObject("vec_vmuloub"))) return 0; + if (!(intrinsics[Intrinsic_237] = CheckRuntimeObject("vec_vmulosb"))) return 0; + if (!(intrinsics[Intrinsic_238] = CheckRuntimeObject("vec_vmulouh"))) return 0; + if (!(intrinsics[Intrinsic_239] = CheckRuntimeObject("vec_vmulosh"))) return 0; + if (!(intrinsics[Intrinsic_240] = CheckRuntimeObject("vec_vnmsubfp"))) return 0; + if (!(intrinsics[Intrinsic_241] = CheckRuntimeObject("vec_vnor"))) return 0; + if (!(intrinsics[Intrinsic_242] = CheckRuntimeObject("vec_vor"))) return 0; + if (!(intrinsics[Intrinsic_243] = CheckRuntimeObject("vec_vpkuhum"))) return 0; + if (!(intrinsics[Intrinsic_244] = CheckRuntimeObject("vec_vpkuwum"))) return 0; + if (!(intrinsics[Intrinsic_245] = CheckRuntimeObject("vec_vpkpx"))) return 0; + if (!(intrinsics[Intrinsic_246] = CheckRuntimeObject("vec_vpkuhus"))) return 0; + if (!(intrinsics[Intrinsic_247] = CheckRuntimeObject("vec_vpkshss"))) return 0; + if (!(intrinsics[Intrinsic_248] = CheckRuntimeObject("vec_vpkuwus"))) return 0; + if (!(intrinsics[Intrinsic_249] = CheckRuntimeObject("vec_vpkswss"))) return 0; + if (!(intrinsics[Intrinsic_250] = CheckRuntimeObject("vec_vpkshus"))) return 0; + if (!(intrinsics[Intrinsic_251] = CheckRuntimeObject("vec_vpkswus"))) return 0; + if (!(intrinsics[Intrinsic_252] = CheckRuntimeObject("vec_vperm"))) return 0; + if (!(intrinsics[Intrinsic_253] = CheckRuntimeObject("vec_vrefp"))) return 0; + if (!(intrinsics[Intrinsic_254] = CheckRuntimeObject("vec_vrlb"))) return 0; + if (!(intrinsics[Intrinsic_255] = CheckRuntimeObject("vec_vrlh"))) return 0; + if (!(intrinsics[Intrinsic_256] = CheckRuntimeObject("vec_vrlw"))) return 0; + if (!(intrinsics[Intrinsic_257] = CheckRuntimeObject("vec_vrfin"))) return 0; + if (!(intrinsics[Intrinsic_258] = CheckRuntimeObject("vec_vrsqrtefp"))) return 0; + if (!(intrinsics[Intrinsic_259] = CheckRuntimeObject("vec_vsel"))) return 0; + if (!(intrinsics[Intrinsic_260] = CheckRuntimeObject("vec_vslb"))) return 0; + if (!(intrinsics[Intrinsic_261] = CheckRuntimeObject("vec_vslh"))) return 0; + if (!(intrinsics[Intrinsic_262] = CheckRuntimeObject("vec_vslw"))) return 0; + if (!(intrinsics[Intrinsic_263] = CheckRuntimeObject("vec_vsldoi"))) return 0; + if (!(intrinsics[Intrinsic_264] = CheckRuntimeObject("vec_vsl"))) return 0; + if (!(intrinsics[Intrinsic_265] = CheckRuntimeObject("vec_vslo"))) return 0; + if (!(intrinsics[Intrinsic_266] = CheckRuntimeObject("vec_vspltb"))) return 0; + if (!(intrinsics[Intrinsic_267] = CheckRuntimeObject("vec_vsplth"))) return 0; + if (!(intrinsics[Intrinsic_268] = CheckRuntimeObject("vec_vspltw"))) return 0; + if (!(intrinsics[Intrinsic_269] = CheckRuntimeObject("vec_vspltisb"))) return 0; + if (!(intrinsics[Intrinsic_270] = CheckRuntimeObject("vec_vspltish"))) return 0; + if (!(intrinsics[Intrinsic_271] = CheckRuntimeObject("vec_vspltisw"))) return 0; + if (!(intrinsics[Intrinsic_272] = CheckRuntimeObject("vec_vsrb"))) return 0; + if (!(intrinsics[Intrinsic_273] = CheckRuntimeObject("vec_vsrh"))) return 0; + if (!(intrinsics[Intrinsic_274] = CheckRuntimeObject("vec_vsrw"))) return 0; + if (!(intrinsics[Intrinsic_275] = CheckRuntimeObject("vec_vsrab"))) return 0; + if (!(intrinsics[Intrinsic_276] = CheckRuntimeObject("vec_vsrah"))) return 0; + if (!(intrinsics[Intrinsic_277] = CheckRuntimeObject("vec_vsraw"))) return 0; + if (!(intrinsics[Intrinsic_278] = CheckRuntimeObject("vec_vsr"))) return 0; + if (!(intrinsics[Intrinsic_279] = CheckRuntimeObject("vec_vsro"))) return 0; + if (!(intrinsics[Intrinsic_280] = CheckRuntimeObject("vec_stvx"))) return 0; + if (!(intrinsics[Intrinsic_281] = CheckRuntimeObject("vec_stvebx"))) return 0; + if (!(intrinsics[Intrinsic_282] = CheckRuntimeObject("vec_stvehx"))) return 0; + if (!(intrinsics[Intrinsic_283] = CheckRuntimeObject("vec_stvewx"))) return 0; + if (!(intrinsics[Intrinsic_284] = CheckRuntimeObject("vec_stvxl"))) return 0; + if (!(intrinsics[Intrinsic_285] = CheckRuntimeObject("vec_vsububm"))) return 0; + if (!(intrinsics[Intrinsic_286] = CheckRuntimeObject("vec_vsubuhm"))) return 0; + if (!(intrinsics[Intrinsic_287] = CheckRuntimeObject("vec_vsubuwm"))) return 0; + if (!(intrinsics[Intrinsic_288] = CheckRuntimeObject("vec_vsubfp"))) return 0; + if (!(intrinsics[Intrinsic_289] = CheckRuntimeObject("vec_vsubcuw"))) return 0; + if (!(intrinsics[Intrinsic_290] = CheckRuntimeObject("vec_vsububs"))) return 0; + if (!(intrinsics[Intrinsic_291] = CheckRuntimeObject("vec_vsubsbs"))) return 0; + if (!(intrinsics[Intrinsic_292] = CheckRuntimeObject("vec_vsubuhs"))) return 0; + if (!(intrinsics[Intrinsic_293] = CheckRuntimeObject("vec_vsubshs"))) return 0; + if (!(intrinsics[Intrinsic_294] = CheckRuntimeObject("vec_vsubuws"))) return 0; + if (!(intrinsics[Intrinsic_295] = CheckRuntimeObject("vec_vsubsws"))) return 0; + if (!(intrinsics[Intrinsic_296] = CheckRuntimeObject("vec_vsum4ubs"))) return 0; + if (!(intrinsics[Intrinsic_297] = CheckRuntimeObject("vec_vsum4sbs"))) return 0; + if (!(intrinsics[Intrinsic_298] = CheckRuntimeObject("vec_vsum4shs"))) return 0; + if (!(intrinsics[Intrinsic_299] = CheckRuntimeObject("vec_vsum2sws"))) return 0; + if (!(intrinsics[Intrinsic_300] = CheckRuntimeObject("vec_vsumsws"))) return 0; + if (!(intrinsics[Intrinsic_301] = CheckRuntimeObject("vec_vrfiz"))) return 0; + if (!(intrinsics[Intrinsic_302] = CheckRuntimeObject("vec_vupkhsb"))) return 0; + if (!(intrinsics[Intrinsic_303] = CheckRuntimeObject("vec_vupklsb"))) return 0; + if (!(intrinsics[Intrinsic_304] = CheckRuntimeObject("vec_vupkhpx"))) return 0; + if (!(intrinsics[Intrinsic_305] = CheckRuntimeObject("vec_vupklpx"))) return 0; + if (!(intrinsics[Intrinsic_306] = CheckRuntimeObject("vec_vupkhsh"))) return 0; + if (!(intrinsics[Intrinsic_307] = CheckRuntimeObject("vec_vupklsh"))) return 0; + if (!(intrinsics[Intrinsic_308] = CheckRuntimeObject("vec_vxor"))) return 0; + if (!(intrinsics[Intrinsic_309] = CheckRuntimeObject("vec_abs"))) return 0; + if (!(intrinsics[Intrinsic_310] = CheckRuntimeObject("vec_abss"))) return 0; + } + + return 1; +} + +Boolean Intrinsics_IsPublicRuntimeObject(Object *object) { + int i; + + for (i = 0; i < MaxIntrinsics; i++) { + if (object == intrinsics[i]) + return 1; + } + + return 0; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Operands.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Operands.c new file mode 100644 index 0000000..c5da285 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Operands.c @@ -0,0 +1,1040 @@ +#include "compiler/Operands.h" +#include "compiler/CError.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/RegisterInfo.h" +#include "compiler/Registers.h" +#include "compiler/StackFrame.h" +#include "compiler/TOC.h" +#include "compiler/enode.h" +#include "compiler/objects.h" + +unsigned long long uns_to_float_cc = 0x4330000000000000; +unsigned long long int_to_float_cc = 0x4330000080000000; +Float one_point_zero = {1.0}; + +void load_immediate(short reg, SInt32 value) { + short tmpreg = reg; + short tmpreg2; + + if (!FITS_IN_SHORT(value)) { + if (copts.optimizationlevel > 1 && LOW_PART_BUGGY(value)) + tmpreg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_LIS, tmpreg2 = tmpreg, 0, (short) HIGH_PART(value)); + if (LOW_PART_BUGGY(value)) + emitpcode(PC_ADDI, reg, tmpreg2, 0, LOW_PART(value)); + } else { + emitpcode(PC_LI, reg, value); + } +} + +static void set_op_flags(Operand *op, ENode *expr) { + CError_ASSERT(118, op); + + if (expr) { + if (expr->type == EINTCONST) { + op->flags = 0; + if (expr->flags & ENODE_FLAG_VOLATILE) + op->flags |= OpndFlags_Volatile; + if (expr->flags & ENODE_FLAG_CONST) + op->flags |= OpndFlags_Const; + } else { + op->flags = CParserIsVolatileExpr(expr) ? OpndFlags_Volatile : 0; + op->flags |= CParserIsConstExpr(expr) ? OpndFlags_Const : 0; + } + } else { + op->flags = 0; + } +} + +void symbol_operand(Operand *op, Object *obj) { + memclrw(op, sizeof(Operand)); + op->optype = OpndType_Symbol; + op->object = obj; +} + +void indirect(Operand *op, ENode *expr) { + switch (op->optype) { + case OpndType_GPRPair: + CError_FATAL(163); + case OpndType_CRField: + case OpndType_IndirectGPR_ImmOffset: + case OpndType_IndirectGPR_Indexed: + case OpndType_IndirectSymbol: + if (op->optype) + Coerce_to_register(op, TYPE(&void_ptr), 0); + case OpndType_GPR: + op->immOffset = 0; + op->object = NULL; + case OpndType_GPR_ImmOffset: + op->optype = OpndType_IndirectGPR_ImmOffset; + set_op_flags(op, expr); + break; + case OpndType_GPR_Indexed: + op->optype = OpndType_IndirectGPR_Indexed; + set_op_flags(op, expr); + break; + case OpndType_Absolute: + if (FITS_IN_SHORT(op->immediate)) { + op->reg = 0; + op->immOffset = op->immediate; + } else { + emitpcode(PC_LIS, op->reg = used_virtual_registers[RegClass_GPR]++, 0, (short) HIGH_PART(op->immediate)); + op->immOffset = LOW_PART(op->immediate); + } + op->object = NULL; + op->optype = OpndType_IndirectGPR_ImmOffset; + set_op_flags(op, expr); + break; + case OpndType_Symbol: + op->optype = OpndType_IndirectSymbol; + set_op_flags(op, expr); + break; + default: + CError_FATAL(215); + } +} + +#define COMBO_OP(a, b) (b + (a * 11)) + +void combine(Operand *opA, Operand *opB, short output_reg, Operand *opOut) { + Operand *tmp_op; + int tmp; + + if (opA->optype == OpndType_Symbol || opA->optype == OpndType_IndirectSymbol) + coerce_to_addressable(opA); + if (opB->optype == OpndType_Symbol || opB->optype == OpndType_IndirectSymbol) + coerce_to_addressable(opB); + + switch (COMBO_OP(opA->optype, opB->optype)) { + case COMBO_OP(OpndType_GPR, OpndType_GPR): + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = opA->reg; + opOut->regOffset = opB->reg; + break; + case COMBO_OP(OpndType_GPR_ImmOffset, OpndType_GPR_ImmOffset): + if (FITS_IN_SHORT(opA->immOffset + opB->immOffset) && (!opA->object || !opB->object)) { + opB->immOffset += opA->immOffset; + if (!opB->object) + opB->object = opA->object; + } else { + tmp = (output_reg && (output_reg != opB->reg)) ? output_reg : used_virtual_registers[RegClass_GPR]++; + add_immediate(tmp, opA->reg, opA->object, opA->immOffset); + opA->reg = tmp; + } + case COMBO_OP(OpndType_GPR, OpndType_GPR_ImmOffset): + tmp_op = opA; + opA = opB; + opB = tmp_op; + case COMBO_OP(OpndType_GPR_ImmOffset, OpndType_GPR): + if (opA->reg == _FP_ || opA->reg == _CALLER_SP_) { + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = (output_reg && (output_reg != opB->reg)) ? output_reg : used_virtual_registers[RegClass_GPR]++; + opOut->regOffset = opB->reg; + add_immediate(opOut->reg, opA->reg, opA->object, LOW_PART(opA->immOffset)); + } else if (opB->reg == _FP_ || opB->reg == _CALLER_SP_) { + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = (output_reg && (output_reg != opA->reg)) ? output_reg : used_virtual_registers[RegClass_GPR]++; + opOut->regOffset = opA->reg; + add_immediate(opOut->reg, opB->reg, opA->object, LOW_PART(opA->immOffset)); + } else if (opA->object) { + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = (output_reg && (output_reg != opB->reg)) ? output_reg : used_virtual_registers[RegClass_GPR]++; + opOut->regOffset = opB->reg; + add_immediate(opOut->reg, opA->reg, opA->object, LOW_PART(opA->immOffset)); + } else { + opOut->optype = OpndType_GPR_ImmOffset; + opOut->reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + opOut->immOffset = opA->immOffset; + opOut->object = opA->object; + emitpcode(PC_ADD, opOut->reg, opA->reg, opB->reg); + } + break; + case COMBO_OP(OpndType_GPR, OpndType_GPR_Indexed): + tmp_op = opA; + opA = opB; + opB = tmp_op; + case COMBO_OP(OpndType_GPR_Indexed, OpndType_GPR): + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = opA->reg; + opOut->regOffset = (output_reg && (output_reg != opA->reg)) ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, opOut->regOffset, opA->regOffset, opB->reg); + break; + case COMBO_OP(OpndType_GPR_ImmOffset, OpndType_GPR_Indexed): + tmp_op = opA; + opA = opB; + opB = tmp_op; + case COMBO_OP(OpndType_GPR_Indexed, OpndType_GPR_ImmOffset): + if (opB->object) { + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = (output_reg && (output_reg != opB->reg)) ? output_reg + : used_virtual_registers[RegClass_GPR]++; + opOut->regOffset = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, opOut->reg, opA->reg, opA->regOffset); + add_immediate(opOut->regOffset, opB->reg, opB->object, opB->immOffset); + } else { + opOut->optype = OpndType_GPR_ImmOffset; + opOut->immOffset = opB->immOffset; + opOut->object = opB->object; + opOut->reg = (output_reg && (output_reg != opB->reg)) ? output_reg + : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, opOut->reg, opA->reg, opA->regOffset); + emitpcode(PC_ADD, opOut->reg, opOut->reg, opB->reg); + } + break; + case COMBO_OP(OpndType_GPR_Indexed, OpndType_GPR_Indexed): + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = opA->reg; + opOut->regOffset = (output_reg && (output_reg != opA->regOffset)) ? output_reg + : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, opOut->regOffset, opB->reg, opB->regOffset); + emitpcode(PC_ADD, opOut->regOffset, opOut->regOffset, opA->regOffset); + break; + case COMBO_OP(OpndType_GPR_ImmOffset, OpndType_Absolute): + tmp_op = opA; + opA = opB; + opB = tmp_op; + case COMBO_OP(OpndType_Absolute, OpndType_GPR_ImmOffset): + if (!opB->object) { + opOut->optype = OpndType_GPR_ImmOffset; + opOut->reg = opB->reg; + opOut->immOffset = opB->immOffset; + opOut->object = opB->object; + if (FITS_IN_SHORT(opOut->immOffset + opA->immediate)) { + opOut->immOffset += opA->immediate; + } else { + opOut->reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + if (!HIGH_PART(opA->immediate)) { + emitpcode(PC_ADDI, opOut->reg, opB->reg, 0, LOW_PART(opA->immediate)); + } else { + emitpcode(PC_ADDIS, opOut->reg, opB->reg, 0, (short) HIGH_PART(opA->immediate)); + if (FITS_IN_SHORT(opOut->immOffset + LOW_PART(opA->immediate))) { + opOut->immOffset += LOW_PART(opA->immediate); + } else { + emitpcode(PC_ADDI, opOut->reg, opOut->reg, 0, LOW_PART(opA->immediate)); + } + } + } + break; + } else if (opB->object->datatype == DLOCAL && can_add_displ_to_local(opB->object, opB->immOffset + opA->immediate)) { + opOut->optype = OpndType_GPR_ImmOffset; + opOut->object = opB->object; + opOut->reg = opB->reg; + opOut->immOffset = LOW_PART(opB->immOffset + opA->immediate); + break; + } else { + opOut->reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + add_immediate(opOut->reg, opB->reg, opB->object, opB->immOffset); + opB->optype = OpndType_GPR; + opB->reg = opOut->reg; + tmp_op = opA; + opA = opB; + opB = tmp_op; + } + case COMBO_OP(OpndType_GPR, OpndType_Absolute): + tmp_op = opA; + opA = opB; + opB = tmp_op; + case COMBO_OP(OpndType_Absolute, OpndType_GPR): + opOut->optype = (opA->immediate != 0) ? OpndType_GPR_ImmOffset : OpndType_GPR; + opOut->immOffset = LOW_PART(opA->immediate); + opOut->object = NULL; + if (FITS_IN_SHORT(opA->immediate)) { + opOut->reg = opB->reg; + } else { + opOut->reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADDIS, opOut->reg, opB->reg, 0, (short) HIGH_PART(opA->immediate)); + } + break; + case COMBO_OP(OpndType_GPR_Indexed, OpndType_Absolute): + tmp_op = opA; + opA = opB; + opB = tmp_op; + case COMBO_OP(OpndType_Absolute, OpndType_GPR_Indexed): + opOut->optype = OpndType_GPR_Indexed; + opOut->reg = opB->reg; + opOut->regOffset = (output_reg && (output_reg != opB->reg)) ? output_reg : used_virtual_registers[RegClass_GPR]++; + if (!HIGH_PART(opA->immediate)) { + emitpcode(PC_ADDI, opOut->regOffset, opB->regOffset, 0, LOW_PART(opA->immediate)); + } else { + emitpcode(PC_ADDIS, opOut->regOffset, opB->regOffset, 0, (short) HIGH_PART(opA->immediate)); + if (LOW_PART_BUGGY(opA->immediate)) + emitpcode(PC_ADDI, opOut->regOffset, opOut->regOffset, 0, LOW_PART(opA->immediate)); + } + break; + case COMBO_OP(OpndType_Absolute, OpndType_Absolute): + opOut->optype = OpndType_Absolute; + opOut->immediate = opA->immediate + opB->immediate; + break; + default: + CError_FATAL(415); + } +} + +void coerce_to_addressable(Operand *op) { + UInt32 offset; + short reg; + short flag; + short tmp; + Object *obj; + + flag = 0; + obj = op->object; + tmp = 0; + + switch (op->optype) { + case OpndType_GPR: + case OpndType_GPR_ImmOffset: + case OpndType_GPR_Indexed: + case OpndType_GPRPair: + case OpndType_Absolute: + case OpndType_VR: + case OpndType_CRField: + case OpndType_IndirectGPR_ImmOffset: + case OpndType_IndirectGPR_Indexed: + break; + case OpndType_IndirectSymbol: + flag = 1; + case OpndType_Symbol: + if (obj->datatype == DLOCAL) { + if (!local_is_16bit_offset(obj)) { + reg = used_virtual_registers[RegClass_GPR]++; + op_absolute_ha(reg, local_base_register(obj), obj, 0, 1); + op->optype = OpndType_GPR_ImmOffset; + op->reg = reg; + op->object = obj; + } else { + op->optype = OpndType_GPR_ImmOffset; + op->reg = local_base_register(obj); + op->object = obj; + } + } else if (obj->datatype == DABSOLUTE) { + offset = obj->u.address; + if (FITS_IN_SHORT(offset)) { + op->reg = 0; + op->immOffset = obj->u.address; + } else { + emitpcode(PC_LIS, op->reg = used_virtual_registers[RegClass_GPR]++, 0, (short) HIGH_PART(offset)); + op->immOffset = LOW_PART(obj->u.address); + } + op->object = obj; + op->optype = OpndType_GPR_ImmOffset; + } else { + if (copts.codegen_pic) + tmp = pic_base_reg; + reg = used_virtual_registers[RegClass_GPR]++; + op_absolute_ha(reg, tmp, obj, 0, 1); + op->optype = OpndType_GPR_ImmOffset; + op->reg = reg; + } + if (flag) { + if (op->optype == OpndType_GPR_ImmOffset) { + op->optype = OpndType_IndirectGPR_ImmOffset; + } else { + CError_FATAL(563); + } + } + break; + default: + CError_FATAL(581); + } +} + +void Coerce_to_register(Operand *op, Type *type, short output_reg) { + SInt32 offset; + Opcode opcode; + short reg; + short tmp; + short cond_neg; + short cond; + short bit_offset; + short bit_size; + + if (TYPE_IS_8BYTES(type)) { + coerce_to_register_pair(op, type, output_reg, 0); + return; + } + + coerce_to_addressable(op); + switch (op->optype) { + case OpndType_GPRPair: + return; + case OpndType_GPR: + return; + case OpndType_GPR_ImmOffset: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + add_immediate(reg, op->reg, op->object, op->immOffset); + break; + case OpndType_GPR_Indexed: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, reg, op->reg, op->regOffset); + break; + case OpndType_Absolute: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + offset = op->immediate; + if (FITS_IN_SHORT(offset)) { + emitpcode(PC_LI, reg, offset); + } else { + tmp = reg; + if (copts.optimizationlevel > 1 && LOW_PART_BUGGY(offset)) + tmp = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_LIS, tmp, 0, (short) HIGH_PART(offset)); + if (LOW_PART_BUGGY(offset)) + emitpcode(PC_ADDI, reg, tmp, 0, LOW_PART(offset)); + } + break; + case OpndType_IndirectGPR_ImmOffset: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + opcode = PC_LWZ; + if (IS_TYPE_INT(type) || IS_TYPE_ENUM(type)) { + switch (type->size) { + case 1: + opcode = PC_LBZ; + break; + case 2: + if (is_unsigned(type)) + opcode = PC_LHZ; + else + opcode = PC_LHA; + break; + } + } else { + CError_ASSERT(680, IS_TYPE_POINTER(type) || IS_TYPE_4BYTES_MEMBERPOINTER(type)); + } + load_store_register(opcode, reg, op->reg, op->object, op->immOffset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + opcode = PC_LWZX; + if (IS_TYPE_INT(type) || IS_TYPE_ENUM(type)) { + switch (type->size) { + case 1: + opcode = PC_LBZX; + break; + case 2: + if (is_unsigned(type)) + opcode = PC_LHZX; + else + opcode = PC_LHAX; + break; + } + } else { + CError_ASSERT(724, IS_TYPE_POINTER(type) || IS_TYPE_4BYTES_MEMBERPOINTER(type)); + } + emitpcode(opcode, reg, op->reg, op->regOffset); + setpcodeflags(op->flags); + break; + case OpndType_CRField: + cond_neg = 0; + cond = 0; + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_MFCR, tmp = reg); + switch (op->regOffset) { + case ENOTEQU: + cond_neg = 1; + case EEQU: + cond = 2; + break; + case EGREATEREQU: + cond_neg = 1; + case ELESS: + cond = 0; + break; + case ELESSEQU: + cond_neg = 1; + case EGREATER: + cond = 1; + break; + default: + CError_FATAL(758); + } + bit_offset = cond + (op->reg << 2); + bit_size = 1; + emitpcode(PC_RLWINM, tmp, tmp, (bit_size + bit_offset) & 31, 32 - bit_size, 31); + if (cond_neg) + emitpcode(PC_XORI, tmp, tmp, 1); + break; + default: + CError_FATAL(769); + } + + op->optype = OpndType_GPR; + op->reg = reg; +} + +void coerce_to_register_pair(Operand *op, Type *type, short output_reg, short output_regHi) { + SInt32 offset; + short reg; + short regHi; + short tmp1; + short tmp2; + + regHi = -1; + + CError_ASSERT(794, TYPE_IS_8BYTES(type) || (IS_TYPE_STRUCT(type) && type->size == 8)); + + coerce_to_addressable(op); + switch (op->optype) { + case OpndType_GPRPair: + if (output_reg && !output_regHi) + output_regHi = used_virtual_registers[RegClass_GPR]++; + if (output_regHi && !output_reg) + output_reg = used_virtual_registers[RegClass_GPR]++; + if (op->reg != output_reg || op->regHi != output_regHi) { + tmp1 = output_reg ? output_reg : op->reg; + tmp2 = output_regHi ? output_regHi : op->regHi; + if (tmp1 != op->reg) { + if (tmp1 == op->regHi) { + CError_ASSERT(818, tmp1 != tmp2); + emitpcode(PC_MR, tmp2, op->regHi); + emitpcode(PC_MR, tmp1, op->reg); + } else { + emitpcode(PC_MR, tmp1, op->reg); + if (op->regHi != tmp2) + emitpcode(PC_MR, tmp2, op->regHi); + } + } else if (tmp2 != op->regHi) { + if (tmp2 == op->reg) { + CError_ASSERT(832, tmp1 != tmp2); + emitpcode(PC_MR, tmp1, op->reg); + emitpcode(PC_MR, tmp2, op->regHi); + } else { + emitpcode(PC_MR, tmp2, op->regHi); + if (op->reg != tmp1) + emitpcode(PC_MR, tmp1, op->reg); + } + } + } + reg = op->reg; + regHi = op->regHi; + break; + case OpndType_GPR: + CError_FATAL(849); + break; + case OpndType_GPR_ImmOffset: + CError_FATAL(852); + break; + case OpndType_GPR_Indexed: + CError_FATAL(855); + break; + case OpndType_Absolute: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + offset = op->immediate; + if (FITS_IN_SHORT(offset)) { + emitpcode(PC_LI, reg, offset); + } else { + tmp1 = reg; + if (copts.optimizationlevel > 1 && LOW_PART_BUGGY(offset)) + tmp1 = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_LIS, tmp1, 0, (short) HIGH_PART(offset)); + if (LOW_PART_BUGGY(offset)) + emitpcode(PC_ADDI, reg, tmp1, 0, LOW_PART(offset)); + } + regHi = output_regHi ? output_regHi : used_virtual_registers[RegClass_GPR]++; + if (is_unsigned(type) || offset >= 0) + load_immediate(regHi, 0); + else + load_immediate(regHi, -1); + break; + case OpndType_IndirectGPR_ImmOffset: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + regHi = output_regHi ? output_regHi : used_virtual_registers[RegClass_GPR]++; + if (op->reg == regHi) { + if (op->reg == reg) { + CError_FATAL(887); + } else { + load_store_register(PC_LWZ, reg, op->reg, op->object, op->immOffset + low_offset); + setpcodeflags(op->flags); + load_store_register(PC_LWZ, regHi, op->reg, op->object, op->immOffset + high_offset); + setpcodeflags(op->flags); + } + } else { + load_store_register(PC_LWZ, regHi, op->reg, op->object, op->immOffset + high_offset); + setpcodeflags(op->flags); + load_store_register(PC_LWZ, reg, op->reg, op->object, op->immOffset + low_offset); + setpcodeflags(op->flags); + } + break; + case OpndType_IndirectGPR_Indexed: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + regHi = output_regHi ? output_regHi : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, reg, op->reg, op->regOffset); + load_store_register(PC_LWZ, regHi, reg, NULL, high_offset); + setpcodeflags(op->flags); + load_store_register(PC_LWZ, reg, reg, NULL, low_offset); + setpcodeflags(op->flags); + break; + default: + CError_FATAL(912); + } + + if (regHi == -1) { + CError_FATAL(916); + } else { + op->optype = OpndType_GPRPair; + op->reg = reg; + op->regHi = regHi; + } +} + +void Coerce_to_fp_register(Operand *op, Type *type, short output_reg) { + short reg; + + coerce_to_addressable(op); + + switch (op->optype) { + case OpndType_FPR: + reg = op->reg; + break; + case OpndType_IndirectGPR_ImmOffset: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_FPR]++; + load_store_register((type->size == 4) ? PC_LFS : PC_LFD, reg, op->reg, op->object, op->immOffset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_FPR]++; + emitpcode((type->size == 4) ? PC_LFSX : PC_LFDX, reg, op->reg, op->regOffset, 0, 0x390); + setpcodeflags(op->flags); + break; + default: + CError_FATAL(986); + } + + op->optype = OpndType_FPR; + op->reg = reg; +} + +void Coerce_to_v_register(Operand *op, Type *type, short output_reg) { + short reg; + + coerce_to_addressable(op); + + switch (op->optype) { + case OpndType_VR: + reg = op->reg; + break; + case OpndType_IndirectGPR_ImmOffset: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_VR]++; + load_store_register(PC_LVX, reg, op->reg, op->object, op->immOffset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_VR]++; + emitpcode(PC_LVX, reg, op->reg, op->regOffset); + setpcodeflags(op->flags); + break; + case OpndType_Absolute: + reg = output_reg ? output_reg : used_virtual_registers[RegClass_VR]++; + switch (TYPE_STRUCT(type)->stype) { + case STRUCT_VECTOR_UCHAR: + case STRUCT_VECTOR_SCHAR: + case STRUCT_VECTOR_BCHAR: + emitpcode(PC_VSPLTISB, reg, op->immediate); + break; + case STRUCT_VECTOR_USHORT: + case STRUCT_VECTOR_SSHORT: + case STRUCT_VECTOR_BSHORT: + case STRUCT_VECTOR_PIXEL: + emitpcode(PC_VSPLTISH, reg, op->immediate); + break; + case STRUCT_VECTOR_UINT: + case STRUCT_VECTOR_SINT: + case STRUCT_VECTOR_BINT: + case STRUCT_VECTOR_FLOAT: + emitpcode(PC_VSPLTISW, reg, op->immediate); + break; + default: + CError_FATAL(1049); + } + op->optype = OpndType_VR; + op->reg = reg; + setpcodeflags(op->flags); + break; + default: + CError_FATAL(1059); + } + + op->optype = OpndType_VR; + op->reg = reg; +} + +void store(short reg, Operand *op, Type *type) { + Opcode opcode; + + coerce_to_addressable(op); + switch (op->optype) { + case OpndType_IndirectGPR_ImmOffset: + opcode = PC_STW; + if (IS_TYPE_INT(type) || IS_TYPE_ENUM(type)) { + switch (type->size) { + case 1: + opcode = PC_STB; + break; + case 2: + opcode = PC_STH; + break; + } + } else { + CError_ASSERT(1171, IS_TYPE_POINTER(type) || IS_TYPE_4BYTES_MEMBERPOINTER(type)); + } + load_store_register(opcode, reg, op->reg, op->object, op->immOffset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + opcode = PC_STWX; + if (IS_TYPE_INT(type) || IS_TYPE_ENUM(type)) { + switch (type->size) { + case 1: + opcode = PC_STBX; + break; + case 2: + opcode = PC_STHX; + break; + } + } else { + CError_ASSERT(1188, IS_TYPE_POINTER(type) || IS_TYPE_4BYTES_MEMBERPOINTER(type)); + } + emitpcode(opcode, reg, op->reg, op->regOffset); + setpcodeflags(op->flags); + break; + default: + CError_FATAL(1193); + } +} + +void store_pair(short reg, short regHi, Operand *op, Type *type) { + short tmp; + + CError_ASSERT(1208, TYPE_IS_8BYTES(type)); + + coerce_to_addressable(op); + switch (op->optype) { + case OpndType_IndirectGPR_ImmOffset: + load_store_register(PC_STW, reg, op->reg, op->object, op->immOffset + low_offset); + setpcodeflags(op->flags); + load_store_register(PC_STW, regHi, op->reg, op->object, op->immOffset + high_offset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + tmp = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, tmp, op->reg, op->regOffset); + load_store_register(PC_STW, reg, tmp, NULL, low_offset); + setpcodeflags(op->flags); + load_store_register(PC_STW, regHi, tmp, NULL, high_offset); + setpcodeflags(op->flags); + break; + default: + CError_FATAL(1228); + } +} + +void store_fp(short reg, Operand *op, Type *type) { + coerce_to_addressable(op); + switch (op->optype) { + case OpndType_IndirectGPR_ImmOffset: + load_store_register((type->size == 4) ? PC_STFS : PC_STFD, reg, op->reg, op->object, op->immOffset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + emitpcode((type->size == 4) ? PC_STFSX : PC_STFDX, reg, op->reg, op->regOffset); + setpcodeflags(op->flags); + break; + default: + CError_FATAL(1259); + } +} + +void store_v(short reg, Operand *op, Type *tstruct) { + coerce_to_addressable(op); + switch (op->optype) { + case OpndType_IndirectGPR_ImmOffset: + load_store_register(PC_STVX, reg, op->reg, op->object, op->immOffset); + setpcodeflags(op->flags); + break; + case OpndType_IndirectGPR_Indexed: + emitpcode(PC_STVX, reg, op->reg, op->regOffset); + setpcodeflags(op->flags); + break; + default: + CError_FATAL(1283); + } +} + +static Boolean last_matches_rlwinm_or_exts(Operand *op, short opcode, short b, short c) { + PCode *pc; + + if (pclastblock->pcodeCount <= 0) + return 0; + + pc = pclastblock->lastPCode; + if (pc->args[0].kind != PCOp_REGISTER || pc->args[0].arg != RegClass_GPR || pc->args[0].data.reg.reg != op->reg) + return 0; + + if (pc->op != opcode && (opcode != PC_EXTSH || pc->op != PC_EXTSB)) + return 0; + + if (opcode == PC_RLWINM) { + if (pc->args[2].data.imm.value != 0 || pc->args[3].data.imm.value != b || pc->args[4].data.imm.value != c) + return 0; + } + + return 1; +} + +void extend32(Operand *op, Type *type, short output_reg) { + int r28; + int reg; + + r28 = op->optype >= OpndType_IndirectGPR_ImmOffset; + if (op->optype != OpndType_GPR) + Coerce_to_register(op, type, output_reg); + + switch (type->size) { + case 1: + if (is_unsigned(type)) { + if (r28) + return; + if (last_matches_rlwinm_or_exts(op, PC_RLWINM, 24, 31)) + return; + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, reg, op->reg, 0, 24, 31); + } else { + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + if (last_matches_rlwinm_or_exts(op, PC_EXTSB, 0, 0)) + return; + emitpcode(PC_EXTSB, reg, op->reg); + } + break; + case 2: + if (r28) + return; + if (is_unsigned(type)) { + if (last_matches_rlwinm_or_exts(op, PC_RLWINM, 16, 31)) + return; + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, reg, op->reg, 0, 16, 31); + } else { + if (last_matches_rlwinm_or_exts(op, PC_EXTSH, 0, 0)) + return; + reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_EXTSH, reg, op->reg); + } + break; + default: + CError_FATAL(1389); + } + + op->optype = OpndType_GPR; + op->reg = reg; +} + +void extend64(Operand *op, Type *type, short output_reg, short output_regHi) { + short tmp; + short regHi; + + if (op->optype != OpndType_GPR) + Coerce_to_register(op, type, output_reg); + + regHi = output_regHi ? output_regHi : used_virtual_registers[RegClass_GPR]++; + if (regHi == op->reg) { + tmp = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_MR, tmp, op->reg); + op->reg = tmp; + } + + if (is_unsigned(type)) + load_immediate(regHi, 0); + else + emitpcode(PC_SRAWI, regHi, op->reg, 31); + + op->optype = OpndType_GPRPair; + op->regHi = regHi; +} + +void load_floating_constant(short reg, Type *type, Float *data) { + Object *object; + Object *indObject; + Operand op1; + Operand op2; + Operand op3; + SInt32 offset = 0; + + memclrw(&op1, sizeof(Operand)); + + object = CreateFloatConst(type, data, &offset); + indObject = createIndirect(object, 0, 1); + + if (indObject) { + symbol_operand(&op1, indObject); + indirect(&op1, NULL); + } else { + symbol_operand(&op1, object); + } + + if (offset) { + op2 = op1; + memclrw(&op3, sizeof(Operand)); + op3.optype = OpndType_Absolute; + op3.immediate = offset; + if (op2.optype != OpndType_GPR) + Coerce_to_register(&op2, TYPE(&void_ptr), 0); + combine(&op2, &op3, 0, &op1); + } + + indirect(&op1, NULL); + if (op1.optype != OpndType_FPR) + Coerce_to_fp_register(&op1, type, reg); +} + +void convert_integer_to_floating(Operand *op, Boolean is_single, short output_reg) { + Operand temp_op; + Float d; + int const_reg; + int tmp_reg; + int work_reg; + int result_reg; + Opcode opcode; + + symbol_operand(&temp_op, maketemporary(TYPE(&stdouble))); + coerce_to_addressable(&temp_op); + d.value = *((double *) &int_to_float_cc); + + const_reg = used_virtual_registers[RegClass_FPR]++; + load_floating_constant(const_reg, TYPE(&stdouble), &d); + + tmp_reg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_XORIS, tmp_reg, op->reg, 0x8000); + load_store_register(PC_STW, tmp_reg, temp_op.reg, temp_op.object, low_offset); + + emitpcode(PC_LIS, tmp_reg = used_virtual_registers[RegClass_GPR]++, 0, 0x4330); + load_store_register(PC_STW, tmp_reg, temp_op.reg, temp_op.object, high_offset); + + load_store_register(PC_LFD, work_reg = used_virtual_registers[RegClass_FPR]++, temp_op.reg, temp_op.object, 0); + + result_reg = output_reg ? output_reg : used_virtual_registers[RegClass_FPR]++; + if (is_single != 0) + opcode = PC_FSUBS; + else + opcode = PC_FSUB; + emitpcode(opcode, result_reg, work_reg, const_reg); + + op->optype = OpndType_FPR; + op->reg = result_reg; +} + +void convert_unsigned_to_floating(Operand *op, Boolean is_single, short output_reg) { + Operand temp_op; + Float d; + int const_reg; + int tmp_reg; + int work_reg; + int result_reg; + Opcode opcode; + + symbol_operand(&temp_op, maketemporary(TYPE(&stdouble))); + coerce_to_addressable(&temp_op); + d.value = *((double *) &uns_to_float_cc); + + const_reg = used_virtual_registers[RegClass_FPR]++; + load_floating_constant(const_reg, TYPE(&stdouble), &d); + + load_store_register(PC_STW, op->reg, temp_op.reg, temp_op.object, low_offset); + + emitpcode(PC_LIS, tmp_reg = used_virtual_registers[RegClass_GPR]++, 0, 0x4330); + load_store_register(PC_STW, tmp_reg, temp_op.reg, temp_op.object, high_offset); + + load_store_register(PC_LFD, work_reg = used_virtual_registers[RegClass_FPR]++, temp_op.reg, temp_op.object, 0); + + result_reg = output_reg ? output_reg : used_virtual_registers[RegClass_FPR]++; + if (is_single != 0) + opcode = PC_FSUBS; + else + opcode = PC_FSUB; + emitpcode(opcode, result_reg, work_reg, const_reg); + + op->optype = OpndType_FPR; + op->reg = result_reg; +} + +void convert_floating_to_integer(Operand *op, short output_reg) { + Operand temp_op; + int tmp_reg; + int result_reg; + + symbol_operand(&temp_op, maketemporary(TYPE(&stdouble))); + coerce_to_addressable(&temp_op); + + tmp_reg = used_virtual_registers[RegClass_FPR]++; + emitpcode(PC_FCTIWZ, tmp_reg, op->reg); + load_store_register(PC_STFD, tmp_reg, temp_op.reg, temp_op.object, 0); + + result_reg = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + load_store_register(PC_LWZ, result_reg, temp_op.reg, temp_op.object, low_offset); + + op->optype = OpndType_GPR; + op->reg = result_reg; +} + +void convert_floating_to_unsigned(Operand *op, short output_reg) { + static UInt32 used_regs[RegClassMax] = {0, 0, 0, 2, 0}; + + if (op->reg != 1) + emitpcode(PC_FMR, 1, op->reg); + + branch_subroutine(rt_cvt_fp2unsigned, 0, used_regs); + + op->optype = OpndType_GPR; + op->reg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_MR, op->reg, 3); +} + +void extract_bitfield(Operand *input_op, TypeBitfield *tbitfield, short output_reg, Operand *output_op) { + int r27; + int offset; + int tmp_reg; + int output; + + offset = tbitfield->bitlength; + output = output_reg ? output_reg : used_virtual_registers[RegClass_GPR]++; + r27 = tbitfield->offset + (32 - (tbitfield->bitfieldtype->size * 8)); + if (is_unsigned(tbitfield->bitfieldtype)) { + emitpcode(PC_RLWINM, output, input_op->reg, (r27 + offset) & 31, 32 - offset, 31); + } else if (r27 == 0) { + emitpcode(PC_SRAWI, output, input_op->reg, 32 - offset); + } else { + tmp_reg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, tmp_reg, input_op->reg, r27 & 31, 0, offset); + emitpcode(PC_SRAWI, output, tmp_reg, 32 - offset); + } + + output_op->optype = OpndType_GPR; + output_op->reg = output; +} + +void insert_bitfield(short reg, Operand *op, TypeBitfield *tbitfield) { + int offset = tbitfield->bitlength; + int r7 = tbitfield->offset + (32 - (tbitfield->bitfieldtype->size * 8)); + emitpcode(PC_RLWIMI, op->reg, reg, 32 - (r7 + offset), r7, r7 + offset - 1); +} + +void load_address(short dest_reg, Operand *op) { + coerce_to_addressable(op); + if (op->optype == OpndType_IndirectGPR_ImmOffset) { + if (!op->immOffset && !op->object) { + if (op->reg != dest_reg) + emitpcode(PC_MR, dest_reg, op->reg); + } else { + add_immediate(dest_reg, op->reg, op->object, (SInt16) op->immOffset); + } + } else if (op->optype == OpndType_IndirectGPR_Indexed) { + emitpcode(PC_ADD, dest_reg, op->reg, op->regOffset); + } else { + CError_FATAL(1849); + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeAssembly.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeAssembly.c new file mode 100644 index 0000000..368f8c5 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeAssembly.c @@ -0,0 +1,1613 @@ +#include "compiler/PCodeAssembly.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/CMangler.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/ObjGenMachO.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/PCodeListing.h" +#include "compiler/PPCError.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/TOC.h" +#include "compiler/objects.h" + +static UInt32 codebase; + +static SInt32 pcode_update_mem_labeldiff_imm(PCode *instr, const PCodeArg *op, WeirdOperand *wop) { + SInt32 offset; + Object *object; + UInt8 arg; + + if (op->kind == PCOp_MEMORY) { + object = op->data.mem.obj; + offset = op->data.mem.offset; + switch (object->datatype) { + case DLOCAL: + switch ((UInt8) op->arg) { + case RefType_1: + offset += local_offset_16(object); + break; + case RefType_D: + offset = local_offset_lo(object, offset); + break; + case RefType_C: + offset = local_offset_ha(object, offset); + break; + default: + CError_FATAL(83); + } + break; + case DDATA: + case DFUNC: + case DVFUNC: + case DNONLAZYPTR: + switch ((UInt8) op->arg) { + case RefType_6: + wop->type = MW_RELOC_5_LO16; + break; + case RefType_2: + wop->type = MW_RELOC_3; + break; + case RefType_3: + wop->type = MW_RELOC_4; + break; + case RefType_8: + wop->type = MW_RELOC_7_HA16; + break; + case RefType_7: + wop->type = MW_RELOC_6_HI16; + break; + default: + CError_FATAL(135); + } + + wop->x2 = object; + CError_ASSERT(144, offset == 0); + break; + default: + CError_FATAL(164); + } + } else if (op->kind == PCOp_LABELDIFF) { + arg = op->arg; + + offset = op->data.labeldiff.labelA->block->codeOffset - op->data.labeldiff.labelB->block->codeOffset; + offset += op->data.labeldiff.offset; + if (arg == 1) + offset = -offset; + + if (offset > 0x7FFF) + PPCError_Error(PPCErrorStr109); + else if (offset < -0x8000) + PPCError_Error(PPCErrorStr109); + } else if (op->kind == PCOp_IMMEDIATE) { + offset = op->data.imm.value; + } else { + CError_FATAL(193); + } + + return offset; +} + +UInt32 assemblepcode(PCode *instr, UInt32 offset, WeirdOperand *wop) { + UInt32 bits; + + bits = opcodeinfo[instr->op].insn; + wop->type = -1; + wop->x6 = 0; + + switch (instr->op) { + case PC_BL: { + int flag = PCODE_FLAG_SET_T(instr) & fAbsolute; + if (instr->args[0].kind == PCOp_MEMORY) { + bits |= instr->args[0].data.mem.offset & 0x3FFFFFC; + wop->type = MW_RELOC_2_BR24; + wop->x2 = instr->args[0].data.mem.obj; + if (flag == 0) + wop->type = MW_RELOC_2_BR24; + else + CError_FATAL(246); + } else if (instr->args[0].kind == PCOp_IMMEDIATE) { + bits |= instr->args[0].data.imm.value & 0x3FFFFFC; + if (flag) + bits |= 2; + } else { + bits |= (instr->args[0].data.label.label->block->codeOffset - offset) & 0x3FFFFFC; + if (flag) + CError_FATAL(261); + } + break; + } + + case PC_B: { + int flag = PCODE_FLAG_SET_T(instr) & fAbsolute; + if (instr->args[0].kind == PCOp_MEMORY) { + bits |= instr->args[0].data.mem.offset & 0x3FFFFFC; + wop->x2 = instr->args[0].data.mem.obj; + if (flag == 0) + wop->type = MW_RELOC_2_BR24; + else + CError_FATAL(288); + } else if (instr->args[0].kind == PCOp_IMMEDIATE) { + bits |= instr->args[0].data.imm.value & 0x3FFFFFC; + if (flag) + bits |= 2; + } else { + bits |= (instr->args[0].data.label.label->block->codeOffset - offset) & 0x3FFFFFC; + if (flag) + CError_FATAL(302); + } + if (PCODE_FLAG_SET_T(instr) & fLink) + bits |= 1; + break; + } + + case PC_BDNZ: + case PC_BDZ: { + int flag = PCODE_FLAG_SET_T(instr) & fAbsolute; + if (instr->args[0].kind == PCOp_MEMORY) { + bits |= instr->args[0].data.mem.offset & 0xFFFC; + wop->x2 = instr->args[0].data.mem.obj; + if (flag == 0) + wop->type = MW_RELOC_8; + else + CError_FATAL(333); + } else { + SInt32 value; + if (instr->args[0].kind == PCOp_IMMEDIATE) + value = instr->args[0].data.imm.value; + else + value = instr->args[0].data.label.label->block->codeOffset - offset; + + bits |= value & 0xFFFF; + if (value < 0) { + if (PCODE_FLAG_SET_T(instr) & fBranchNotTaken) + bits |= 0x200000u; + } else { + if (PCODE_FLAG_SET_T(instr) & fBranchTaken) + bits |= 0x200000u; + } + } + if (PCODE_FLAG_SET_T(instr) & fLink) + bits |= 1; + break; + } + + case PC_BC: { + int flag = PCODE_FLAG_SET_T(instr) & fAbsolute; + bits |= (instr->args[0].data.imm.value & 31) << 21; + bits |= ((instr->args[1].data.reg.reg * 4 + instr->args[2].data.imm.value) & 31) << 16; + + if (instr->args[3].kind == PCOp_MEMORY) { + bits |= instr->args[3].data.mem.offset & 0xFFFC; + wop->x2 = instr->args[3].data.mem.obj; + if (flag == 0) + wop->type = MW_RELOC_8; + else + CError_FATAL(387); + } else { + SInt32 value; + if (instr->args[3].kind == PCOp_IMMEDIATE) + value = instr->args[3].data.imm.value; + else + value = instr->args[3].data.label.label->block->codeOffset - offset; + + bits |= value & 0xFFFF; + if (value < 0) { + if (PCODE_FLAG_SET_T(instr) & fBranchNotTaken) + bits |= 0x200000u; + } else { + if (PCODE_FLAG_SET_T(instr) & fBranchTaken) + bits |= 0x200000u; + } + } + if (PCODE_FLAG_SET_T(instr) & fLink) + bits |= 1; + break; + } + + case PC_BT: + case PC_BF: + case PC_BDNZT: + case PC_BDNZF: + case PC_BDZT: + case PC_BDZF: { + int flag = PCODE_FLAG_SET_T(instr) & fAbsolute; + bits |= ((instr->args[0].data.reg.reg * 4 + instr->args[1].data.imm.value) & 31) << 16; + + if (instr->args[2].kind == PCOp_MEMORY) { + bits |= instr->args[2].data.mem.offset & 0xFFFC; + wop->x2 = instr->args[2].data.mem.obj; + if (flag == 0) + wop->type = MW_RELOC_8; + else + CError_FATAL(446); + } else { + SInt32 value; + if (instr->args[2].kind == PCOp_IMMEDIATE) { + value = instr->args[2].data.imm.value; + if (flag) + bits |= 2; + } else { + value = instr->args[2].data.label.label->block->codeOffset - offset; + CError_ASSERT(458, !flag); + } + + bits |= value & 0xFFFF; + + if (value < 0) { + if (PCODE_FLAG_SET_T(instr) & fBranchNotTaken) + bits |= 0x200000u; + } else { + if (PCODE_FLAG_SET_T(instr) & fBranchTaken) + bits |= 0x200000u; + } + } + + if (PCODE_FLAG_SET_T(instr) & fLink) + bits |= 1; + break; + } + + case PC_BTLR: + case PC_BTCTR: + case PC_BFLR: + case PC_BFCTR: + bits |= ((instr->args[0].data.reg.reg * 4 + instr->args[1].data.imm.value) & 31) << 16; + if (PCODE_FLAG_SET_T(instr) & fLink) + bits |= 1; + if (PCODE_FLAG_SET_T(instr) & fBranchTaken) + bits |= 0x200000u; + break; + + case PC_BCLR: + case PC_BCCTR: + bits |= instr->args[0].data.imm.value << 21; + bits |= ((instr->args[1].data.reg.reg * 4 + instr->args[2].data.imm.value) & 31) << 16; + case PC_BLR: + case PC_BCTR: + case PC_BCTRL: + case PC_BLRL: + if (PCODE_FLAG_SET_T(instr) & fLink) + bits |= 1; + if (PCODE_FLAG_SET_T(instr) & fBranchTaken) + bits |= 0x200000u; + break; + + case PC_CRAND: + case PC_CRANDC: + case PC_CREQV: + case PC_CRNAND: + case PC_CRNOR: + case PC_CROR: + case PC_CRORC: + case PC_CRXOR: + bits |= ((instr->args[0].data.reg.reg * 4 + instr->args[1].data.imm.value) & 31) << 21; + bits |= ((instr->args[2].data.reg.reg * 4 + instr->args[3].data.imm.value) & 31) << 16; + bits |= ((instr->args[4].data.reg.reg * 4 + instr->args[5].data.imm.value) & 31) << 11; + break; + + case PC_MCRF: + bits |= instr->args[0].data.reg.reg << 23; + bits |= instr->args[1].data.reg.reg << 18; + break; + + case PC_LBZ: + case PC_LBZU: + case PC_LHZ: + case PC_LHZU: + case PC_LHA: + case PC_LHAU: + case PC_LWZ: + case PC_LWZU: + case PC_LMW: + case PC_STB: + case PC_STBU: + case PC_STH: + case PC_STHU: + case PC_STW: + case PC_STWU: + case PC_STMW: + case PC_LFS: + case PC_LFSU: + case PC_LFD: + case PC_LFDU: + case PC_STFS: + case PC_STFSU: + case PC_STFD: + case PC_STFDU: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= pcode_update_mem_labeldiff_imm(instr, &instr->args[2], wop) & 0xFFFF; + break; + + case PC_LBZX: + case PC_LBZUX: + case PC_LHZX: + case PC_LHZUX: + case PC_LHAX: + case PC_LHAUX: + case PC_LHBRX: + case PC_LWZX: + case PC_LWZUX: + case PC_LWBRX: + case PC_STBX: + case PC_STBUX: + case PC_STHX: + case PC_STHUX: + case PC_STHBRX: + case PC_STWX: + case PC_STWUX: + case PC_STWBRX: + case PC_LFSX: + case PC_LFSUX: + case PC_LFDX: + case PC_LFDUX: + case PC_STFSX: + case PC_STFSUX: + case PC_STFDX: + case PC_STFDUX: + case PC_LWARX: + case PC_LSWX: + case PC_STFIWX: + case PC_STSWX: + case PC_STWCX: + case PC_ECIWX: + case PC_ECOWX: + case PC_DCREAD: + case PC_TLBSX: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_DCBF: + case PC_DCBST: + case PC_DCBT: + case PC_DCBTST: + case PC_DCBZ: + case PC_DCBI: + case PC_ICBI: + case PC_DCCCI: + case PC_ICBT: + case PC_ICCCI: + case PC_ICREAD: + case PC_DCBA: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 11; + break; + + case PC_ADD: + case PC_ADDC: + case PC_ADDE: + case PC_DIVW: + case PC_DIVWU: + case PC_MULHW: + case PC_MULHWU: + case PC_MULLW: + case PC_SUBF: + case PC_SUBFC: + case PC_SUBFE: + bits |= instr->args[2].data.reg.reg << 11; + case PC_ADDME: + case PC_ADDZE: + case PC_NEG: + case PC_SUBFME: + case PC_SUBFZE: + case PC_MFROM: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + if (PCODE_FLAG_SET_F(instr) & fOverflow) + bits |= 0x400; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_ADDI: + case PC_ADDIC: + case PC_ADDICR: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= pcode_update_mem_labeldiff_imm(instr, &instr->args[2], wop) & 0xFFFF; + break; + + case PC_ADDIS: + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[0].data.reg.reg << 21; + bits |= pcode_update_mem_labeldiff_imm(instr, &instr->args[2], wop) & 0xFFFF; + break; + + case PC_MULLI: + case PC_SUBFIC: + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[2].data.imm.value & 0xFFFF; + break; + + case PC_LI: + case PC_LIS: + bits |= instr->args[0].data.reg.reg << 21; + bits |= pcode_update_mem_labeldiff_imm(instr, &instr->args[1], wop) & 0xFFFF; + break; + + case PC_ANDI: + case PC_ANDIS: + case PC_ORI: + case PC_ORIS: + case PC_XORI: + case PC_XORIS: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= pcode_update_mem_labeldiff_imm(instr, &instr->args[2], wop) & 0xFFFF; + break; + + case PC_AND: + case PC_OR: + case PC_XOR: + case PC_NAND: + case PC_NOR: + case PC_EQV: + case PC_ANDC: + case PC_ORC: + case PC_SLW: + case PC_SRW: + case PC_SRAW: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_EXTSH: + case PC_EXTSB: + case PC_CNTLZW: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_MR: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_NOT: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_SRAWI: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= (instr->args[2].data.imm.value & 31) << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_RLWINM: + case PC_RLWIMI: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= (instr->args[2].data.imm.value & 31) << 11; + bits |= (instr->args[3].data.imm.value & 31) << 6; + bits |= (instr->args[4].data.imm.value & 31) << 1; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_RLWNM: + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[2].data.reg.reg << 11; + bits |= (instr->args[3].data.imm.value & 31) << 6; + bits |= (instr->args[4].data.imm.value & 31) << 1; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_CMP: + case PC_CMPL: + bits |= instr->args[0].data.reg.reg << 23; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + break; + + case PC_CMPI: + case PC_CMPLI: + bits |= instr->args[0].data.reg.reg << 23; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.imm.value & 0xFFFF; + break; + + case PC_MTXER: + case PC_MTCTR: + case PC_MTLR: + case PC_MTMSR: + case PC_MFMSR: + case PC_MFXER: + case PC_MFCTR: + case PC_MFLR: + case PC_MFCR: + bits |= instr->args[0].data.reg.reg << 21; + break; + + case PC_MFFS: + bits |= instr->args[0].data.reg.reg << 21; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_MTCRF: + bits |= instr->args[0].data.imm.value << 12; + bits |= instr->args[1].data.reg.reg << 21; + break; + + case PC_MTFSF: + bits |= (instr->args[0].data.imm.value & 0xFF) << 17; + bits |= instr->args[1].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_FMR: + case PC_FABS: + case PC_FNEG: + case PC_FNABS: + case PC_FRES: + case PC_FRSQRTE: + case PC_FRSP: + case PC_FCTIW: + case PC_FCTIWZ: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_FADD: + case PC_FADDS: + case PC_FSUB: + case PC_FSUBS: + case PC_FDIV: + case PC_FDIVS: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_FMADD: + case PC_FMADDS: + case PC_FMSUB: + case PC_FMSUBS: + case PC_FNMADD: + case PC_FNMADDS: + case PC_FNMSUB: + case PC_FNMSUBS: + case PC_FSEL: + bits |= instr->args[3].data.reg.reg << 11; + case PC_FMUL: + case PC_FMULS: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 6; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_FCMPU: + case PC_FCMPO: + bits |= instr->args[0].data.reg.reg << 23; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + break; + + case PC_MTSPR: + if (instr->args[0].kind == PCOp_REGISTER) { + CError_ASSERT(1027, instr->args[0].arg == RegClass_SPR); + CError_ASSERT(1028, instr->args[0].data.reg.reg < 4); + bits |= ((spr_to_sysreg[instr->args[0].data.reg.reg] & 0x1F) << 16) + + ((spr_to_sysreg[instr->args[0].data.reg.reg] & 0x3E0) << 6); + } else if (instr->args[0].kind == PCOp_SYSREG && instr->args[0].arg == 0) { + bits |= ((instr->args[0].data.reg.reg & 0x1F) << 16) + + ((instr->args[0].data.reg.reg & 0x3E0) << 6); + } else { + CError_FATAL(1033); + } + + bits |= instr->args[1].data.reg.reg << 21; + break; + + case PC_MTDCR: + if (instr->args[0].kind == PCOp_IMMEDIATE) { + bits |= ((instr->args[0].data.imm.value & 0x1F) << 16) + + ((instr->args[0].data.imm.value & 0x3E0) << 6); + } else { + CError_FATAL(1042); + } + + bits |= instr->args[1].data.reg.reg << 21; + break; + + case PC_MFSPR: + bits |= instr->args[0].data.reg.reg << 21; + + if (instr->args[1].kind == PCOp_REGISTER && instr->args[1].arg == RegClass_SPR) { + CError_ASSERT(1055, instr->args[1].data.reg.reg < 4); + bits |= ((spr_to_sysreg[instr->args[1].data.reg.reg] & 0x1F) << 16) + + ((spr_to_sysreg[instr->args[1].data.reg.reg] & 0x3E0) << 6); + } else if (instr->args[1].kind == PCOp_SYSREG && instr->args[1].arg == 0) { + bits |= ((instr->args[1].data.reg.reg & 0x1F) << 16) + + ((instr->args[1].data.reg.reg & 0x3E0) << 6); + } else { + CError_FATAL(1060); + } + break; + + case PC_MFDCR: + bits |= instr->args[0].data.reg.reg << 21; + + if (instr->args[1].kind == PCOp_IMMEDIATE) { + bits |= ((instr->args[1].data.imm.value & 0x1F) << 16) + + ((instr->args[1].data.imm.value & 0x3E0) << 6); + } else { + CError_FATAL(1069); + } + break; + + case PC_LSWI: + case PC_STSWI: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= (instr->args[2].data.imm.value & 31) << 11; + break; + + case PC_MCRFS: + bits |= (instr->args[1].data.imm.value & 7) << 18; + case PC_MCRXR: + bits |= instr->args[0].data.reg.reg << 23; + break; + + case PC_MFTB: + bits |= instr->args[0].data.reg.reg << 21; + if (instr->args[1].kind == PCOp_SYSREG && instr->args[1].arg == 0) { + if (instr->args[1].data.reg.reg == 284) + bits |= 0xC4000u; + else if (instr->args[1].data.reg.reg == 285) + bits |= 0xD4000u; + else + CError_FATAL(1100); + } else { + CError_FATAL(1103); + } + break; + + case PC_MTSR: + bits |= instr->args[1].data.reg.reg << 21; + bits |= (instr->args[0].data.imm.value & 15) << 16; + break; + + case PC_MFSR: + bits |= instr->args[0].data.reg.reg << 21; + bits |= (instr->args[1].data.imm.value & 15) << 16; + break; + + case PC_MFSRIN: + case PC_MTSRIN: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + break; + + case PC_MTFSB0: + case PC_MTFSB1: + bits |= (instr->args[0].data.imm.value & 31) << 21; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_MTFSFI: + bits |= instr->args[0].data.reg.reg << 23; + bits |= (instr->args[1].data.imm.value & 15) << 12; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_FSQRT: + case PC_FSQRTS: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_TLBIE: + case PC_TLBLD: + case PC_TLBLI: + bits |= instr->args[0].data.reg.reg << 11; + break; + + case PC_TW: + bits |= (instr->args[0].data.imm.value & 31) << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + break; + + case PC_TWI: + bits |= (instr->args[0].data.imm.value & 31) << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.imm.value & 0xFFFF; + break; + + case PC_OPWORD: + CError_ASSERT(1176, instr->args[0].kind != PCOp_MEMORY); + bits = pcode_update_mem_labeldiff_imm(instr, &instr->args[0], wop); + break; + + case PC_MASKG: + case PC_MASKIR: + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_LSCBX: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_DIV: + case PC_DIVS: + case PC_DOZ: + case PC_MUL: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fOverflow) + bits |= 0x400; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_NABS: + case PC_ABS: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + if (PCODE_FLAG_SET_F(instr) & fOverflow) + bits |= 0x400; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_CLCS: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_DOZI: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.imm.value & 0xFFFF; + break; + + case PC_RLMI: + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + bits |= (instr->args[3].data.imm.value & 31) << 6; + bits |= (instr->args[4].data.imm.value & 31) << 1; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_SLE: + case PC_SLEQ: + case PC_SLLQ: + case PC_SLQ: + case PC_SRAQ: + case PC_SRE: + case PC_SREA: + case PC_SREQ: + case PC_SRLQ: + case PC_SRQ: + case PC_RRIB: + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_SLIQ: + case PC_SLLIQ: + case PC_SRAIQ: + case PC_SRIQ: + case PC_SRLIQ: + bits |= instr->args[1].data.reg.reg << 21; + bits |= instr->args[0].data.reg.reg << 16; + bits |= (instr->args[2].data.imm.value & 31) << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 1; + break; + + case PC_TLBRE: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= (instr->args[2].data.imm.value & 1) << 11; + break; + + case PC_TLBWE: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= (instr->args[2].data.imm.value & 1) << 11; + break; + + case PC_WRTEE: + bits |= instr->args[0].data.reg.reg << 21; + break; + + case PC_WRTEEI: + bits |= instr->args[0].data.imm.value << 15; + break; + + case PC_DSTT: + case PC_DSTSTT: + bits |= 0x2000000u; + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 11; + bits |= (instr->args[2].data.imm.value & 3) << 21; + break; + + case PC_DST: + case PC_DSTST: + bits |= (instr->args[3].data.imm.value & 1) << 25; + bits |= instr->args[0].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 11; + bits |= (instr->args[2].data.imm.value & 3) << 21; + break; + + case PC_DSSALL: + bits |= 0x2000000u; + break; + + case PC_DSS: + bits |= (instr->args[1].data.imm.value & 1) << 25; + bits |= (instr->args[0].data.imm.value & 3) << 21; + break; + + case PC_LVEBX: + case PC_LVEHX: + case PC_LVEWX: + case PC_LVSL: + case PC_LVSR: + case PC_LVX: + case PC_LVXL: + case PC_STVEBX: + case PC_STVEHX: + case PC_STVEWX: + case PC_STVX: + case PC_STVXL: + case PC_VADDCUW: + case PC_VADDFP: + case PC_VADDSBS: + case PC_VADDSHS: + case PC_VADDSWS: + case PC_VADDUBM: + case PC_VADDUBS: + case PC_VADDUHM: + case PC_VADDUHS: + case PC_VADDUWM: + case PC_VADDUWS: + case PC_VAND: + case PC_VANDC: + case PC_VAVGSB: + case PC_VAVGSH: + case PC_VAVGSW: + case PC_VAVGUB: + case PC_VAVGUH: + case PC_VAVGUW: + case PC_VMAXFP: + case PC_VMAXSB: + case PC_VMAXSH: + case PC_VMAXSW: + case PC_VMAXUB: + case PC_VMAXUH: + case PC_VMAXUW: + case PC_VMINFP: + case PC_VMINSB: + case PC_VMINSH: + case PC_VMINSW: + case PC_VMINUB: + case PC_VMINUH: + case PC_VMINUW: + case PC_VMRGHB: + case PC_VMRGHH: + case PC_VMRGHW: + case PC_VMRGLB: + case PC_VMRGLH: + case PC_VMRGLW: + case PC_VMULESB: + case PC_VMULESH: + case PC_VMULEUB: + case PC_VMULEUH: + case PC_VMULOSB: + case PC_VMULOSH: + case PC_VMULOUB: + case PC_VMULOUH: + case PC_VNOR: + case PC_VOR: + case PC_VPKPX: + case PC_VPKSHSS: + case PC_VPKSHUS: + case PC_VPKSWSS: + case PC_VPKSWUS: + case PC_VPKUHUM: + case PC_VPKUHUS: + case PC_VPKUWUM: + case PC_VPKUWUS: + case PC_VRLB: + case PC_VRLH: + case PC_VRLW: + case PC_VSL: + case PC_VSLB: + case PC_VSLH: + case PC_VSLO: + case PC_VSLW: + case PC_VSR: + case PC_VSRAB: + case PC_VSRAH: + case PC_VSRAW: + case PC_VSRB: + case PC_VSRH: + case PC_VSRO: + case PC_VSRW: + case PC_VSUBCUW: + case PC_VSUBFP: + case PC_VSUBSBS: + case PC_VSUBSHS: + case PC_VSUBSWS: + case PC_VSUBUBM: + case PC_VSUBUBS: + case PC_VSUBUHM: + case PC_VSUBUHS: + case PC_VSUBUWM: + case PC_VSUBUWS: + case PC_VSUMSWS: + case PC_VSUM2SWS: + case PC_VSUM4SBS: + case PC_VSUM4SHS: + case PC_VSUM4UBS: + case PC_VXOR: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + break; + + case PC_VCFSX: + case PC_VCFUX: + case PC_VCTSXS: + case PC_VCTUXS: + case PC_VSPLTB: + case PC_VSPLTH: + case PC_VSPLTW: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + bits |= (instr->args[2].data.imm.value & 31) << 16; + break; + + case PC_VEXPTEFP: + case PC_VLOGEFP: + case PC_VREFP: + case PC_VRFIM: + case PC_VRFIN: + case PC_VRFIP: + case PC_VRFIZ: + case PC_VRSQRTEFP: + case PC_VUPKHPX: + case PC_VUPKHSB: + case PC_VUPKHSH: + case PC_VUPKLPX: + case PC_VUPKLSB: + case PC_VUPKLSH: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 11; + break; + + case PC_VCMPBFP: + case PC_VCMPEQFP: + case PC_VCMPEQUB: + case PC_VCMPEQUH: + case PC_VCMPEQUW: + case PC_VCMPGEFP: + case PC_VCMPGTFP: + case PC_VCMPGTSB: + case PC_VCMPGTSH: + case PC_VCMPGTSW: + case PC_VCMPGTUB: + case PC_VCMPGTUH: + case PC_VCMPGTUW: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + bits |= 0x400; + break; + + case PC_VSPLTISB: + case PC_VSPLTISH: + case PC_VSPLTISW: + bits |= instr->args[0].data.reg.reg << 21; + bits |= (instr->args[1].data.imm.value & 31) << 16; + break; + + case PC_VMHADDSHS: + case PC_VMHRADDSHS: + case PC_VMLADDUHM: + case PC_VMSUMMBM: + case PC_VMSUMSHM: + case PC_VMSUMSHS: + case PC_VMSUMUBM: + case PC_VMSUMUHM: + case PC_VMSUMUHS: + case PC_VPERM: + case PC_VSEL: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + bits |= instr->args[3].data.reg.reg << 6; + break; + + case PC_VMADDFP: + case PC_VNMSUBFP: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 6; + bits |= instr->args[3].data.reg.reg << 11; + break; + + case PC_VSLDOI: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[2].data.reg.reg << 11; + bits |= (instr->args[3].data.imm.value & 15) << 6; + break; + + case PC_VMR: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 11; + break; + + case PC_VMRP: + bits |= instr->args[0].data.reg.reg << 21; + bits |= instr->args[1].data.reg.reg << 16; + bits |= instr->args[1].data.reg.reg << 11; + break; + + case PC_MFVSCR: + bits |= instr->args[0].data.reg.reg << 21; + break; + + case PC_MTVSCR: + bits |= instr->args[0].data.reg.reg << 11; + break; + + case PC_EIEIO: + case PC_ISYNC: + case PC_SYNC: + case PC_RFI: + case PC_NOP: + case PC_SC: + case PC_TLBIA: + case PC_TLBSYNC: + case PC_TRAP: + case PC_DSA: + case PC_ESA: + case PC_RFCI: + break; + + default: + CError_FATAL(2203); + } + + return CTool_EndianConvertWord32(bits); +} + +static PCode *targetinstruction(PCodeLabel *label) { + PCodeBlock *block = label->block; + while (block->pcodeCount == 0) + block = block->nextBlock; + return block->firstPCode; +} + +static void invertybit(PCode *instr, SInt32 value) { + if (instr->op == PC_BC) { + if (instr->args[0].data.imm.value & 1) { + if (value < 0) + instr->flags |= fBranchNotTaken; + else + instr->flags |= fBranchTaken; + } + } else if (instr->op == PC_BCCTR || instr->op == PC_BCLR) { + if (instr->args[0].data.imm.value & 1) + instr->flags |= fBranchTaken; + } + + if (PCODE_FLAG_SET_T(instr) & fBranchTaken) + instr->flags = (instr->flags & ~fBranchTaken) | fBranchNotTaken; + else if (PCODE_FLAG_SET_T(instr) & fBranchNotTaken) + instr->flags = (instr->flags & ~fBranchNotTaken) | fBranchTaken; + else if (value < 0) + instr->flags = (instr->flags & ~fBranchTaken) | fBranchNotTaken; + else + instr->flags = (instr->flags & ~fBranchNotTaken) | fBranchTaken; +} + +static void insertlongbranches(SInt32 mask) { + PCodeBlock *block; + PCodeLabel *label; + SInt32 i; + + i = 0; + for (block = pcbasicblocks; block; block = block->nextBlock) { + block->codeOffset = i; + if (block->pcodeCount) { + i += block->pcodeCount * 4; + if (block->pcodeCount && (block->lastPCode->flags & fIsBranch)) + i += 4; + } + } + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (block->pcodeCount && (block->lastPCode->flags & fIsBranch)) { + switch (block->lastPCode->op) { + case PC_BT: + case PC_BF: + i = block->codeOffset + ((block->pcodeCount - 1) * 4); + if (block->lastPCode->args[2].kind == PCOp_LABEL) { + label = block->lastPCode->args[2].data.label.label; + i = label->block->codeOffset - i; + if (i != ((SInt16) (i & mask))) { + block->lastPCode->op = (block->lastPCode->op == PC_BT) ? PC_BF : PC_BT; + block->lastPCode->args[2].data.label.label = block->nextBlock->labels; + invertybit(block->lastPCode, i); + appendpcode(block, makepcode(PC_B, label)); + } + } + break; + + case PC_BC: + i = block->codeOffset + ((block->pcodeCount - 1) * 4); + if (block->lastPCode->args[3].kind == PCOp_LABEL) { + label = block->lastPCode->args[3].data.label.label; + i = label->block->codeOffset - i; + invertybit(block->lastPCode, i); + if (i != ((SInt16) (i & mask))) { + switch (block->lastPCode->args[0].data.imm.value & 30) { + case 0: + case 2: + case 8: + case 10: + block->lastPCode->args[0].data.imm.value ^= 11; + block->lastPCode->args[3].data.label.label = block->nextBlock->labels; + break; + case 16: + case 18: + block->lastPCode->args[0].data.imm.value ^= 3; + block->lastPCode->args[3].data.label.label = block->nextBlock->labels; + break; + case 4: + case 12: + block->lastPCode->args[0].data.imm.value ^= 9; + block->lastPCode->args[3].data.label.label = block->nextBlock->labels; + break; + case 20: + deletepcode(block->lastPCode); + break; + default: + CError_FATAL(2368); + } + + appendpcode(block, makepcode(PC_B, label)); + } + } + break; + + case PC_BDNZ: + case PC_BDZ: + i = block->codeOffset + ((block->pcodeCount - 1) * 4); + if (block->lastPCode->args[0].kind == PCOp_LABEL) { + label = block->lastPCode->args[0].data.label.label; + i = label->block->codeOffset - i; + if (i != ((SInt16) (i & mask))) { + switch (block->lastPCode->op) { + case PC_BDZ: + block->lastPCode->op = PC_BDNZ; + break; + case PC_BDNZ: + block->lastPCode->op = PC_BDZ; + break; + default: + CError_FATAL(2389); + } + + block->lastPCode->args[0].data.label.label = block->nextBlock->labels; + invertybit(block->lastPCode, i); + appendpcode(block, makepcode(PC_B, label)); + } + } + break; + + case PC_BDNZT: + case PC_BDNZF: + case PC_BDZT: + case PC_BDZF: + i = block->codeOffset + ((block->pcodeCount - 1) * 4); + if (block->lastPCode->args[2].kind == PCOp_LABEL) { + label = block->lastPCode->args[2].data.label.label; + i = label->block->codeOffset - i; + if (i != ((SInt16) (i & mask))) { + switch (block->lastPCode->op) { + case PC_BDNZT: + block->lastPCode->op = PC_BDZF; + break; + case PC_BDNZF: + block->lastPCode->op = PC_BDZT; + break; + case PC_BDZT: + block->lastPCode->op = PC_BDNZF; + break; + case PC_BDZF: + block->lastPCode->op = PC_BDNZT; + break; + default: + CError_FATAL(2420); + } + + block->lastPCode->args[2].data.label.label = block->nextBlock->labels; + invertybit(block->lastPCode, i); + appendpcode(block, makepcode(PC_B, label)); + } + } + break; + + } + } + } +} + +SInt32 optimizefinalbranches(SInt32 codesize) { + PCodeBlock *block; + PCode *instr; + SInt32 offset; + int changed; + int deleted; + PCodeLabel *label; + PCode *target; + + do { + changed = deleted = 0; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (block->pcodeCount == 0) + continue; + + instr = block->lastPCode; + if (!(instr->flags & fIsBranch)) + continue; + + offset = block->codeOffset + (block->pcodeCount - 1) * 4; + + if (instr->op == PC_B && instr->args[0].kind == PCOp_LABEL) { + label = instr->args[0].data.label.label; + target = targetinstruction(label); + + if (label->block->codeOffset == (offset + 4)) { + deletepcode(instr); + changed = deleted = 1; + } else if (target->op == PC_B) { + if (target->args[0].kind == PCOp_LABEL && target->args[0].data.label.label != instr->args[0].data.label.label) { + instr->args[0].data.label.label = target->args[0].data.label.label; + changed = 1; + } + } else if (target->op == PC_BLR) { + instr->op = PC_BLR; + changed = 1; + } + continue; + } + + if ((instr->op == PC_BT || instr->op == PC_BF) && instr->args[2].kind == PCOp_LABEL) { + PCodeBlock *block2 = instr->block; + label = instr->args[2].data.label.label; + target = targetinstruction(label); + + if (label->block->codeOffset == (offset + 4)) { + deletepcode(instr); + changed = deleted = 1; + } else if (target->op == PC_B) { + if (target->args[0].kind == PCOp_LABEL && target->args[0].data.label.label != instr->args[2].data.label.label) { + instr->args[2].data.label.label = target->args[0].data.label.label; + changed = 1; + } + } else if (copts.opt_bcc_lr_ctr) { + if (target->op == PC_BLR) { + if (instr->op == PC_BT) + instr->op = PC_BTLR; + else + instr->op = PC_BFLR; + instr->argCount = 2; + changed = 1; + } else if (target->op == PC_BCTR) { + if (instr->op == PC_BT) + instr->op = PC_BTCTR; + else + instr->op = PC_BFCTR; + instr->argCount = 2; + changed = 1; + } else if ( + block2->nextBlock && + block2->nextBlock->firstPCode && + block2->nextBlock->firstPCode->op == PC_BLR && + label->block->codeOffset == (offset + 8) + ) { + if ( + block2->nextBlock->predecessors && + block2->nextBlock->predecessors->block == block2 && + !block2->nextBlock->predecessors->nextLink + ) { + if (instr->op == PC_BT) + instr->op = PC_BFLR; + else + instr->op = PC_BTLR; + change_num_operands(instr, 2); + deletepcode(block2->nextBlock->firstPCode); + changed = deleted = 1; + } + } else if ( + block2->nextBlock && + block2->nextBlock->firstPCode && + block2->nextBlock->firstPCode->op == PC_BCTR && + label->block->codeOffset == (offset + 8) + ) { + if ( + block2->nextBlock->predecessors && + block2->nextBlock->predecessors->block == block2 && + !block2->nextBlock->predecessors->nextLink + ) { + if (instr->op == PC_BT) + instr->op = PC_BFCTR; + else + instr->op = PC_BTCTR; + change_num_operands(instr, 2); + deletepcode(block2->nextBlock->firstPCode); + changed = deleted = 1; + } + } + } + continue; + } + + if ( + instr->op == PC_BC && + instr->args[3].kind == PCOp_LABEL && + !(PCODE_FLAG_SET_T(instr) & (fSideEffects | fLink)) + ) + { + PCodeBlock *block2 = instr->block; + label = instr->args[3].data.label.label; + target = targetinstruction(label); + + if (label->block->codeOffset == (offset + 4)) { + deletepcode(instr); + changed = deleted = 1; + } else if (target->op == PC_B) { + if (target->args[0].kind == PCOp_LABEL && target->args[0].data.label.label != instr->args[3].data.label.label) { + instr->args[3].data.label.label = target->args[0].data.label.label; + changed = 1; + } + } else if (copts.opt_bcc_lr_ctr) { + if (target->op == PC_BLR) { + instr->op = PC_BCLR; + instr->argCount = 3; + changed = 1; + } else if (target->op == PC_BCTR) { + instr->op = PC_BCCTR; + instr->argCount = 3; + changed = 1; + } else if ( + block2->nextBlock && + block2->nextBlock->firstPCode && + block2->nextBlock->firstPCode->op == PC_BLR && + label->block->codeOffset == (offset + 8) + ) { + SInt32 val = instr->args[0].data.imm.value & 30; + if ( + block2->nextBlock->predecessors && + block2->nextBlock->predecessors->block == block2 && + !block2->nextBlock->predecessors->nextLink + ) { + if ((val & 30) == 4) + instr->args[0].data.imm.value = val | 12; + else if ((val & 30) == 12) + instr->args[0].data.imm.value = val & 23; + instr->op = PC_BCLR; + instr->argCount = 3; + deletepcode(block2->nextBlock->firstPCode); + changed = deleted = 1; + } + } else if ( + block2->nextBlock && + block2->nextBlock->firstPCode && + block2->nextBlock->firstPCode->op == PC_BCTR && + label->block->codeOffset == (offset + 8) + ) { + SInt32 val = instr->args[0].data.imm.value & 30; + if ( + block2->nextBlock->predecessors && + block2->nextBlock->predecessors->block == block2 && + !block2->nextBlock->predecessors->nextLink + ) { + if ((val & 30) == 4) + instr->args[0].data.imm.value = val | 12; + else if ((val & 30) == 12) + instr->args[0].data.imm.value = val & 23; + instr->op = PC_BCCTR; + instr->argCount = 3; + deletepcode(block2->nextBlock->firstPCode); + changed = deleted = 1; + } + } + } + } + } + + if (deleted) + codesize = pccomputeoffsets(); + } while (changed); + + return codesize; +} + +static SInt32 insert_align_nops(Object *func, SInt32 codesize) { + PCodeBlock *block; + int changed; + PCodeBlock *prev; + + do { + changed = 0; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if ( + (block->flags & fPCBlockFlag6000) == fPCBlockFlag2000 && + (block->codeOffset & 7) && + block->pcodeCount < 8 && + (prev = block->prevBlock) && + !(prev->flags & fPCBlockFlag2000) + ) + { + if (prev->lastPCode && prev->lastPCode->op == PC_NOP && !(prev->lastPCode->flags & fSideEffects)) { + deletepcode(prev->lastPCode); + } else { + PCode *nop = makepcode(PC_NOP); + nop->flags &= ~fSideEffects; + appendpcode(prev, nop); + } + + codesize = pccomputeoffsets(); + changed = 1; + } + } + + if (changed) { + pclistblocks(CMangler_GetLinkName(func)->name, "AFTER INSERT ALIGN NOPs"); + if (codesize > 32766) { + insertlongbranches(32766); + codesize = pccomputeoffsets(); + } + } + + } while (changed); + + return codesize; +} + +SInt32 assemblefunction(Object *func, EntryPoint *entrypoints) { + void *tbdata; + GList *gl; + PCodeBlock *block; + PCode *instr; + SInt32 offset2; + SInt32 codesize; + SInt32 tbsize; + SInt32 offset; + SectionHandle section; + EntryPoint *ep; + WeirdOperand wop; + + codesize = pccomputeoffsets(); + if (codesize <= 0) + PPCError_Error(PPCErrorStr190, func->name->name); + + if (copts.peephole || copts.optimizationlevel >= 3) + codesize = optimizefinalbranches(codesize); + + if (codesize > 32766) { + insertlongbranches(32766); + codesize = pccomputeoffsets(); + } + + if (copts.function_align > 4) + codesize = insert_align_nops(func, codesize); + + tbsize = 0; + if (copts.traceback) + tbdata = generate_traceback(codesize, CMangler_GetLinkName(func)->name, &tbsize, func); + + if (func->section == SECT_DEFAULT) + func->section = SECT_TEXT; + + offset = tbsize; + section = ObjGen_DeclareCode(func, codesize + tbsize); + gl = ObjGen_GetSectionGList(section); + + codebase = gl->size; + AppendGListNoData(gl, codesize + tbsize); + + if (copts.filesyminfo) { + ObjGen_SymFunc(func); + ObjGen_Line(functionbodyoffset, 0); + ObjGen_DeclareSymInfo(); + } + + if (uses_globals && pic_base_reg) + ObjGen_DeclarePICBase(func, pic_base_pcodelabel->block->codeOffset); + + if (entrypoints) { + for (ep = entrypoints; ep; ep = ep->next) { + ObjGen_DeclareEntry(ep->object, ep->block->codeOffset); + } + } + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (offset2 = block->codeOffset, instr = block->firstPCode; instr; instr = instr->nextPCode, offset2 += 4) { + if (copts.filesyminfo && instr->sourceoffset != -1) + ObjGen_Line(instr->sourceoffset, offset2); + + *((UInt32 *) (*gl->data + codebase + offset2)) = assemblepcode(instr, offset2, &wop); + + if (wop.type != -1) + ObjGen_RelocateObj(section, offset2, wop.x2, wop.type); + } + } + + if (copts.filesyminfo) + ObjGenMach_SymFuncEnd(func, codesize); + + if (copts.traceback) + memcpy(*gl->data + codebase + codesize, tbdata, offset); + + if (copts.traceback) + return codesize + tbsize; + else + return codesize; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeListing.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeListing.c new file mode 100644 index 0000000..7a9fa13 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeListing.c @@ -0,0 +1,536 @@ +#include "compiler/PCodeListing.h" +#include "compiler/CError.h" +#include "compiler/CMangler.h" +#include "compiler/CParser.h" +#include "compiler/Alias.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/InterferenceGraph.h" +#include "compiler/LiveInfo.h" +#include "compiler/PCode.h" +#include "compiler/PCodeAssembly.h" +#include "compiler/Registers.h" +#include "compiler/Scheduler.h" +#include "compiler/objects.h" + +static FILE *pcfile; +static int ptime; +static int sourcetext; +static int sourcetext_is_main; +static int sourcelength; +int pclist_bad_operand; + +static void formatdataflowset(char *name, UInt32 *vec, UInt32 size, char *format) { + UInt32 i; + UInt32 counter; + char *separator; + + separator = ""; + fprintf(pcfile, "%s = {", name); + + for (i = 0, counter = 0; i < size; i++) { + if (bitvectorgetbit(i, vec)) { + if (i) + fprintf(pcfile, separator); + if (counter++ == 10) { + fprintf(pcfile, "\n\t\t"); + counter = 0; + } + fprintf(pcfile, format, i); + separator = ","; + } + } + + fprintf(pcfile, "}\n"); +} + +static void pclistblock(PCodeBlock *block, char *format, UInt32 vecSize) { + PCLink *link; + PCodeLabel *label; + int cpu; + int chr; + PCode *instr; + int offset; + int latency; + UInt32 opcode; + MachineInfo *mi; + char buf[500]; + WeirdOperand dummyArg; + + fprintf(pcfile, ":{%4.4x}::::::::::::::::::::::::::::::::::::::::LOOPWEIGHT=%" PRId32 "\n", block->flags, block->loopWeight); + fprintf(pcfile, "B%" PRId32 ": ", block->blockIndex); + + fprintf(pcfile, "Successors = { "); + for (link = block->successors; link; link = link->nextLink) { + if (link->block) + fprintf(pcfile, "B%" PRId32 " ", link->block->blockIndex); + } + fprintf(pcfile, "} "); + + fprintf(pcfile, "Predecessors = { "); + for (link = block->predecessors; link; link = link->nextLink) { + if (link->block) + fprintf(pcfile, "B%" PRId32 " ", link->block->blockIndex); + } + + if (block->labels) { + fprintf(pcfile, "} Labels = { "); + for (label = block->labels; label; label = label->nextLabel) + fprintf(pcfile, "L%" PRId32 " ", label->index); + } + + fprintf(pcfile, "}\n\n"); + + cpu = copts.scheduling; + if (cpu == 10) { + mi = &machine7450; + } else if (copts.altivec_model != 0 || cpu == 7) { + mi = &machine7400; + } else if (cpu == 2) { + mi = &machine603; + } else if (cpu == 5) { + mi = &machine603e; + } else if (cpu == 3) { + mi = &machine604; + } else if (cpu == 6) { + mi = &machine604; + } else if (cpu == 4) { + mi = &machine750; + } else if (cpu == 1) { + mi = &machine601; + } else if (cpu == 9) { + mi = &machine821; + } else { + mi = &machine603; + } + + for (offset = block->codeOffset, instr = block->firstPCode; instr; instr = instr->nextPCode, offset += 4) { + latency = mi->latency(instr); + formatoperands(instr, buf, 1); + chr = (PCODE_FLAG_SET_F(instr) & fRecordBit) ? '.' : ' '; + if (coloring) + opcode = 0; + else + opcode = assemblepcode(instr, offset, &dummyArg); + + fprintf( + pcfile, + " %.8" PRIX32 " %.8" PRIX32 " %4" PRId32 " %-7s%c %s\n", + offset, CTool_EndianConvertWord32(opcode), latency, + opcodeinfo[instr->op].name, chr, buf + ); + + if (instr->alias) + dumpalias(instr->alias, 0, 1, 0); + } + + if (vecSize) { + fprintf(pcfile, "............................................................\n"); + formatdataflowset("use", liveinfo[block->blockIndex].use, vecSize, format); + formatdataflowset("def", liveinfo[block->blockIndex].def, vecSize, format); + formatdataflowset("in ", liveinfo[block->blockIndex].in, vecSize, format); + formatdataflowset("out", liveinfo[block->blockIndex].out, vecSize, format); + } + + fflush(pcfile); + + if (pclist_bad_operand) + CError_FATAL(252); +} + +static void pclistonoff(int flag) { + if (flag) + fprintf(pcfile, "On\n"); + else + fprintf(pcfile, "Off\n"); +} + +void pcinitlisting() { + // unknown args, etc +} + +void pccleanuplisting(void) { +#ifdef CW_ENABLE_PCODE_DEBUG + // this code is not based on the original as we don't have it + if (pcfile) { + fclose(pcfile); + pcfile = NULL; + } +#endif +} + +void pclistblocks(char *name1, char *name2) { +#ifdef CW_ENABLE_PCODE_DEBUG + // this code is not based on the original as we don't have it + PCodeBlock *block; + if (copts.debuglisting) { + if (!pcfile) + pcfile = fopen("pcdump.txt", "a"); + + fprintf(pcfile, "\n%s\n%s\n", name1, name2); + for (block = pcbasicblocks; block; block = block->nextBlock) + pclistblock(block, NULL, 0); + } +#endif +} + +void pclistdataflow() { + // unknown args +} + +void pclistinterferences(char *class_format, int regcount) { +} + +void pclistspill() { + // unknown args +} + +void pclistcopypropitem() { + // unknown args +} + +void pclistcoalesce() { + // unknown args +} + +void pclistusedefs() { + // unknown args +} + +void pclistpropinfo() { + // unknown args +} + +static void listloop() { + // unknown args +} + +static void listloops() { + // unknown args +} + +void pclistloops() { + // unknown args +} + +static void listswitchtables() { + // unknown args +} + +void pclistswitchtables() { + // unknown args +} + +void pclistdominators() { + // unknown args +} + +void pclistbackedge() { + // unknown args +} + +static char *GetInterferenceFlags(IGNode *node) { + char *buf; + Boolean first; + + first = 1; + buf = oalloc(512); + buf[0] = 0; + + if (node->flags & fSpilled) { + strcat(buf, "fSpilled"); + first = 0; + } + + if (node->flags & fPushed) { + if (!first) + strcat(buf, "|"); + strcat(buf, "fPushed"); + first = 0; + } + + if (node->flags & fCoalesced) { + if (!first) + strcat(buf, "|"); + strcat(buf, "fCoalesced"); + first = 0; + } + + if (node->flags & fCoalescedInto) { + if (!first) + strcat(buf, "|"); + strcat(buf, "fCoalescedInto"); + first = 0; + } + + if (node->flags & fPairHigh) { + if (!first) + strcat(buf, "|"); + strcat(buf, "fPairHigh"); + first = 0; + } + + if (node->flags & fPairLow) { + if (!first) + strcat(buf, "|"); + strcat(buf, "fPairLow"); + first = 0; + } + + if (!*buf) + strcat(buf, "no_flags"); + + return buf; +} + +void pclistinterferencegraphnode() { + // unknown args +} + +void pclistinterferencegraph() { + // unknown args +} + +void pclistblock_scheduler() { + // unknown args +} + +void pclistblocks_start_scheduler(char *str1, char *str2) { +} + +void pclistblocks_end_scheduler(void) { + if (pclist_bad_operand) + CError_FATAL(1318); +} + +static void printheapsize() { + // unknown args +} + +void pctotalheap() { + // unknown args +} + +void pctotalmemory() { + // unknown args +} + +void pcmessage(char *probably_a_string, ...) { +} + +int formatalias(Alias *alias, char *buf, int bufSize) { + char *name; + char *typestr; + int len; + int len2; + + if (bufSize < 16) + return sprintf(buf, "..."); + + switch (alias->type) { + case AliasType0: + case AliasType1: + name = CMangler_GetLinkName(alias->object)->name; + if (!strlen(name) || name[0] < 0) + CError_FATAL(1458); + + if (strlen(name) + 16 > bufSize) + return sprintf(buf, "..."); + + switch (alias->object->datatype) { + case DNONLAZYPTR: + typestr = "{NL}"; + break; + case DDATA: + typestr = "{RW}"; + break; + case DLOCAL: + typestr = "{SP}"; + break; + default: + typestr = ""; + } + + len = sprintf(buf, "%0.*s%s", bufSize - 20, name, typestr, alias->size); + buf += len; + if (alias->type == AliasType0) + return len; + + if (alias->offset == 0) + len2 = sprintf(buf, ":%d", alias->size); + else if (alias->offset > 0) + len2 = sprintf(buf, "+%d:%d", alias->offset, alias->size); + else + len2 = sprintf(buf, "-%d:%d", -alias->offset, alias->size); + + return len + len2; + + case AliasType2: + len = 0; + + len2 = sprintf(buf, "{"); + buf += len2; + len += len2; + + len2 = sprintf(buf, "*"); + buf += len2; + len += len2; + + len2 = sprintf(buf, "}"); + buf += len2; + len += len2; + return len; + + default: + CError_FATAL(1543); + return 0; + } +} + +int dumpalias(Alias *alias, int len, Boolean flag1, Boolean flag2) { + char *name; + char *typestr; + AliasMember *member; + Boolean notFirst; + + if (!flag2 && alias == worst_case) { + fprintf(pcfile, " ALIAS = {worst_case}"); + if (flag1) + fprintf(pcfile, "\n"); + return 0; + } + + if (flag1) { + if (alias == worst_case) + fprintf(pcfile, "ALIAS worst_case = "); + else + fprintf(pcfile, " ALIAS = "); + } + + switch (alias->type) { + case AliasType0: + case AliasType1: + name = CMangler_GetLinkName(alias->object)->name; + if (!strlen(name) || name[0] < 0) + CError_FATAL(1581); + + switch (alias->object->datatype) { + case DNONLAZYPTR: + typestr = "{NL}"; + break; + case DDATA: + typestr = "{RW}"; + break; + case DLOCAL: + typestr = "{SP}"; + break; + default: + typestr = ""; + } + + len += fprintf(pcfile, "%0.80s%s", name, typestr); + + if (alias->type == AliasType0) { + if (flag1) + fprintf(pcfile, "\n"); + return len; + } + + if (alias->offset == 0) + len += fprintf(pcfile, ":%d", alias->size); + else if (alias->offset > 0) + len += fprintf(pcfile, "+%d:%d", alias->offset, alias->size); + else + len += fprintf(pcfile, "-%d:%d", -alias->offset, alias->size); + + if (flag1) + fprintf(pcfile, "\n"); + + return len; + + case AliasType2: + len += fprintf(pcfile, "{"); + notFirst = 0; + for (member = alias->parents; member; member = member->nextParent) { + if (member->child->type == AliasType0) { + if (notFirst) + len += fprintf(pcfile, ","); + if (len > 60) { + fprintf(pcfile, "\n "); + len = 0; + } + len = dumpalias(member->child, len, 0, 0); + notFirst = 1; + } + } + for (member = alias->parents; member; member = member->nextParent) { + if (member->child->type != AliasType0) { + if (notFirst) + len += fprintf(pcfile, ","); + if (len > 60) { + fprintf(pcfile, "\n "); + len = 0; + } + len = dumpalias(member->child, len, 0, 0); + notFirst = 1; + } + } + + len += fprintf(pcfile, "}"); + if (flag1) + fprintf(pcfile, "\n"); + return len; + + default: + CError_FATAL(1661); + return 0; + } +} + +void pcformatset() { + // unknown args +} + +int GetLineEndOffset(char *str, int lineNum, int len) { + int offset; + char *work; + + offset = GetLineOffset(str, lineNum, len); + if (offset < 0) + return offset; + + work = str + offset; + while (*work) { + if (*work == '\n') + return work - str - 1; + work++; + } + return -1; +} + +int GetLineOffset(char *str, int lineNum, int len) { + char *work = str; + char *end; + + if (lineNum < 0) + return -1; + + end = str + len; + while (work < end) { + if (*work == '\n' && --lineNum <= 0) + return work - str; + work++; + } + + return 0; +} + +void DumpSourceCode() { + // unknown args +} + +int DumpIR_SrcBreak() { + // unknown args + return 0; +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeUtilities.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeUtilities.c new file mode 100644 index 0000000..b1f8ffb --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/PCodeUtilities.c @@ -0,0 +1,345 @@ +#include "compiler/PCodeUtilities.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/Exceptions.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/Registers.h" +#include "compiler/enode.h" +#include "compiler/objects.h" + +void pcsetrecordbit(PCode *pc) { + int reg; + PCodeArg *arg; + short argCount; + int argIdx; + + pc->flags &= ~(fIsMove | fCommutative | fIsCSE); + if ((pc->flags & fOpTypeMask) == fOpTypeFPR) { + reg = 1; + } else if ((pc->flags & fOpTypeMask) == fOpTypeVR) { + reg = 6; + } else { + reg = 0; + } + + if (pc->op == PC_ANDI || pc->op == PC_ANDIS) { + pc->flags |= fRecordBit; + } else if (pc->op == PC_ADDI || pc->op == PC_ADDIC) { + pc->flags |= fSetsCarry; + pc->flags |= fRecordBit; + change_num_operands(pc, 5); + pc->op = PC_ADDICR; + + CError_ASSERT(76, pc->args[3].kind == PCOp_PLACEHOLDEROPERAND); + pc->args[3].kind = PCOp_REGISTER; + pc->args[3].arg = RegClass_SPR; + pc->args[3].data.reg.reg = 0; + pc->args[3].data.reg.effect = EffectWrite; + CError_ASSERT(80, pc->args[4].kind == PCOp_PLACEHOLDEROPERAND); + pc->args[4].kind = PCOp_REGISTER; + pc->args[4].arg = RegClass_CRFIELD; + pc->args[4].data.reg.reg = reg; + pc->args[4].data.reg.effect = EffectWrite; + } else { + arg = pc->args; + argIdx = argCount = pc->argCount; + while (arg->kind != PCOp_PLACEHOLDEROPERAND && argIdx) { + if (arg->kind == PCOp_REGISTER && arg->arg == RegClass_CRFIELD && arg->data.reg.reg == reg) { + arg->data.reg.effect |= EffectWrite; + pc->flags |= fRecordBit; + return; + } + arg++; + argIdx--; + } + + if (argIdx <= 0) { + arg = &pc->args[argCount]; + pc->argCount++; + } + + CError_ASSERT(105, arg->kind == PCOp_PLACEHOLDEROPERAND); + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_CRFIELD; + arg->data.reg.reg = reg; + arg->data.reg.effect = EffectWrite; + if (pc->op != PC_ADDICR) + pc->flags |= fRecordBit; + } +} + +void pcsetsideeffects(PCode *pc) { + pc->flags &= ~(fIsMove | fCommutative | fIsCSE); + pc->flags |= fSideEffects; +} + +void pcsetlinkbit(PCode *pc) { + PCodeArg *arg; + int argIdx; + + switch (pc->op) { + case PC_B: + pc->op = PC_BL; + break; + case PC_BCTR: + pc->op = PC_BCTRL; + break; + case PC_BLR: + pc->op = PC_BLRL; + break; + } + + arg = pc->args; + argIdx = pc->argCount; + while (arg->kind != PCOp_PLACEHOLDEROPERAND && argIdx) { + if (arg->kind == PCOp_REGISTER && arg->arg == RegClass_SPR && arg->data.reg.reg == 1) { + arg->data.reg.effect |= EffectWrite; + pc->flags |= fLink; + return; + } + arg++; + argIdx--; + } + + CError_ASSERT(169, arg->kind == PCOp_PLACEHOLDEROPERAND); + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = 1; + arg->data.reg.effect = EffectWrite; + + if (opcodeinfo[pc->op].flags & fIsCall) { + pc->flags &= ~fIsBranch; + pc->flags |= fIsCall; + } + pc->flags |= fLink; +} + +void branch_label(PCodeLabel *label) { + if (pclastblock->pcodeCount) { + pcbranch(pclastblock, label); + makepcblock(); + } + pclabel(pclastblock, label); +} + +void branch_conditional(short a, short compareop, short c, PCodeLabel *label) { + PCodeBlock *tmpblock; + PCodeLabel *tmplabel; + int r28; + + tmpblock = pclastblock; + tmplabel = makepclabel(); + + switch (compareop) { + case ENOTEQU: + c = !c; + case EEQU: + r28 = 2; + break; + case EGREATEREQU: + c = !c; + case ELESS: + r28 = 0; + break; + case ELESSEQU: + c = !c; + case EGREATER: + r28 = 1; + break; + } + + emitpcode(c ? PC_BT : PC_BF, a, r28, label); + pcbranch(pclastblock, label); + pcbranch(pclastblock, tmplabel); + makepcblock(); + pclabel(pclastblock, tmplabel); +} + +void branch_always(PCodeLabel *label) { + emitpcode(PC_B, label); + pcbranch(pclastblock, label); + makepcblock(); +} + +void branch_decrement_always(Opcode opcode, PCodeLabel *label) { + PCodeLabel *tmplabel = makepclabel(); + emitpcode(opcode, label); + pcbranch(pclastblock, label); + pcbranch(pclastblock, tmplabel); + makepcblock(); + pclabel(pclastblock, tmplabel); +} + +void branch_indirect(Object *obj) { + emitpcode(PC_BCTR, obj, 0); + makepcblock(); +} + +int branch_count_volatiles(void) { + int count = 0; + int i; + RegClass rclass; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + for (i = 0; i < n_scratch_registers[rclass]; i++) { + count++; + } + } + + return count; +} + +PCodeArg *branch_record_volatiles(PCodeArg *arglist, UInt32 *masks) { + int i; + RegClass rclass; + + for (rclass = RegClassMax - 1; rclass >= 0; rclass--) { + for (i = 0; i < n_scratch_registers[rclass]; i++) { + arglist->kind = PCOp_REGISTER; + arglist->arg = rclass; + arglist->data.reg.reg = scratch_registers[rclass][i]; + arglist->data.reg.effect = EffectWrite; + if (masks[rclass] & (1 << scratch_registers[rclass][i])) + arglist->data.reg.effect |= EffectRead; + arglist++; + } + } + + return arglist; +} + +void branch_subroutine(Object *obj, short add_nop, UInt32 *masks) { + int count; + PCode *pc; + PCodeArg *arg; + + count = branch_count_volatiles(); + if (copts.exceptions && current_statement) + count += countexceptionactionregisters(current_statement->dobjstack); + + pc = makepcode(PC_BL, count, obj, 0); + arg = branch_record_volatiles(pc->args + 1, masks); + if (copts.exceptions && current_statement) + noteexceptionactionregisters(current_statement->dobjstack, arg); + appendpcode(pclastblock, pc); + + if (add_nop) + emitpcode(PC_NOP); + + branch_label(makepclabel()); + if (copts.exceptions && current_statement) + recordexceptionactions(pc, current_statement->dobjstack); +} + +void branch_subroutine_ctr(UInt32 *masks) { + int count; + PCode *pc; + PCodeArg *arg; + + count = branch_count_volatiles(); + if (copts.exceptions && current_statement) + count += countexceptionactionregisters(current_statement->dobjstack); + + pc = makepcode(PC_BCTRL, count); + arg = branch_record_volatiles(pc->args + 1, masks); + if (copts.exceptions && current_statement) + noteexceptionactionregisters(current_statement->dobjstack, arg); + appendpcode(pclastblock, pc); + + branch_label(makepclabel()); + if (copts.exceptions && current_statement) + recordexceptionactions(pc, current_statement->dobjstack); +} + +void add_immediate(short dest_reg, short base_reg, Object *obj, SInt16 offset) { + short tmp_reg = base_reg; + + if (obj && offset && obj->datatype != DLOCAL) { + tmp_reg = used_virtual_registers[RegClass_GPR]++; + add_immediate_lo(tmp_reg, base_reg, obj, 0, 1); + obj = NULL; + } + + if (!obj && !offset) + emitpcode(PC_MR, dest_reg, tmp_reg); + else + emitpcode(PC_ADDI, dest_reg, tmp_reg, obj, offset); +} + +PCode *add_immediate_lo(short dest_reg, short base_reg, Object *obj, SInt16 offset, char add_to_block) { + PCode *pc; + + CError_ASSERT(577, obj); + + pc = makepcode(PC_ADDI, dest_reg, base_reg, obj, offset); + if (add_to_block) + appendpcode(pclastblock, pc); + return pc; +} + +PCode *op_absolute_ha(short dest_reg, short base_reg, Object *obj, short offset, char add_to_block) { + PCode *pc; + int tmp_reg; + + if (obj->datatype == DLOCAL) { + pc = makepcode(PC_ADDIS, dest_reg, base_reg, obj, offset); + } else if (copts.codegen_pic) { + tmp_reg = base_reg; + CError_ASSERT(601, tmp_reg); + pc = makepcode(PC_ADDIS, dest_reg, tmp_reg, obj, offset); + } else { + CError_ASSERT(606, base_reg == 0); + pc = makepcode(PC_LIS, dest_reg, obj, offset); + } + + if (add_to_block) + appendpcode(pclastblock, pc); + return pc; +} + +void load_store_register(Opcode opcode, short dest_reg, short base_reg, Object *obj, SInt32 offset) { + short addi_tmp; + short offset_reg1; + short offset_reg2; + + offset_reg1 = base_reg; + if (obj && offset && obj->datatype != DLOCAL) { + offset_reg1 = used_virtual_registers[RegClass_GPR]++; + add_immediate_lo(offset_reg1, base_reg, obj, 0, 1); + obj = NULL; + } + + if (offset != (short)offset) { + if (opcode == PC_LWZ && dest_reg == 12) + offset_reg2 = 12; + else if (opcode == PC_LWZ && dest_reg == 11) + offset_reg2 = 11; + else + offset_reg2 = used_virtual_registers[RegClass_GPR]++; + + emitpcode(PC_ADDIS, offset_reg2, offset_reg1, 0, (short) ((offset >> 16) + ((offset & 0x8000) >> 15))); + offset = (short) offset; + offset_reg1 = offset_reg2; + } + + if (opcode == PC_STVX || opcode == PC_LVX) { + offset_reg2 = 0; + if (obj) { + addi_tmp = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADDI, addi_tmp, offset_reg1, obj, offset); + offset_reg1 = addi_tmp; + } else if (offset) { + offset_reg2 = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_LI, offset_reg2, offset); + } + if (!offset_reg2) + emitpcode(opcode, dest_reg, 0, offset_reg1); + else + emitpcode(opcode, dest_reg, offset_reg1, offset_reg2); + } else { + emitpcode(opcode, dest_reg, offset_reg1, obj, offset); + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Peephole.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Peephole.c new file mode 100644 index 0000000..151e448 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Peephole.c @@ -0,0 +1,2753 @@ +#include "compiler/Peephole.h" +#include "compiler/CompilerTools.h" +#include "compiler/InstrSelection.h" +#include "compiler/PCode.h" +#include "compiler/objects.h" +#include "compiler/types.h" +#include "compiler/Scheduler.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/Alias.h" +#include "compiler/CParser.h" + +typedef int (*PeepholeFunc)(PCode *instr, UInt32 *masks); + +typedef struct Pattern { + struct Pattern *next; + PeepholeFunc func; +} Pattern; + +typedef struct LiveRegs { + UInt32 x0; + UInt32 x4; + UInt32 x8; + UInt32 xC; +} LiveRegs; + +static LiveRegs *liveregs[RegClassMax]; +static Pattern *peepholepatterns[OPCODE_MAX]; +static PCode **defininginstruction; + +static void computeregisterusedefs(void) { + PCodeBlock *block; + PCode *instr; + PCodeArg *op; + int i; + RegClass rclass; + LiveRegs *lr; + UInt32 array1[RegClassMax]; + UInt32 array2[RegClassMax]; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (rclass = 0; rclass < RegClassMax; rclass++) { + array1[rclass] = 0; + array2[rclass] = 0; + } + + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if ( + op->kind == PCOp_REGISTER && + (op->data.reg.effect & EffectRead) && + !((1 << op->data.reg.reg) & array2[op->arg]) + ) + array1[op->arg] |= 1 << op->data.reg.reg; + } + + for (op = instr->args, i = instr->argCount; i--; op++) { + if ( + op->kind == PCOp_REGISTER && + (op->data.reg.effect & EffectWrite) && + !((1 << op->data.reg.reg) & array1[op->arg]) + ) + array2[op->arg] |= 1 << op->data.reg.reg; + } + } + + for (rclass = 0; rclass < RegClassMax; rclass++) { + lr = liveregs[rclass] + block->blockIndex; + lr->x0 = array1[rclass]; + lr->x4 = array2[rclass]; + if (rclass == RegClass_GPR) { + lr->x8 = 1 << 1; + lr->xC = 1 << 1; + } else { + lr->x8 = 0; + lr->xC = 0; + } + } + } +} + +static void computeliveness(LiveRegs *lrarray, UInt32 x) { + PCodeBlock *block; + LiveRegs *lr; + PCLink *link; + UInt32 newC; + UInt32 new8; + int i; + int flag; + + flag = 1; + while (flag) { + flag = 0; + i = pcblockcount; + while (i) { + if ((block = depthfirstordering[--i])) { + lr = lrarray + block->blockIndex; + newC = x; + for (link = block->successors; link; link = link->nextLink) + newC |= lrarray[link->block->blockIndex].x8; + lr->xC = newC; + + new8 = lr->x0 | (lr->xC & ~lr->x4); + if (new8 != lr->x8) { + lr->x8 = new8; + flag = 1; + } + } + } + } +} + +static void computeliveregisters(Object *func) { + Type *returntype; + RegClass rclass; + + returntype = TYPE_FUNC(func->type)->functype; + + for (rclass = 0; rclass < RegClassMax; rclass++) + liveregs[rclass] = lalloc(sizeof(LiveRegs) * pcblockcount); + + computedepthfirstordering(); + computeregisterusedefs(); + + if (TYPE_FITS_IN_REGISTER(returntype)) { + liveregs[RegClass_GPR][epilogue->blockIndex].x0 |= 1 << 3; + if (TYPE_IS_8BYTES(returntype)) + liveregs[RegClass_GPR][epilogue->blockIndex].x0 |= 1 << 4; + } else if (IS_TYPE_FLOAT(returntype)) { + liveregs[RegClass_FPR][epilogue->blockIndex].x0 |= 1 << 1; + } else if (IS_TYPE_VECTOR(returntype)) { + liveregs[RegClass_VR][epilogue->blockIndex].x0 |= 1 << 2; + } + + for (rclass = 0; rclass < RegClassMax; rclass++) { + if (rclass == RegClass_GPR) + computeliveness(liveregs[rclass], 2); + else + computeliveness(liveregs[rclass], 0); + } +} + +static void computeinstructionpredecessors(PCodeBlock *block) { + PCode *nop; + RegClass rclass; + SInt32 i; + SInt32 defID; + SInt32 totalOps; + PCode *instr; + PCodeArg *op; + PCode *array[RegClassMax][32]; + + nop = makepcode(PC_NOP); + for (rclass = 0; rclass < RegClassMax; rclass++) { + for (i = 0; i < 32; i++) { + array[rclass][i] = nop; + } + } + + totalOps = 0; + for (instr = block->firstPCode; instr; instr = instr->nextPCode) + totalOps += instr->argCount; + + if (totalOps) { + defininginstruction = oalloc(sizeof(PCode *) * totalOps); + for (i = 0; i < totalOps; i++) + defininginstruction[i] = nop; + + defID = 0; + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + instr->defID = defID; + + for (i = 0, op = instr->args; i < instr->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectRead)) + defininginstruction[defID + i] = array[op->arg][op->data.reg.reg]; + } + + for (i = 0, op = instr->args; i < instr->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite)) + array[op->arg][op->data.reg.reg] = instr; + } + + defID += instr->argCount; + } + } +} + +static int dead(PCode *instr, UInt32 *masks) { + int i; + PCodeArg *op; + + if (instr->block->flags & (fIsProlog | fIsEpilogue)) + return 0; + if (instr->flags & (fIsBranch | fIsWrite | fIsCall | fIsVolatile | fSideEffects)) + return 0; + if (!instr->block->predecessors) + return 1; + + for (op = instr->args, i = instr->argCount; i--; op++) { + if ( + op->kind == PCOp_REGISTER && + (op->data.reg.effect & EffectWrite) && + ((1 << op->data.reg.reg) & masks[op->arg]) + ) + return 0; + } + + return 1; +} + +static int definedbetween(PCode *start, PCode *end, PCodeArg *checkOp) { + PCode *instr; + PCodeArg *op; + int i; + + for (instr = start->prevPCode; instr != end; instr = instr->prevPCode) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if (PC_OP_IS_WRITE_REGISTER(op, checkOp->arg, checkOp->data.reg.reg)) + return 1; + } + } + + return 0; +} + +static int usedbetween(PCode *start, PCode *end, PCodeArg *checkOp) { + PCode *instr; + PCodeArg *op; + int i; + + for (instr = start->prevPCode; instr != end; instr = instr->prevPCode) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if (PC_OP_IS_READ_REGISTER(op, checkOp->arg, checkOp->data.reg.reg)) + return 1; + } + } + + return 0; +} + +static int isSPRlive(PCode *instr, int reg) { + PCode *scan; + PCodeArg *op; + int i; + + for (scan = instr->nextPCode; scan; scan = scan->nextPCode) { + for (op = scan->args, i = scan->argCount; i--; op++) { + if (PC_OP_IS_READ_REGISTER(op, RegClass_SPR, reg)) + return 1; + + if (PC_OP_IS_WRITE_REGISTER(op, RegClass_SPR, reg)) + return 0; + } + } + + return 0; +} + +static SInt32 extractedbits(PCode *instr) { + SInt32 a = instr->args[2].data.imm.value; + SInt32 b = instr->args[3].data.imm.value; + SInt32 c = instr->args[4].data.imm.value; + SInt32 val; + + if (b <= c) + val = ((b > 31) ? 0 : (0xFFFFFFFFu >> b)) & ~(((c + 1) > 31) ? 0 : (0xFFFFFFFFu >> (c + 1))); + else + val = ((b > 31) ? 0 : (0xFFFFFFFFu >> b)) | ~(((c + 1) > 31) ? 0 : (0xFFFFFFFFu >> (c + 1))); + + return ((UInt32) val >> a) | (val << (32 - a)); +} + +static int canmergemasks(SInt32 b1, SInt32 c1, SInt32 a, SInt32 b2, SInt32 c2, short *first, short *last) { + SInt32 val1; + SInt32 val2; + + if (b1 <= c1) + val1 = ((b1 > 31) ? 0 : (0xFFFFFFFFu >> b1)) & ~(((c1 + 1) > 31) ? 0 : (0xFFFFFFFFu >> (c1 + 1))); + else + val1 = ((b1 > 31) ? 0 : (0xFFFFFFFFu >> b1)) | ~(((c1 + 1) > 31) ? 0 : (0xFFFFFFFFu >> (c1 + 1))); + + if (b2 <= c2) + val2 = ((b2 > 31) ? 0 : (0xFFFFFFFFu >> b2)) & ~(((c2 + 1) > 31) ? 0 : (0xFFFFFFFFu >> (c2 + 1))); + else + val2 = ((b2 > 31) ? 0 : (0xFFFFFFFFu >> b2)) | ~(((c2 + 1) > 31) ? 0 : (0xFFFFFFFFu >> (c2 + 1))); + + return ismaskconstant(val2 & ((val1 << a) | ((UInt32) val1 >> (32 - a))), first, last); +} + +static int canuseupdatetest(PCodeBlock *block, PCode *instr, int count1, int count2, int count3, int count4, int count5) { + int i; + PCLink *link; + + while (instr) { + if (++count1 > 17) + return 1; + + switch (instr->op) { + case PC_DIVW: + case PC_DIVWU: + case PC_MULHW: + case PC_MULHWU: + case PC_MULLI: + case PC_MULLW: + return count3 == 0; + case PC_MTXER: + case PC_MTCTR: + case PC_MTLR: + case PC_MTCRF: + case PC_MTMSR: + case PC_MTSPR: + case PC_MFMSR: + case PC_MFSPR: + case PC_MFXER: + case PC_MFCTR: + case PC_MFLR: + case PC_MFCR: + case PC_ECIWX: + case PC_ECOWX: + case PC_DCBI: + case PC_ICBI: + case PC_MCRFS: + case PC_MCRXR: + case PC_MFTB: + case PC_MFSR: + case PC_MTSR: + case PC_MFSRIN: + case PC_MTSRIN: + case PC_MTFSB0: + case PC_MTFSB1: + case PC_MTFSFI: + case PC_SC: + case PC_TLBIA: + case PC_TLBIE: + case PC_TLBLD: + case PC_TLBLI: + case PC_TLBSYNC: + case PC_TW: + case PC_TRAP: + case PC_TWI: + case PC_MFROM: + case PC_DSA: + case PC_ESA: + return 1; + case PC_CRAND: + case PC_CRANDC: + case PC_CREQV: + case PC_CRNAND: + case PC_CRNOR: + case PC_CROR: + case PC_CRORC: + case PC_CRXOR: + case PC_MCRF: + if (++count5 > 1) + return 1; + } + + if (instr->flags & (fIsRead | fIsWrite)) { + if (++count4 > 1) + return 1; + } else if (instr->flags & fIsBranch) { + if (++count2 > 2) + return 1; + + for (i = 0; i < instr->argCount; i++) { + if (PC_OP_IS_ANY_REGISTER(&instr->args[i], RegClass_CRFIELD)) { + ++count3; + break; + } + } + } + + instr = instr->nextPCode; + } + + if (block && block->successors) { + for (link = block->successors; link; link = link->nextLink) { + if (link->block && !canuseupdatetest(link->block, link->block->firstPCode, count1, count2, count3, count4, count5)) + return 0; + } + } + + return 1; +} + +static int canuseupdate(PCode *instr) { + return canuseupdatetest(instr->block, instr->nextPCode, 0, 0, 0, 0, 0); +} + +static int MR_Rx_Rx(PCode *instr, UInt32 *masks) { + if ( + instr->args[0].data.reg.reg == instr->args[1].data.reg.reg && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) + ) + { + deletepcode(instr); + return 1; + } + + return 0; +} + +static int FMR_Fx_Fx(PCode *instr, UInt32 *masks) { + if ( + instr->args[0].data.reg.reg == instr->args[1].data.reg.reg && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) + ) + { + deletepcode(instr); + return 1; + } + + return 0; +} + +static int VMR_Vx_Vx(PCode *instr, UInt32 *masks) { + if ( + instr->args[0].data.reg.reg == instr->args[1].data.reg.reg && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) + ) + { + deletepcode(instr); + return 1; + } + + return 0; +} + +static int MR_MR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_MR && + instr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg && + !definedbetween(instr, defInstr, &instr->args[0]) && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) + ) + { + deletepcode(instr); + return 1; + } + + return 0; +} + +static int FMR_FMR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_FMR && + instr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + !definedbetween(instr, defInstr, &instr->args[0]) + ) + { + deletepcode(instr); + return 1; + } + + return 0; +} + +static int VMR_VMR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_VMR && + instr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + !definedbetween(instr, defInstr, &instr->args[0]) + ) + { + deletepcode(instr); + return 1; + } + + return 0; +} + +static int VMR_VMRP(PCode *instr, UInt32 *masks) { + PCode *prev = instr->prevPCode; + PCode *next = instr->nextPCode; + int prevFlag = 0; + int prevPermute = 0; + int nextFlag = 0; + int nextPermute = 0; + + if (prev) { + prevFlag = (prev->flags & fOpTypeMask) == fOpTypeVR; + prevPermute = uses_vpermute_unit(prev); + } + if (next) { + nextFlag = (next->flags & fOpTypeMask) == fOpTypeVR; + nextPermute = uses_vpermute_unit(next); + } + + if (prev) { + if (next) { + if (prevFlag && !prevPermute) { + if (nextFlag) { + if (!nextPermute) { + change_opcode(instr, PC_VMRP); + return 1; + } + } else { + change_opcode(instr, PC_VMRP); + return 1; + } + } + } else { + if (prevFlag && !prevPermute) { + change_opcode(instr, PC_VMRP); + return 1; + } + } + } else { + if (next && nextFlag && !nextPermute) { + change_opcode(instr, PC_VMRP); + return 1; + } + } + + return 0; +} + +static int MR_CMPI(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + PCodeArg op; + + if ( + instr->args[0].data.reg.reg == 0 && + instr->args[2].data.imm.value == 0 && + (PCODE_FLAG_SET_F(defInstr) & (fSideEffects | fCanSetRecordBit | fOpTypeGPR)) == (fCanSetRecordBit | fOpTypeGPR) && + !usedbetween(instr, defInstr, &instr->args[0]) && + !definedbetween(instr, defInstr, &instr->args[0]) + ) + { + if (defInstr->op == PC_ADDI) { + op.kind = PCOp_REGISTER; + op.arg = RegClass_SPR; + op.data.reg.reg = 0; + op.data.reg.effect = EffectRead | EffectWrite; + if (usedbetween(instr, defInstr, &op)) + return 0; + } + + pcsetrecordbit(defInstr); + deletepcode(instr); + return 1; + } + + return 0; +} + +static int EXTSB_RLWINM(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_EXTSB && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + (extractedbits(instr) & 0xFFFFFF00) == 0 + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int EXTSH_RLWINM(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_EXTSH && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + (extractedbits(instr) & 0xFFFF0000) == 0 + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int LBZ_RLWINM(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + (defInstr->op == PC_LBZ || defInstr->op == PC_LBZX) && + instr->args[2].data.imm.value == 0 && + instr->args[3].data.imm.value <= 24 && + instr->args[4].data.imm.value == 31 && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + !definedbetween(instr, defInstr, &instr->args[0]) && + !usedbetween(instr, defInstr, &instr->args[0]) + ) + { + defInstr->args[0].data.reg.reg = instr->args[0].data.reg.reg; + deletepcode(instr); + return 1; + } + + return 0; +} + +static int LHZ_RLWINM(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + (defInstr->op == PC_LHZ || defInstr->op == PC_LHZX) && + instr->args[2].data.imm.value == 0 && + instr->args[3].data.imm.value <= 16 && + instr->args[4].data.imm.value == 31 && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + !definedbetween(instr, defInstr, &instr->args[0]) && + !usedbetween(instr, defInstr, &instr->args[0]) + ) + { + defInstr->args[0].data.reg.reg = instr->args[0].data.reg.reg; + deletepcode(instr); + return 1; + } + + return 0; +} + +static int LHA_EXTSH(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_LHA && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + !definedbetween(instr, defInstr, &instr->args[0]) && + !usedbetween(instr, defInstr, &instr->args[0]) + ) + { + defInstr->args[0].data.reg.reg = instr->args[0].data.reg.reg; + deletepcode(instr); + return 1; + } + + if ( + defInstr->op == PC_EXTSB && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) + ) + { + if (defInstr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg) { + if ( + !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg)) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + { + change_opcode(instr, PC_EXTSB); + deletepcode(defInstr); + return 1; + } + } else { + if (!definedbetween(instr, defInstr, &defInstr->args[1])) { + change_opcode(instr, PC_EXTSB); + instr->args[1] = defInstr->args[1]; + } else { + change_opcode(instr, PC_MR); + } + return 1; + } + } + + return 0; +} + +static int ADDI_L_S(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + SInt32 addleft; + SInt32 addright; + + if (defInstr->op == PC_ADDI && instr->args[2].kind == PCOp_IMMEDIATE) { + if (!PC_OP_IS_REGISTER(&instr->args[0], RegClass_GPR, instr->args[1].data.reg.reg)) { + if ( + instr->args[2].data.imm.value == 0 && + defInstr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + (!(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg)) || canuseupdate(instr)) + ) + { + if (!(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) { + instr->args[2] = defInstr->args[2]; + } else { + instr->op++; + instr->args[1].data.reg.effect |= EffectWrite; + instr->args[2] = defInstr->args[2]; + } + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + addleft = 0x1FFFF; + addright = instr->args[2].data.imm.value; + if (defInstr->args[2].kind == PCOp_IMMEDIATE) { + addleft = defInstr->args[2].data.imm.value; + } else if (defInstr->args[2].kind == PCOp_MEMORY) { + if (defInstr->args[2].data.mem.obj->datatype == DLOCAL) + addleft = defInstr->args[2].data.mem.offset + defInstr->args[2].data.mem.obj->u.var.uid; + else if (addright == 0) + addleft = 0; + else + return 0; + } + + if (!FITS_IN_SHORT(addleft + addright)) + return 0; + + if ( + !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg)) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + !definedbetween(instr, defInstr, &defInstr->args[1]) + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + if (defInstr->args[2].kind == PCOp_MEMORY) { + instr->args[2] = defInstr->args[2]; + instr->args[2].data.mem.offset += addright; + if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + instr->alias = make_alias( + instr->args[2].data.mem.obj, + instr->args[2].data.mem.offset, + nbytes_loaded_or_stored_by(instr) + ); + } + } else { + instr->args[2].data.imm.value = addleft + addright; + } + + deletepcode(defInstr); + return 1; + } + + if ( + instr->args[1].data.reg.reg != defInstr->args[1].data.reg.reg && + !definedbetween(instr, defInstr, &defInstr->args[1]) + ) + { + if (defInstr->args[2].kind == PCOp_MEMORY && defInstr->args[2].data.mem.obj->datatype != DLOCAL) + return 0; + + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + if (defInstr->args[2].kind == PCOp_MEMORY) { + instr->args[2] = defInstr->args[2]; + instr->args[2].data.mem.offset += addright; + if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + instr->alias = make_alias( + instr->args[2].data.mem.obj, + instr->args[2].data.mem.offset, + nbytes_loaded_or_stored_by(instr) + ); + } + } else { + instr->args[2].data.imm.value = addleft + addright; + } + + return 1; + } + } + } else { + if ( + defInstr->op == PC_MR && + PC_OP_IS_ANY_REGISTER(&defInstr->args[1], RegClass_GPR) && + defInstr->args[1].data.reg.reg != 0 && + !definedbetween(instr, defInstr, &defInstr->args[1]) + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + } + } + + return 0; +} + +static int ADDI_LU_SU(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + instr->args[2].kind == PCOp_IMMEDIATE && + defInstr->args[2].kind == PCOp_IMMEDIATE && + defInstr->op == PC_ADDI && + defInstr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg && + !(instr->args[0].arg == instr->args[1].arg && instr->args[0].data.reg.reg == instr->args[1].data.reg.reg) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + FITS_IN_SHORT(instr->args[2].data.imm.value + defInstr->args[2].data.imm.value) + ) + { + if ((instr->args[2].data.imm.value + defInstr->args[2].data.imm.value) == 0) { + instr->op--; + instr->args[1].data.reg.effect &= ~EffectWrite; + instr->args[2].data.imm.value = 0; + } else { + instr->args[2].data.imm.value += defInstr->args[2].data.imm.value; + } + + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int L_S_ADDI(PCode *instr, UInt32 *masks) { + PCode *scan; + PCodeArg *op; + int i; + short reg; + + if (instr->args[2].kind != PCOp_IMMEDIATE) + return 0; + + reg = instr->args[1].data.reg.reg; + if (!canuseupdate(instr)) + return 0; + + for (scan = instr->nextPCode; scan; scan = scan->nextPCode) { + for (op = scan->args, i = scan->argCount; i--; op++) { + if (PC_OP_IS_READ_REGISTER(op, RegClass_GPR, reg)) + return 0; + + if (PC_OP_IS_WRITE_REGISTER(op, RegClass_GPR, reg)) { + if (scan->op != PC_ADDI) + return 0; + if (scan->args[2].kind != PCOp_IMMEDIATE) + return 0; + + if ( + instr->args[2].data.imm.value == scan->args[2].data.imm.value && + scan->args[0].data.reg.reg == scan->args[1].data.reg.reg && + !(instr->args[0].arg == instr->args[1].arg && instr->args[0].data.reg.reg == instr->args[1].data.reg.reg) + ) + { + if (!(masks[RegClass_GPR] & (1 << scan->args[0].data.reg.reg))) { + instr->args[2] = scan->args[2]; + } else { + instr->op++; + instr->args[1].data.reg.effect |= EffectWrite; + instr->args[2] = scan->args[2]; + } + + change_opcode(scan, PC_NOP); + change_num_operands(scan, 0); + deletepcode(scan); + return 1; + } + + return 0; + } + } + } + + return 0; +} + +static int LI_CMP_BC(PCode *instr, UInt32 *masks) { + PCode *defInstr; + PCode *defInstr2; + PCLink *link; + PCLink **ptr; + + if (instr->args[1].data.imm.value == 2) { + defInstr = defininginstruction[instr->defID]; + if ((defInstr->op == PC_CMPLI || defInstr->op == PC_CMPI) && defInstr->args[0].data.reg.reg == 0) { + defInstr2 = defininginstruction[defInstr->defID + 1]; + if ( + defInstr2->op == PC_LI && + defInstr2->args[1].kind == PCOp_IMMEDIATE && + (instr->op == PC_BT) == (defInstr2->args[1].data.imm.value == defInstr->args[2].data.imm.value) + ) + { + change_opcode(instr, PC_B); + instr->args[0] = instr->args[2]; + change_num_operands(instr, 1); + + defininginstruction[instr->defID] = defininginstruction[instr->defID + 1]; + + for (ptr = &instr->block->successors; (link = *ptr); ptr = &link->nextLink) { + if (link->block == instr->block->nextBlock) { + *ptr = link->nextLink; + break; + } + } + + for (ptr = &instr->block->nextBlock->predecessors; (link = *ptr); ptr = &link->nextLink) { + if (link->block == instr->block) { + *ptr = link->nextLink; + break; + } + } + } + } + } + + return 0; +} + +static int RLWINM_CMPLI_BC(PCode *instr, UInt32 *masks) { + PCode *defInstr; + PCode *defInstr2; + + if (instr->args[1].data.imm.value == 2) { + defInstr = defininginstruction[instr->defID]; + if (defInstr->op == PC_CMPLI && defInstr->args[0].data.reg.reg == 0 && defInstr->args[2].data.imm.value == 0) { + defInstr2 = defininginstruction[defInstr->defID + 1]; + if ( + (PCODE_FLAG_SET_F(defInstr2) & (fSideEffects | fCanSetRecordBit | fOpTypeGPR)) == (fCanSetRecordBit | fOpTypeGPR) && + !usedbetween(defInstr, defInstr2, &defInstr->args[0]) && + !definedbetween(defInstr, defInstr2, &defInstr->args[0]) + ) + { + pcsetrecordbit(defInstr2); + defininginstruction[instr->defID] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + } + } + + return 0; +} + +static int LBZ_EXTSB_CMPI_BC(PCode *instr, UInt32 *masks) { + PCode *defInstr; + PCode *defInstr2; + + if (instr->args[1].data.imm.value == 2) { + defInstr = defininginstruction[instr->defID]; + if ( + ( + (defInstr->op == PC_CMPI || defInstr->op == PC_CMPLI) && + defInstr->args[2].data.imm.value >= 0 && + defInstr->args[2].data.imm.value <= 127 + ) + || + ( + (defInstr->op == PC_EXTSB || defInstr->op == PC_EXTSH) && + (PCODE_FLAG_SET_F(defInstr) & fRecordBit) + ) + ) + { + defInstr2 = defininginstruction[defInstr->defID + 1]; + if ( + defInstr2->op == PC_EXTSB && + defininginstruction[defInstr2->defID + 1]->op == PC_LBZ && + !(masks[RegClass_GPR] & (1 << defInstr2->args[0].data.reg.reg)) && + !usedbetween(instr, defInstr, &defInstr2->args[0]) && + !usedbetween(defInstr, defInstr2, &defInstr2->args[0]) && + !definedbetween(defInstr, defInstr2, &defInstr2->args[1]) + ) + { + defInstr->args[1].data.reg.reg = defInstr2->args[1].data.reg.reg; + defininginstruction[defInstr->defID + 1] = defininginstruction[defInstr2->defID + 1]; + deletepcode(defInstr2); + return 1; + } + } + } + + return 0; +} + +static int FRSP_STFS(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID]; + + if ( + defInstr->op == PC_FRSP && + !(masks[RegClass_FPR] & (1 << defInstr->args[0].data.reg.reg)) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + { + instr->args[0].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int NOT_AND(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 2]; + + if ( + defInstr->op == PC_NOT && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + { + instr->args[2].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 2] = defininginstruction[defInstr->defID + 1]; + change_opcode(instr, PC_ANDC); + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int LI_MR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_LI && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + { + change_opcode(instr, PC_LI); + instr->args[1] = defInstr->args[1]; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int VSPLTIS_VMR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + short opcode = defInstr->op; + + if ( + (opcode == PC_VSPLTISB || opcode == PC_VSPLTISH || opcode == PC_VSPLTISW) && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_VR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + { + change_opcode(instr, opcode); + instr->args[1] = defInstr->args[1]; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int L_MR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + (defInstr->flags & fIsRead) && + !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg)) && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + !usedbetween(instr, defInstr, &instr->args[0]) && + !definedbetween(instr, defInstr, &instr->args[0]) + ) + { + defInstr->args[0].data.reg.reg = instr->args[0].data.reg.reg; + deletepcode(instr); + return 1; + } + + return 0; +} + +static int L_FMR(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + (defInstr->flags & fIsRead) && + !(masks[RegClass_FPR] & (1 << defInstr->args[0].data.reg.reg)) && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + !usedbetween(instr, defInstr, &instr->args[0]) && + !definedbetween(instr, defInstr, &instr->args[0]) + ) + { + defInstr->args[0].data.reg.reg = instr->args[0].data.reg.reg; + deletepcode(instr); + return 1; + } + + return 0; +} + +static int L_S(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID]; + SInt32 isIndexed; + SInt32 isFloat; + SInt32 isVector; + PCode *scan; + SInt32 loadSize; + SInt32 defInstrOffset; + SInt32 scanOffset; + SInt32 storeSize; + + if (PCODE_FLAG_SET_F(instr) & (fIsVolatile | fSideEffects | fUpdatesPtr)) + return 0; + if (PCODE_FLAG_SET_F(defInstr) & (fIsVolatile | fSideEffects | fUpdatesPtr)) + return 0; + + if ( + (defInstr->flags & fIsRead) && + PC_OP_IS_REGISTER(&defInstr->args[1], RegClass_GPR, instr->args[1].data.reg.reg) && + defInstr->args[2].kind == instr->args[2].kind && + !definedbetween(instr, defInstr, &instr->args[1]) + ) + { + if (instr->args[2].kind == PCOp_IMMEDIATE) { + if (instr->args[2].data.imm.value != defInstr->args[2].data.imm.value) + return 0; + } else if (instr->args[2].kind == PCOp_MEMORY) { + if (instr->args[2].data.mem.offset != defInstr->args[2].data.mem.offset || + instr->args[2].data.mem.obj != defInstr->args[2].data.mem.obj) + return 0; + } else if (instr->args[2].kind == PCOp_REGISTER && instr->args[2].arg == RegClass_GPR) { + if (instr->args[2].data.reg.reg != defInstr->args[2].data.reg.reg || + definedbetween(instr, defInstr, &instr->args[2])) + return 0; + } else { + return 0; + } + + isIndexed = 0; + isFloat = 0; + isVector = 0; + switch (defInstr->op) { + case PC_LBZX: + isIndexed = 1; + case PC_LBZ: + loadSize = 1; + break; + case PC_LHZX: + case PC_LHAX: + isIndexed = 1; + case PC_LHZ: + case PC_LHA: + loadSize = 2; + break; + case PC_LWZX: + isIndexed = 1; + case PC_LWZ: + loadSize = 4; + break; + case PC_LFSX: + isIndexed = 1; + case PC_LFS: + isFloat = 1; + loadSize = 4; + break; + case PC_LFDX: + isIndexed = 1; + case PC_LFD: + isFloat = 1; + loadSize = 8; + break; + case PC_LVX: + case PC_LVXL: + isIndexed = 1; + isVector = 1; + loadSize = 16; + break; + default: + return 0; + } + + switch (instr->op) { + case PC_STBX: + if (!isIndexed) return 0; + case PC_STB: + if (isFloat) return 0; + if (loadSize != 1) return 0; + break; + case PC_STHX: + if (!isIndexed) return 0; + case PC_STH: + if (isFloat) return 0; + if (loadSize != 2) return 0; + break; + case PC_STWX: + if (!isIndexed) return 0; + case PC_STW: + if (isFloat) return 0; + if (loadSize != 4) return 0; + break; + case PC_STFSX: + if (!isIndexed) return 0; + case PC_STFS: + if (!isFloat) return 0; + if (loadSize != 4) return 0; + break; + case PC_STFDX: + if (!isIndexed) return 0; + case PC_STFD: + if (!isFloat) return 0; + if (loadSize != 8) return 0; + break; + case PC_STVX: + case PC_STVXL: + if (!isIndexed) return 0; + if (!isVector) return 0; + if (loadSize != 16) return 0; + break; + default: + return 0; + } + + for (scan = instr->prevPCode; scan && scan != defInstr; scan = scan->prevPCode) { + if (scan->flags & fIsWrite) { + if (scan->args[1].data.reg.reg != instr->args[1].data.reg.reg) + return 0; + if (scan->args[2].kind != defInstr->args[2].kind) + return 0; + + if (scan->args[2].kind == PCOp_MEMORY) { + if (instr->args[2].data.mem.obj == scan->args[2].data.mem.obj) { + if (instr->args[2].data.mem.offset == defInstr->args[2].data.mem.offset) + return 0; + defInstrOffset = defInstr->args[2].data.mem.offset; + scanOffset = scan->args[2].data.mem.offset; + } + } else if (scan->args[2].kind == PCOp_IMMEDIATE) { + if (instr->args[1].data.reg.reg != scan->args[1].data.reg.reg) + return 0; + if (instr->args[2].data.imm.value == scan->args[2].data.imm.value) + return 0; + defInstrOffset = defInstr->args[2].data.imm.value; + scanOffset = scan->args[2].data.imm.value; + } else { + return 0; + } + + switch (scan->op) { + case PC_STB: + case PC_STBX: + storeSize = 1; + break; + case PC_STH: + case PC_STHX: + storeSize = 2; + break; + case PC_STW: + case PC_STWX: + case PC_STFS: + case PC_STFSX: + storeSize = 4; + break; + case PC_STFD: + case PC_STFDX: + storeSize = 8; + break; + case PC_STVX: + case PC_STVXL: + storeSize = 16; + break; + default: + return 0; + } + + if (defInstrOffset > scanOffset) { + if ((scanOffset + storeSize) > defInstrOffset) + return 0; + } else { + if ((defInstrOffset + loadSize) > scanOffset) + return 0; + } + } + } + + deletepcode(instr); + return 1; + } + + return 0; +} + +static int RLWINM_RLWINM(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + short start; + short end; + + if ( + defInstr->op == PC_RLWINM && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !( + defInstr->args[0].data.reg.reg == defInstr->args[1].data.reg.reg && + ( + (defInstr->args[0].data.reg.reg != instr->args[0].data.reg.reg && (masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) || + usedbetween(instr, defInstr, &defInstr->args[0]) + ) + ) && + canmergemasks( + defInstr->args[3].data.imm.value, + defInstr->args[4].data.imm.value, + instr->args[2].data.imm.value, + instr->args[3].data.imm.value, + instr->args[4].data.imm.value, + &start, &end) + ) + { + if (instr->op == PC_RLWIMI) { + if (instr->args[0].data.reg.reg == defInstr->args[0].data.reg.reg) + return 0; + if (start != instr->args[3].data.imm.value || end != instr->args[4].data.imm.value) + return 0; + } + + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + instr->args[2].data.imm.value = (instr->args[2].data.imm.value + defInstr->args[2].data.imm.value) & 31; + instr->args[3].data.imm.value = start; + instr->args[4].data.imm.value = end; + + if ( + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + deletepcode(defInstr); + + return 1; + } + + if ( + defInstr->op == PC_MR && + instr->op == PC_RLWINM && + !definedbetween(instr, defInstr, &defInstr->args[1]) + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + if ( + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !usedbetween(instr, defInstr, &defInstr->args[0]) + ) + deletepcode(defInstr); + + return 1; + } + + return 0; +} + +static int MULLI_MULLI(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_MULLI && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + FITS_IN_SHORT(instr->args[2].data.imm.value * defInstr->args[2].data.imm.value) + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + instr->args[2].data.imm.value *= defInstr->args[2].data.imm.value; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int ADDI_ADDI(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_ADDI && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + instr->args[2].kind == PCOp_IMMEDIATE && + defInstr->args[2].kind == PCOp_IMMEDIATE && + FITS_IN_SHORT(instr->args[2].data.imm.value + defInstr->args[2].data.imm.value) + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + instr->args[2].data.imm.value += defInstr->args[2].data.imm.value; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int SRAWI_SRAWI(PCode *instr, UInt32 *masks) { + PCode *defInstr = defininginstruction[instr->defID + 1]; + + if ( + defInstr->op == PC_SRAWI && + (defInstr->args[0].data.reg.reg == instr->args[0].data.reg.reg || !(masks[RegClass_GPR] & (1 << defInstr->args[0].data.reg.reg))) && + !(PCODE_FLAG_SET_F(defInstr) & fRecordBit) && + !definedbetween(instr, defInstr, &defInstr->args[1]) && + !usedbetween(instr, defInstr, &defInstr->args[0]) && + instr->args[2].kind == PCOp_IMMEDIATE && + defInstr->args[2].kind == PCOp_IMMEDIATE && + (instr->args[2].data.imm.value + defInstr->args[2].data.imm.value) < 32 && + (instr->args[2].data.imm.value + defInstr->args[2].data.imm.value) > 0 + ) + { + instr->args[1].data.reg.reg = defInstr->args[1].data.reg.reg; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr->defID + 1]; + + instr->args[2].data.imm.value += defInstr->args[2].data.imm.value; + deletepcode(defInstr); + return 1; + } + + return 0; +} + +static int MR_ADDI(PCode *instr, UInt32 *masks) { + PCode *prev = instr->prevPCode; + PCode *next = instr->nextPCode; + int prevFlag = 0; + int nextFlag = 0; + + if (copts.processor == CPU_PPC603e) { + if (prev) + prevFlag = (prev->flags & fOpTypeMask) == fOpTypeGPR; + if (next) + nextFlag = (next->flags & fOpTypeMask) == fOpTypeGPR; + + if ( + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + instr->argCount >= 2 && + instr->args[1].data.reg.reg != 0 && + (prevFlag || nextFlag) + ) + { + change_opcode(instr, PC_ADDI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = 0; + instr->args[2].data.imm.obj = NULL; + change_num_operands(instr, 3); + } + } + + return 0; +} + +static int rotatedefinedusedtest(UInt32 *masks, PCode *instr, PCode *a, PCode *b, PCode *subfic) { + PCode *scan; + PCodeArg *op; + int i; + int reg1; + int reg2; + + if ( + (masks[RegClass_GPR] & (1 << subfic->args[0].data.reg.reg)) && + subfic->args[0].data.reg.reg != instr->args[0].data.reg.reg && + subfic->args[0].data.reg.reg != a->args[2].data.reg.reg + ) + return 1; + + for (scan = instr->block->firstPCode; scan != instr->block->lastPCode; scan = scan->nextPCode) { + if (scan == a) break; + if (scan == b) break; + if (scan == subfic) break; + } + + reg1 = a->args[1].data.reg.reg; + reg2 = subfic->args[1].data.reg.reg; + while (scan != instr) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + ( + ( + (op->data.reg.reg == reg1 || op->data.reg.reg == reg2) && + (op->data.reg.effect & EffectWrite) + ) + || + ( + op->data.reg.reg == subfic->args[0].data.reg.reg && (op->data.reg.effect & EffectRead) + ) + ) && + scan != a && + scan != b && + scan != subfic + ) + return 1; + } + + scan = scan->nextPCode; + } + + return 0; +} + +static int SRW_SUBFIC_RLW_OR(PCode *instr, UInt32 *masks) { + PCode *subfic; + PCode *defInstr1 = defininginstruction[instr->defID + 1]; + PCode *defInstr2 = defininginstruction[instr->defID + 2]; + + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + return 0; + + if ( + (masks[RegClass_GPR] & (1 << instr->args[1].data.reg.reg)) && + instr->args[1].data.reg.reg != instr->args[0].data.reg.reg + ) + return 0; + + if ( + (masks[RegClass_GPR] & (1 << instr->args[2].data.reg.reg)) && + instr->args[1].data.reg.reg != instr->args[0].data.reg.reg + ) + return 0; + + if (defInstr1->op != PC_SRW && defInstr1->op != PC_SLW) + return 0; + if (defInstr2->op != PC_SRW && defInstr2->op != PC_SLW) + return 0; + + if (usedbetween(instr, defInstr1, &defInstr1->args[0])) + return 0; + if (usedbetween(instr, defInstr2, &defInstr2->args[0])) + return 0; + + if ( + defInstr1->op == PC_SRW && defInstr2->op == PC_SLW && + defInstr1->args[1].data.reg.reg == defInstr2->args[1].data.reg.reg + ) + { + subfic = defininginstruction[defInstr1->defID + 2]; + if ( + subfic->op == PC_SUBFIC && + subfic->args[1].data.reg.reg == defInstr2->args[2].data.reg.reg && + subfic->args[2].data.imm.value == 32 + ) + { + if (rotatedefinedusedtest(masks, instr, defInstr2, defInstr1, subfic)) + return 0; + + change_opcode(instr, PC_RLWNM); + + instr->args[1] = defInstr1->args[1]; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr1->defID + 1]; + + instr->args[2] = defInstr2->args[2]; + defininginstruction[instr->defID + 2] = defininginstruction[defInstr2->defID + 2]; + + instr->args[3].kind = PCOp_IMMEDIATE; + instr->args[3].data.imm.value = 0; + instr->args[3].data.imm.obj = NULL; + + instr->args[4].kind = PCOp_IMMEDIATE; + instr->args[4].data.imm.value = 31; + instr->args[4].data.imm.obj = NULL; + + deletepcode(defInstr1); + deletepcode(defInstr2); + deletepcode(subfic); + return 1; + } + + subfic = defininginstruction[defInstr2->defID + 2]; + if ( + subfic->op == PC_SUBFIC && + subfic->args[1].data.reg.reg == defInstr1->args[2].data.reg.reg && + subfic->args[2].data.imm.value == 32 + ) + { + if (rotatedefinedusedtest(masks, instr, defInstr2, defInstr1, subfic)) + return 0; + + change_opcode(instr, PC_RLWNM); + + instr->args[1] = defInstr1->args[1]; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr1->defID + 1]; + + instr->args[2] = defInstr2->args[2]; + defininginstruction[instr->defID + 2] = defininginstruction[defInstr2->defID + 2]; + + instr->args[3].kind = PCOp_IMMEDIATE; + instr->args[3].data.imm.value = 0; + instr->args[3].data.imm.obj = NULL; + + instr->args[4].kind = PCOp_IMMEDIATE; + instr->args[4].data.imm.value = 31; + instr->args[4].data.imm.obj = NULL; + + deletepcode(defInstr1); + deletepcode(defInstr2); + return 1; + } + } else if ( + defInstr1->op == PC_SLW && defInstr2->op == PC_SRW && + defInstr1->args[1].data.reg.reg == defInstr2->args[1].data.reg.reg + ) + { + subfic = defininginstruction[defInstr1->defID + 2]; + if ( + subfic->op == PC_SUBFIC && + subfic->args[1].data.reg.reg == defInstr2->args[2].data.reg.reg && + subfic->args[2].data.imm.value == 32 + ) + { + if (rotatedefinedusedtest(masks, instr, defInstr1, defInstr2, subfic)) + return 0; + + change_opcode(instr, PC_RLWNM); + + instr->args[1] = defInstr1->args[1]; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr1->defID + 1]; + + instr->args[2] = defInstr1->args[2]; + defininginstruction[instr->defID + 2] = defininginstruction[defInstr1->defID + 2]; + + instr->args[3].kind = PCOp_IMMEDIATE; + instr->args[3].data.imm.value = 0; + instr->args[3].data.imm.obj = NULL; + + instr->args[4].kind = PCOp_IMMEDIATE; + instr->args[4].data.imm.value = 31; + instr->args[4].data.imm.obj = NULL; + + deletepcode(defInstr1); + deletepcode(defInstr2); + return 1; + } + + subfic = defininginstruction[defInstr2->defID + 2]; + if ( + subfic->op == PC_SUBFIC && + subfic->args[1].data.reg.reg == defInstr1->args[2].data.reg.reg && + subfic->args[2].data.imm.value == 32 + ) + { + if (rotatedefinedusedtest(masks, instr, defInstr1, defInstr2, subfic)) + return 0; + + change_opcode(instr, PC_RLWNM); + + instr->args[1] = defInstr1->args[1]; + defininginstruction[instr->defID + 1] = defininginstruction[defInstr1->defID + 1]; + + instr->args[2] = defInstr1->args[2]; + defininginstruction[instr->defID + 2] = defininginstruction[defInstr1->defID + 2]; + + instr->args[3].kind = PCOp_IMMEDIATE; + instr->args[3].data.imm.value = 0; + instr->args[3].data.imm.obj = NULL; + + instr->args[4].kind = PCOp_IMMEDIATE; + instr->args[4].data.imm.value = 31; + instr->args[4].data.imm.obj = NULL; + + deletepcode(defInstr1); + deletepcode(defInstr2); + deletepcode(subfic); + return 1; + } + } + + return 0; +} + +static int RLWINM_RLWIMI_STW(PCode *instr, UInt32 *masks) { + PCode *newInstr; + Boolean isZeroOffset; + int flags; + PCode *scan; + int i; + PCode *array[4]; + + flags = 0; + isZeroOffset = 0; + if (instr->op == PC_STW && instr->args[2].kind == PCOp_IMMEDIATE && instr->args[2].data.imm.value == 0) + isZeroOffset = 1; + + scan = instr; + for (i = 0; i < 4; i++) { + if (scan->op == PC_RLWINM) + array[i] = defininginstruction[scan->defID + 1]; + else + array[i] = defininginstruction[scan->defID]; + + scan = array[i]; + if (array[0]->args[1].data.reg.reg != scan->args[1].data.reg.reg) + return 0; + + if (i < 3) { + if (scan->op != PC_RLWIMI) + return 0; + } else { + if (scan->op != PC_RLWINM) + return 0; + } + + if (scan->args[2].data.imm.value == 8) { + if (scan->args[3].data.imm.value == 24 && scan->args[4].data.imm.value == 31) { + if (flags & 1) + return 0; + flags |= 1; + } else if (scan->args[3].data.imm.value == 8 && scan->args[4].data.imm.value == 15) { + if (flags & 4) + return 0; + flags |= 4; + } else { + return 0; + } + } else if (scan->args[2].data.imm.value == 24) { + if (scan->args[3].data.imm.value == 0 && scan->args[4].data.imm.value == 7) { + if (flags & 8) + return 0; + flags |= 8; + } else if (scan->args[3].data.imm.value == 16 && scan->args[4].data.imm.value == 23) { + if (flags & 2) + return 0; + flags |= 2; + } else { + return 0; + } + } else { + return 0; + } + } + + if (definedbetween(instr, array[3], &array[0]->args[1])) + return 0; + + if (instr->op == PC_STWX) { + change_opcode(instr, PC_STWBRX); + instr->args[0] = array[0]->args[1]; + defininginstruction[instr->defID] = defininginstruction[array[3]->defID + 1]; + return 1; + } + + if (instr->op == PC_STW) { + if (!isZeroOffset) { + if (masks[RegClass_GPR] & (1 << array[0]->args[0].data.reg.reg)) + return 0; + + if (usedbetween(array[2], array[3], &array[0]->args[0])) + return 0; + if (usedbetween(array[1], array[2], &array[0]->args[0])) + return 0; + if (usedbetween(array[0], array[1], &array[0]->args[0])) + return 0; + if (usedbetween(instr, array[0], &array[0]->args[0])) + return 0; + } + + defininginstruction[instr->defID] = defininginstruction[array[3]->defID + 1]; + + if (!isZeroOffset) { + newInstr = makepcode(PC_STWBRX, array[3]->args[1].data.reg.reg, 0, instr->args[0].data.reg.reg); + newInstr->alias = instr->alias; + change_opcode(instr, PC_ADDI); + insertpcodeafter(instr, newInstr); + + masks[RegClass_GPR] |= 1 << newInstr->args[0].data.reg.reg; + masks[RegClass_GPR] |= 1 << newInstr->args[2].data.reg.reg; + instr->args[0].data.reg.effect &= ~EffectRead; + instr->args[0].data.reg.effect |= EffectWrite; + defininginstruction[instr->defID] = instr; + + deletepcode(array[0]); + deletepcode(array[1]); + deletepcode(array[2]); + deletepcode(array[3]); + } else { + change_opcode(instr, PC_STWBRX); + instr->args[0] = array[0]->args[1]; + instr->args[2] = instr->args[1]; + defininginstruction[instr->defID + 2] = defininginstruction[instr->defID + 1]; + + instr->args[1].kind = PCOp_REGISTER; + instr->args[1].arg = RegClass_GPR; + instr->args[1].data.reg.reg = 0; + instr->args[1].data.reg.effect = 0; + } + + return 1; + } + + return 0; +} + +static int RLWINM_RLWIMI_STH(PCode *instr, UInt32 *masks) { + PCode *newInstr; + Boolean isZeroOffset; + int flags; + PCode *scan; + int i; + PCode *array[2]; + + flags = 0; + isZeroOffset = 0; + if (instr->op == PC_STH && instr->args[2].kind == PCOp_IMMEDIATE && instr->args[2].data.imm.value == 0) + isZeroOffset = 1; + + scan = instr; + for (i = 0; i < 2; i++) { + if (scan->op == PC_RLWINM) + array[i] = defininginstruction[scan->defID + 1]; + else + array[i] = defininginstruction[scan->defID]; + + scan = array[i]; + if (array[0]->args[1].data.reg.reg != scan->args[1].data.reg.reg) + return 0; + + if (i < 1) { + if (scan->op != PC_RLWIMI) + return 0; + } else { + if (scan->op != PC_RLWINM) + return 0; + } + + if (scan->args[2].data.imm.value == 8) { + if (scan->args[3].data.imm.value == 16 && scan->args[4].data.imm.value == 23) { + if (flags & 2) + return 0; + flags |= 2; + } else { + return 0; + } + } else if (scan->args[2].data.imm.value == 24) { + if (scan->args[3].data.imm.value == 24 && scan->args[4].data.imm.value == 31) { + if (flags & 1) + return 0; + flags |= 1; + } else { + return 0; + } + } else { + return 0; + } + } + + if (definedbetween(instr, array[1], &array[0]->args[1])) + return 0; + + if (instr->op == PC_STHX) { + change_opcode(instr, PC_STHBRX); + instr->args[0] = array[0]->args[1]; + defininginstruction[instr->defID] = defininginstruction[array[1]->defID + 1]; + return 1; + } + + if (instr->op == PC_STH) { + if (!isZeroOffset) { + if (masks[RegClass_GPR] & (1 << array[0]->args[0].data.reg.reg)) + return 0; + + if (usedbetween(array[0], array[1], &array[0]->args[0])) + return 0; + if (usedbetween(instr, array[0], &array[0]->args[0])) + return 0; + } + + defininginstruction[instr->defID] = defininginstruction[array[1]->defID + 1]; + + if (!isZeroOffset) { + newInstr = makepcode(PC_STHBRX, array[1]->args[1].data.reg.reg, 0, instr->args[0].data.reg.reg); + newInstr->alias = instr->alias; + change_opcode(instr, PC_ADDI); + + instr->args[0].data.reg.effect &= ~EffectRead; + instr->args[0].data.reg.effect |= EffectWrite; + defininginstruction[instr->defID] = instr; + + insertpcodeafter(instr, newInstr); + + masks[RegClass_GPR] |= 1 << newInstr->args[0].data.reg.reg; + masks[RegClass_GPR] |= 1 << newInstr->args[2].data.reg.reg; + + deletepcode(array[0]); + deletepcode(array[1]); + } else { + change_opcode(instr, PC_STHBRX); + instr->args[0] = array[0]->args[1]; + instr->args[2] = instr->args[1]; + defininginstruction[instr->defID + 2] = defininginstruction[instr->defID + 1]; + + instr->args[1].kind = PCOp_REGISTER; + instr->args[1].arg = RegClass_GPR; + instr->args[1].data.reg.reg = 0; + instr->args[1].data.reg.effect = 0; + } + + return 1; + } + + return 0; +} + +static void peepholeoptimizeblock(PCodeBlock *block) { + RegClass rclass; + PCode *instr; + PCodeArg *op; + int i; + Pattern *pattern; + UInt32 masks[RegClassMax]; + + for (rclass = 0; rclass < RegClassMax; rclass++) + masks[rclass] = liveregs[rclass][block->blockIndex].xC; + + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + if (dead(instr, masks)) { + deletepcode(instr); + } else { + pattern = peepholepatterns[instr->op]; + + while (pattern) { + if (pattern->func(instr, masks)) { + if (!instr->block) + break; + pattern = peepholepatterns[instr->op]; + } else { + pattern = pattern->next; + } + } + + if (instr->block) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite)) + masks[op->arg] &= ~(1 << op->data.reg.reg); + } + + for (op = instr->args, i = instr->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectRead)) + masks[op->arg] |= 1 << op->data.reg.reg; + } + } + } + } +} + +static SInt32 computepossiblemask(PCode *instr, short reg) { + SInt32 mask; + SInt32 val; + PCodeArg *op; + int i; + + mask = 0xFFFFFFFF; + while (instr) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if (PC_OP_IS_WRITE_REGISTER(op, RegClass_GPR, reg)) { + switch (instr->op) { + case PC_LBZ: + case PC_LBZU: + case PC_LBZX: + case PC_LBZUX: + mask = 0xFF; + break; + + case PC_LHZ: + case PC_LHZU: + case PC_LHZX: + case PC_LHZUX: + mask = 0xFFFF; + break; + + case PC_LI: + mask = instr->args[1].data.imm.value; + break; + + case PC_SRAWI: + mask = computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg) >> instr->args[2].data.imm.value; + break; + + case PC_RLWINM: + val = computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg); + mask = (val << instr->args[2].data.imm.value) | ((UInt32) val >> (32 - instr->args[2].data.imm.value)); + + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) + val = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) & ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + else + val = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) | ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + + mask &= val; + break; + + case PC_RLWIMI: + val = computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg); + mask = (val << instr->args[2].data.imm.value) | ((UInt32) val >> (32 - instr->args[2].data.imm.value)); + + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) + val = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) & ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + else + val = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) | ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + + mask &= val; + mask |= computepossiblemask(instr->prevPCode, instr->args[0].data.reg.reg); + break; + + case PC_OR: + mask = computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg) | + computepossiblemask(instr->prevPCode, instr->args[2].data.reg.reg); + break; + + case PC_ORI: + mask = instr->args[2].data.imm.value | + computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg); + break; + + case PC_AND: + mask = computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg) & + computepossiblemask(instr->prevPCode, instr->args[2].data.reg.reg); + break; + + case PC_ANDI: + mask = instr->args[2].data.imm.value & + computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg); + break; + + case PC_MR: + mask = computepossiblemask(instr->prevPCode, instr->args[1].data.reg.reg); + break; + } + + return mask; + } + } + instr = instr->prevPCode; + } + + return mask; +} + +static UInt32 fillmaskholes(UInt32 mask) { + UInt32 oneBit; + UInt32 allBits; + UInt32 result; + + oneBit = 1; + allBits = 0xFFFFFFFF; + result = 0; + + if ((mask & 1) && (mask & 0x80000000)) { + result = 0xFFFFFFFF; + while ((mask & oneBit) == 1) { + oneBit <<= 1; + } + + while ((mask & oneBit) == 0) { + result &= ~oneBit; + oneBit <<= 1; + } + + return result; + } else { + while ((mask & oneBit) == 0 && (mask & allBits) != 0) { + oneBit <<= 1; + allBits <<= 1; + } + while ((mask & allBits) != 0) { + result |= oneBit; + oneBit <<= 1; + allBits <<= 1; + } + return result; + } +} + +static int canuseinsert(PCode *instr1, PCode *instr2, short reg) { + if (computepossiblemask(instr2, reg) & fillmaskholes(computepossiblemask(instr1, instr1->args[0].data.reg.reg))) + return 0; + return 1; +} + +static PCode *find_def_backwords(PCode *instr, short reg) { + int i; + + while (instr) { + for (i = 0; i < instr->argCount; i++) { + if (PC_OP_IS_WRITE_REGISTER(&instr->args[i], RegClass_GPR, reg)) + return instr; + } + instr = instr->prevPCode; + } + + return NULL; +} + +static void adjustforward(PCodeBlock *block) { + PCode *instr; + PCode *scan; + PCode *tmp; + PCodeArg *op; + int i; + short opcode; + short reg0; + short reg1; + SInt32 valA; + SInt32 valB; + + instr = block->firstPCode; + while (instr) { + if (instr->op == PC_RLWINM) { + SInt32 val2; + SInt32 val3; + SInt32 val4; + short start; + short end; + + short flag1 = 0; + short flag2 = 0; + reg0 = instr->args[0].data.reg.reg; + reg1 = instr->args[1].data.reg.reg; + val2 = instr->args[2].data.imm.value; + val3 = instr->args[3].data.imm.value; + val4 = instr->args[4].data.imm.value; + + for (scan = instr->nextPCode; scan; scan = scan->nextPCode) { + opcode = scan->op; + if (opcode == PC_RLWINM && scan->args[1].data.reg.reg == reg0) { + if ( + scan->args[3].data.imm.value == val3 && + scan->args[4].data.imm.value == val4 && + scan->args[2].data.imm.value == 0 + ) + { + if (PCODE_FLAG_SET_F(scan) & fRecordBit) { + if (!flag1) { + pcsetrecordbit(instr); + change_opcode(scan, PC_MR); + scan->flags &= ~fRecordBit; + scan->flags |= fIsMove; + change_num_operands(scan, 2); + } else { + change_opcode(scan, PC_MR); + scan->args[2] = scan->args[5]; + change_num_operands(scan, 3); + } + } else { + change_opcode(scan, PC_MR); + change_num_operands(scan, 2); + } + } + else if ( + reg0 != reg1 && + !flag2 && + canmergemasks( + val3, + val4, + scan->args[2].data.imm.value, + scan->args[3].data.imm.value, + scan->args[4].data.imm.value, + &start, &end) + ) + { + scan->args[1].data.reg.reg = reg1; + scan->args[2].data.imm.value = (scan->args[2].data.imm.value + instr->args[2].data.imm.value) & 31; + scan->args[3].data.imm.value = start; + scan->args[4].data.imm.value = end; + } + } + else if ( + opcode == PC_SRAWI && + scan->args[1].data.reg.reg == reg0 && + reg0 != reg1 && + instr->args[2].data.imm.value == 0 && + !(computepossiblemask(instr, reg0) & 0x80000000) && + !flag2 && + canmergemasks(val3, val4, 32 - scan->args[2].data.imm.value, scan->args[2].data.imm.value, 31, &start, &end) && + !isSPRlive(scan, 0) + ) + { + insertpcodeafter(scan, makepcode( + PC_RLWINM, scan->args[0].data.reg.reg, reg1, + 32 - scan->args[2].data.imm.value, start, end + )); + if (PCODE_FLAG_SET_F(scan) & fRecordBit) + pcsetrecordbit(scan->nextPCode); + deletepcode(scan); + } + else if ( + opcode == PC_OR && + !flag2 && + reg0 != reg1 && + !(PCODE_FLAG_SET_F(scan) & fRecordBit) && + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + scan->args[0].data.reg.reg != instr->args[1].data.reg.reg + ) + { + if (scan->args[1].data.reg.reg == reg0 && canuseinsert(instr, scan, scan->args[2].data.reg.reg)) { + op = &scan->args[2]; + tmp = find_def_backwords(scan->prevPCode, scan->args[2].data.reg.reg); + if (tmp->op == PC_RLWINM && tmp->args[2].data.imm.value == 0) { + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) + valA = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) & ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + else + valA = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) | ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + + if (tmp->args[3].data.imm.value <= tmp->args[4].data.imm.value) + valB = ((tmp->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> tmp->args[3].data.imm.value)) & ~(((tmp->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (tmp->args[4].data.imm.value + 1))); + else + valB = ((tmp->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> tmp->args[3].data.imm.value)) | ~(((tmp->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (tmp->args[4].data.imm.value + 1))); + + if (valA == ~valB) + op = &tmp->args[1]; + } + + change_opcode(scan, PC_MR); + scan->args[1] = *op; + change_num_operands(scan, 2); + + tmp = copypcode(instr); + change_opcode(tmp, PC_RLWIMI); + tmp->args[0] = scan->args[0]; + tmp->args[0].data.reg.effect |= EffectRead; + + if (ismaskconstant(fillmaskholes(computepossiblemask(instr, instr->args[0].data.reg.reg)), &start, &end)) { + tmp->args[3].data.imm.value = start; + tmp->args[4].data.imm.value = end; + } + + insertpcodeafter(scan, tmp); + break; + } + + if ( + scan->args[2].data.reg.reg == reg0 && + canuseinsert(instr, scan, scan->args[1].data.reg.reg) + ) + { + op = &scan->args[1]; + tmp = find_def_backwords(scan->prevPCode, scan->args[1].data.reg.reg); + if (tmp->op == PC_RLWINM && tmp->args[2].data.imm.value == 0) { + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) + valA = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) & ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + else + valA = ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> instr->args[3].data.imm.value)) | ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (instr->args[4].data.imm.value + 1))); + + if (tmp->args[3].data.imm.value <= tmp->args[4].data.imm.value) + valB = ((tmp->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> tmp->args[3].data.imm.value)) & ~(((tmp->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (tmp->args[4].data.imm.value + 1))); + else + valB = ((tmp->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFu >> tmp->args[3].data.imm.value)) | ~(((tmp->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFu >> (tmp->args[4].data.imm.value + 1))); + + if (valA == ~valB) + op = &tmp->args[1]; + } + + change_opcode(scan, PC_MR); + scan->args[1] = *op; + change_num_operands(scan, 2); + + tmp = copypcode(instr); + change_opcode(tmp, PC_RLWIMI); + tmp->args[0] = scan->args[0]; + tmp->args[0].data.reg.effect |= EffectRead; + + if (ismaskconstant(fillmaskholes(computepossiblemask(instr, instr->args[0].data.reg.reg)), &start, &end)) { + tmp->args[3].data.imm.value = start; + tmp->args[4].data.imm.value = end; + } + + insertpcodeafter(scan, tmp); + break; + } + } + else if ( + !flag2 && + reg0 != reg1 && + val4 == 31 && + instr->args[2].data.imm.value == 0 && + ( + ((opcode == PC_STB || opcode == PC_STBX) && val3 <= 24) || + ((opcode == PC_STH || opcode == PC_STHX) && val3 <= 16) + ) && + scan->args[0].data.reg.reg == reg0 + ) + { + scan->args[0].data.reg.reg = reg1; + } + else if ( + opcode == PC_EXTSH && + scan->args[1].data.reg.reg == reg0 && + val2 == 0 && + val3 > 16 && + val3 < val4 + ) + { + change_opcode(scan, PC_MR); + if ((PCODE_FLAG_SET_F(scan) & fRecordBit) && !flag1) { + pcsetrecordbit(instr); + scan->flags &= ~fRecordBit; + scan->flags |= fIsMove; + change_num_operands(scan, 2); + } + } + else if ( + opcode == PC_EXTSB && + scan->args[1].data.reg.reg == reg0 && + val2 == 0 && + val3 > 24 && + val3 < val4 + ) + { + change_opcode(scan, PC_MR); + if ((PCODE_FLAG_SET_F(scan) & fRecordBit) && !flag1) { + pcsetrecordbit(instr); + scan->flags &= ~fRecordBit; + scan->flags |= fIsMove; + change_num_operands(scan, 2); + } + } + + for (op = scan->args, i = scan->argCount; i--; op++) { + if (PC_OP_IS_WRITE_REGISTER(op, RegClass_GPR, reg0)) { + scan = block->lastPCode; + break; + } + + if (PC_OP_IS_WRITE_REGISTER(op, RegClass_GPR, reg1)) + flag2 = 1; + + if (PC_OP_IS_REGISTER(op, RegClass_CRFIELD, 0)) + flag1 = 1; + } + } + + } else if ( + instr->op == PC_EXTSB && + (reg0 = instr->args[0].data.reg.reg) != (reg1 = instr->args[1].data.reg.reg) + ) + { + short flag = 0; + for (scan = instr->nextPCode; scan; scan = scan->nextPCode) { + if ( + (scan->op >= PC_STB && scan->op <= PC_STBUX) && + scan->args[0].data.reg.reg == reg0 + ) + { + scan->args[0].data.reg.reg = reg1; + } + else if ( + (scan->op == PC_EXTSH || scan->op == PC_EXTSB) && + scan->args[1].data.reg.reg == reg0 + ) + { + change_opcode(scan, PC_MR); + if ((PCODE_FLAG_SET_F(scan) & fRecordBit) && !flag) { + pcsetrecordbit(instr); + scan->flags &= ~fRecordBit; + scan->flags |= fIsMove; + change_num_operands(scan, 2); + } + } + + for (op = scan->args, i = scan->argCount; i--; op++) { + if ( + PC_OP_IS_WRITE_ANY_REGISTER(op, RegClass_GPR) && + (op->data.reg.reg == reg0 || op->data.reg.reg == reg1) + ) + { + scan = block->lastPCode; + break; + } + + if (PC_OP_IS_REGISTER(op, RegClass_CRFIELD, 0)) + flag = 1; + } + } + } else if ( + instr->op == PC_EXTSH && + (reg0 = instr->args[0].data.reg.reg) != (reg1 = instr->args[1].data.reg.reg) + ) + { + short flag = 0; + for (scan = instr->nextPCode; scan; scan = scan->nextPCode) { + if ( + ((scan->op >= PC_STB && scan->op <= PC_STBUX) || (scan->op >= PC_STH && scan->op <= PC_STHUX)) && + scan->args[0].data.reg.reg == reg0 + ) + { + scan->args[0].data.reg.reg = reg1; + } + else if (scan->op == PC_EXTSH && scan->args[1].data.reg.reg == reg0) + { + change_opcode(scan, PC_MR); + if ((PCODE_FLAG_SET_F(scan) & fRecordBit) && !flag) { + pcsetrecordbit(instr); + scan->flags &= ~fRecordBit; + scan->flags |= fIsMove; + change_num_operands(scan, 2); + } + } + + for (op = scan->args, i = scan->argCount; i--; op++) { + if ( + PC_OP_IS_WRITE_ANY_REGISTER(op, RegClass_GPR) && + (op->data.reg.reg == reg0 || op->data.reg.reg == reg1) + ) + { + scan = block->lastPCode; + break; + } + + if (PC_OP_IS_REGISTER(op, RegClass_CRFIELD, 0)) + flag = 1; + } + } + } else if ( + instr->op == PC_ADDI && + (reg0 = instr->args[0].data.reg.reg) == (reg1 = instr->args[1].data.reg.reg) && + instr->args[2].kind == PCOp_IMMEDIATE + ) + { + Boolean flag1 = 0; + Boolean flag2 = 0; + SInt32 val2 = instr->args[2].data.imm.value; + + for (scan = instr->nextPCode; scan; scan = scan->nextPCode) { + if ((scan->flags & fIsWrite) && scan->args[0].data.reg.reg == reg0) + break; + + if ( + (scan->flags & (fIsRead | fIsWrite)) && + scan->args[1].data.reg.reg == reg0 && + scan->args[2].kind == PCOp_IMMEDIATE && + FITS_IN_SHORT(val2 + scan->args[2].data.imm.value) + ) + { + scan->args[2].data.imm.value += val2; + tmp = instr->prevPCode; + if ( + (scan->flags & fIsRead) && + scan->args[0].data.reg.reg == reg0 && + scan->args[0].kind == PCOp_REGISTER && + scan->args[0].arg == RegClass_GPR + ) + { + deletepcode(instr); + } else { + deletepcode(instr); + insertpcodeafter(scan, instr); + } + instr = tmp; + break; + } + + if ( + scan->op == PC_ADDI && + scan->args[1].data.reg.reg == reg0 && + scan->args[2].kind == PCOp_IMMEDIATE && + FITS_IN_SHORT(val2 + scan->args[2].data.imm.value) + ) + { + scan->args[2].data.imm.value += val2; + tmp = instr->prevPCode; + if (scan->args[0].data.reg.reg == reg0) { + deletepcode(instr); + } else { + deletepcode(instr); + insertpcodeafter(scan, instr); + } + instr = tmp; + break; + } + + if (scan->flags & (fIsBranch | fIsCall)) { + if (flag1 && scan->prevPCode != instr) { + tmp = instr->prevPCode; + deletepcode(instr); + insertpcodebefore(scan, instr); + instr = tmp; + } + break; + } + + for (op = scan->args, i = scan->argCount; i--; op++) { + if (PC_OP_IS_R_OR_W_REGISTER(op, RegClass_GPR, reg0)) { + if (flag1 && scan->prevPCode != instr) { + tmp = instr->prevPCode; + deletepcode(instr); + insertpcodebefore(scan, instr); + instr = tmp; + } + flag2 = 1; + break; + } + } + + if (flag2) + break; + + if (scan->op != PC_ADDI) + flag1 = 1; + + if (flag1 && !scan->nextPCode) { + tmp = instr->prevPCode; + deletepcode(instr); + appendpcode(block, instr); + instr = tmp; + break; + } + } + } + + if (instr) + instr = instr->nextPCode; + else + instr = block->firstPCode; + } +} + +static void installpattern(Opcode opcode, PeepholeFunc func) { + Pattern *pattern = lalloc(sizeof(Pattern)); + pattern->func = func; + pattern->next = peepholepatterns[opcode]; + peepholepatterns[opcode] = pattern; +} + +static void installpeepholepatterns(void) { + int i; + + for (i = 0; i < OPCODE_MAX; i++) + peepholepatterns[i] = NULL; + + installpattern(PC_AND, NOT_AND); + installpattern(PC_MR, LI_MR); + installpattern(PC_MR, L_MR); + installpattern(PC_FMR, L_FMR); + installpattern(PC_MR, MR_MR); + installpattern(PC_MR, MR_Rx_Rx); + installpattern(PC_FMR, FMR_FMR); + installpattern(PC_FMR, FMR_Fx_Fx); + installpattern(PC_VMR, VMR_VMRP); + installpattern(PC_VMR, VMR_VMR); + installpattern(PC_VMR, VMR_Vx_Vx); + installpattern(PC_VMR, VSPLTIS_VMR); + installpattern(PC_CMPI, MR_CMPI); + installpattern(PC_RLWIMI, RLWINM_RLWINM); + installpattern(PC_RLWINM, RLWINM_RLWINM); + installpattern(PC_RLWINM, EXTSB_RLWINM); + installpattern(PC_RLWINM, EXTSH_RLWINM); + installpattern(PC_RLWINM, LBZ_RLWINM); + installpattern(PC_RLWINM, LHZ_RLWINM); + installpattern(PC_EXTSH, LHA_EXTSH); + installpattern(PC_STW, RLWINM_RLWIMI_STW); + installpattern(PC_STWX, RLWINM_RLWIMI_STW); + installpattern(PC_STH, RLWINM_RLWIMI_STH); + installpattern(PC_STHX, RLWINM_RLWIMI_STH); + installpattern(PC_LBZ, ADDI_L_S); + installpattern(PC_LHZ, ADDI_L_S); + installpattern(PC_LHA, ADDI_L_S); + installpattern(PC_LWZ, ADDI_L_S); + installpattern(PC_STB, ADDI_L_S); + installpattern(PC_STH, ADDI_L_S); + installpattern(PC_STW, ADDI_L_S); + installpattern(PC_LFS, ADDI_L_S); + installpattern(PC_LFD, ADDI_L_S); + installpattern(PC_STFS, ADDI_L_S); + installpattern(PC_STFD, ADDI_L_S); + installpattern(PC_LBZU, ADDI_LU_SU); + installpattern(PC_LHZU, ADDI_LU_SU); + installpattern(PC_LHAU, ADDI_LU_SU); + installpattern(PC_LWZU, ADDI_LU_SU); + installpattern(PC_STBU, ADDI_LU_SU); + installpattern(PC_STHU, ADDI_LU_SU); + installpattern(PC_STWU, ADDI_LU_SU); + installpattern(PC_LFSU, ADDI_LU_SU); + installpattern(PC_LFDU, ADDI_LU_SU); + installpattern(PC_STFSU, ADDI_LU_SU); + installpattern(PC_STFDU, ADDI_LU_SU); + installpattern(PC_LBZ, L_S_ADDI); + installpattern(PC_LHZ, L_S_ADDI); + installpattern(PC_LHA, L_S_ADDI); + installpattern(PC_LWZ, L_S_ADDI); + installpattern(PC_STB, L_S_ADDI); + installpattern(PC_STH, L_S_ADDI); + installpattern(PC_STW, L_S_ADDI); + installpattern(PC_LFS, L_S_ADDI); + installpattern(PC_LFD, L_S_ADDI); + installpattern(PC_STFS, L_S_ADDI); + installpattern(PC_STFD, L_S_ADDI); + installpattern(PC_STB, L_S); + installpattern(PC_STH, L_S); + installpattern(PC_STW, L_S); + installpattern(PC_STFS, L_S); + installpattern(PC_STFD, L_S); + installpattern(PC_STBX, L_S); + installpattern(PC_STHX, L_S); + installpattern(PC_STWX, L_S); + installpattern(PC_STFSX, L_S); + installpattern(PC_STFDX, L_S); + installpattern(PC_BT, LBZ_EXTSB_CMPI_BC); + installpattern(PC_BF, LBZ_EXTSB_CMPI_BC); + installpattern(PC_BT, RLWINM_CMPLI_BC); + installpattern(PC_BF, RLWINM_CMPLI_BC); + installpattern(PC_BT, LI_CMP_BC); + installpattern(PC_BF, LI_CMP_BC); + installpattern(PC_RLWINM, RLWINM_RLWINM); + installpattern(PC_MULLI, MULLI_MULLI); + installpattern(PC_ADDI, ADDI_ADDI); + installpattern(PC_SRAWI, SRAWI_SRAWI); + installpattern(PC_MR, MR_ADDI); + installpattern(PC_OR, SRW_SUBFIC_RLW_OR); +} + +void peepholeoptimizeforward(Object *func) { + PCodeBlock *block; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (block->pcodeCount >= 2) + adjustforward(block); + } +} + +void peepholemergeblocks(Object *func, Boolean flag) { + PCodeBlock *block; + PCodeBlock *next; + Boolean flag2; + PCode *instr; + PCode *nextinstr; + PCLink *link; + PCLink *link2; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + flag2 = 0; + next = block->nextBlock; + + if (!flag) { + flag2 = next && (next->flags & fIsEpilogue); + if (block->flags & fIsProlog) + continue; + } + + if (block->pcodeCount > 0) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & (fIsCall | fSideEffects)) + break; + } + + if (instr) + continue; + + instr = block->lastPCode; + if (instr && instr->op == PC_B) { + if (instr->args[0].kind == PCOp_LABEL && instr->args[0].data.label.label->block == next) + deletepcode(instr); + else + continue; + } + + instr = block->lastPCode; + if (instr && (instr->flags & fIsBranch) && instr->op != PC_B) + continue; + + while ( + block->successors->block == next && + block->successors->nextLink == NULL && + next->predecessors->block == block && + next->predecessors->nextLink == NULL && + !flag2 && + !(next->flags & fPCBlockFlag8000) && + (block->pcodeCount + next->pcodeCount) <= 100 + ) + { + if (next->pcodeCount > 0) { + for (instr = next->firstPCode; instr; instr = nextinstr) { + nextinstr = instr->nextPCode; + if (instr->flags & (fIsCall | fSideEffects)) + break; + + deletepcode(instr); + if (instr->op != PC_B) { + appendpcode(block, instr); + } else if (instr->args[0].kind == PCOp_LABEL) { + if (instr->args[0].data.label.label->block != next->nextBlock) + appendpcode(block, instr); + } else { + appendpcode(block, instr); + } + } + } + + if (next->pcodeCount != 0) + break; + if (next == epilogue) + break; + + block->successors = next->successors; + + for (link = block->successors; link; link = link->nextLink) { + for (link2 = link->block->predecessors; link2; link2 = link2->nextLink) { + if (link2->block == next) { + link2->block = block; + break; + } + } + } + + block->nextBlock = next->nextBlock; + if (block->nextBlock) + block->nextBlock->prevBlock = block; + + next->flags |= fDeleted; + next = block->nextBlock; + + if (!flag) + flag2 = next && (next->flags & fIsEpilogue); + } + } + } +} + +void peepholeoptimizepcode(Object *func) { + PCodeBlock *block; + + installpeepholepatterns(); + computeliveregisters(func); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (block->pcodeCount >= 1) { + computeinstructionpredecessors(block); + peepholeoptimizeblock(block); + freeoheap(); + } + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/StackFrame.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/StackFrame.c new file mode 100644 index 0000000..6fa4524 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/StackFrame.c @@ -0,0 +1,1252 @@ +#include "compiler/StackFrame.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/RegisterInfo.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +#define ALIGN(thing, alignment) ( ~((alignment) - 1) & ((thing) + (alignment) - 1) ) +#define ALIGN_REMAINDER(thing, alignment) ( ALIGN(thing, alignment) - (thing) ) + +Boolean requires_frame; +Boolean makes_call; +Boolean uses_globals; +Boolean dynamic_stack; +Boolean large_stack; +static SInt32 out_param_alignment; +static SInt32 in_parameter_size; +static SInt32 in_param_alignment; +static SInt32 frame_alignment; +static SInt32 alloca_alignment; +static SInt32 linkage_area_size; +static SInt32 parameter_area_size; +static SInt32 parameter_area_size_estimate; +static SInt32 local_data_size; +static SInt32 local_data_limit; +static SInt32 large_data_near_size; +static SInt32 large_data_far_size; +static ObjectList *local_objects[ObjClassMax]; +static ObjectList *local_objects_tail[ObjClassMax]; +static SInt32 non_volatile_save_offset[RegClassMax]; +static SInt32 VRSAVE_save_offset; +static SInt32 LR_save_offset; +static SInt32 nonvolatile_save_size; +static UInt32 vrsave_mask; +static SInt32 frame_size; +static SInt32 frame_size_estimate; +static SInt32 genuine_frame_size; +static Object *dummylocal; +static Boolean dynamic_align_stack; +static Boolean has_varargs; +static Boolean compressing_data_area; +static PCode *setup_caller_sp; +static PCode *align_instr1; +static PCode *align_instr2; +static short vrsave_register; +static PCode *loadvrsave; +static PCode *storevrsave; +Object *dummyvaparam; +void *dummyprofiler; + +// forward declarations +static void insert_local_object(UInt8 oclass, Object *obj); +static void compress_data_area(void); +static UInt32 align_bits(UInt32 value, UInt8 bitcount); +static Boolean use_helper_function(char rclass); +static Boolean need_link_register(void); +static void call_helper_function(char *name, char rclass, short effect); +static void save_nonvolatile_FPRs(int reg, SInt32 offset); +static void save_nonvolatile_VRs(int reg, SInt32 offset); +static void restore_nonvolatile_FPRs(int reg, SInt32 offset); +static void restore_nonvolatile_VRs(int reg, SInt32 offset); +static void save_nonvolatile_GPRs(int reg, SInt32 offset); +static void restore_nonvolatile_GPRs(int reg, SInt32 offset); +static void do_allocate_dynamic_stack_space(Boolean isConstantSize, int reg1, int reg2, SInt32 size); + +void init_stack_globals(Object *funcobj) { + RegClass rclass; + ObjClass oclass; + + requires_frame = 0; + makes_call = 0; + uses_globals = 0; + dynamic_stack = 0; + large_stack = 0; + vrsave_register = -1; + dynamic_align_stack = 0; + compressing_data_area = 0; + align_instr1 = NULL; + align_instr2 = NULL; + setup_caller_sp = NULL; + dummyvaparam = NULL; + loadvrsave = NULL; + storevrsave = NULL; + local_data_size = 0; + local_data_limit = 0x2000; + large_data_near_size = 0; + large_data_far_size = 0; + frame_size_estimate = 0; + in_parameter_size = 0; + parameter_area_size = 0; + parameter_area_size_estimate = 0; + frame_alignment = 8; + out_param_alignment = 8; + in_param_alignment = 8; + alloca_alignment = 0; + has_varargs = 0; + linkage_area_size = 0; + frame_size = 0; + genuine_frame_size = 0; + nonvolatile_save_size = -1; + VRSAVE_save_offset = -1; + LR_save_offset = -1; + + for (rclass = 0; rclass < RegClassMax; rclass++) + non_volatile_save_offset[rclass] = -1; + + dummyprofiler = NULL; + dummyvaparam = NULL; + dummylocal = NULL; + + for (oclass = 0; oclass < ObjClassMax; oclass++) { + local_objects[oclass] = NULL; + local_objects_tail[oclass] = NULL; + } +} + +void init_frame_sizes(Boolean has_varargs) { + ObjectList *scan; + Object *obj; + SInt32 r30; + SInt32 align; + SInt32 mask; + + r30 = in_parameter_size + parameter_area_size_estimate; + frame_size_estimate = r30 + 2484; + for (scan = locals; scan; scan = scan->next) { + obj = scan->object; + { + align = CMach_AllocationAlignment(obj->type, obj->qual) - 1; + mask = ~align; + } + frame_size_estimate = (frame_size_estimate + align) & mask; + frame_size_estimate += obj->type->size; + } + + if (frame_size_estimate > 0x8000) { + dynamic_stack = 1; + large_stack = 1; + requires_frame = 1; + } + + local_data_limit = 0x8000 - (r30 + 2484); + + if (dynamic_stack) { + requires_frame = 1; + dummylocal = galloc(sizeof(Object)); + memclrw(dummylocal, sizeof(Object)); + dummylocal->type = (Type *) &stvoid; + dummylocal->otype = OT_OBJECT; + dummylocal->name = GetHashNameNode("<dummy>"); + dummylocal->datatype = DLOCAL; + dummylocal->u.var.info = CodeGen_GetNewVarInfo(); + dummylocal->u.var.info->flags |= VarInfoFlag80; + dummylocal->u.var.info->noregister = 1; + } + + if (dynamic_stack) { + retain_register(NULL, RegClass_GPR, 31); + _FP_ = 31; + } else { + _FP_ = 1; + } +} + +void assign_local_memory(Object *obj) { + // some misassigned registers x.x + short align; + VarInfo *vi; + + align = CMach_AllocationAlignment(obj->type, obj->qual); + if (!compressing_data_area && (obj->u.var.info->flags & VarInfoFlag80)) + return; + + update_frame_align(align); + if (local_data_size + (ALIGN_REMAINDER(local_data_size, align) + ALIGN(obj->type->size, align)) < local_data_limit) { + local_data_size = ALIGN(local_data_size, align); + vi = Registers_GetVarInfo(obj); + vi->flags &= ~VarInfoFlag2; + vi->flags |= VarInfoFlag80; + obj->u.var.uid = local_data_size; + local_data_size += ALIGN(obj->type->size, align); + insert_local_object(ObjClass0, obj); + return; + } + if (compressing_data_area || obj->type->size <= 32) { + large_data_near_size = ALIGN(large_data_near_size, align); + vi = Registers_GetVarInfo(obj); + vi->flags &= ~VarInfoFlag2; + vi->flags |= VarInfoFlag80; + obj->u.var.uid = 0x8000 + large_data_near_size; + large_data_near_size += ALIGN(obj->type->size, align); + insert_local_object(ObjClass1, obj); + } else { + large_data_far_size = ALIGN(large_data_far_size, align); + vi = Registers_GetVarInfo(obj); + vi->flags &= ~VarInfoFlag2; + vi->flags |= VarInfoFlag80; + obj->u.var.uid = 0x10000 + large_data_far_size; + large_data_far_size += ALIGN(obj->type->size, align); + insert_local_object(ObjClass2, obj); + } +} + +void assign_locals_to_memory(ObjectList *first) { + ObjectList *list; + Object *obj; + SInt32 i; + + for (i = 1; i < 1024; i <<= 1) { + for (list = first; list; list = list->next) { + obj = list->object; + if (Registers_GetVarInfo(obj)->used) { + if ((Registers_GetVarInfo(obj) ? Registers_GetVarInfo(obj)->reg : 0) == 0) { + if (obj->type->size <= i) + assign_local_memory(obj); + } + } + } + } + + for (list = first; list; list = list->next) { + obj = list->object; + if (Registers_GetVarInfo(obj)->used) { + if ((Registers_GetVarInfo(obj) ? Registers_GetVarInfo(obj)->reg : 0) == 0) { + assign_local_memory(list->object); + } + } + + if (obj->type && IS_TYPE_ARRAY(obj->type) && IS_TYPE_VECTOR(TYPE_POINTER(obj->type)->target)) + has_altivec_arrays = 1; + } +} + +void compute_frame_sizes(void) { + SInt32 altivec_size; + SInt32 altivec_offset; + + CError_ASSERT(897, alloca_alignment == 0 || alloca_alignment == frame_alignment); + + update_asm_nonvolatile_registers(); + LR_save_offset = 8; + non_volatile_save_offset[RegClass_FPR] = -(used_nonvolatile_registers[RegClass_FPR] * 8); + non_volatile_save_offset[RegClass_GPR] = -(((15 - non_volatile_save_offset[RegClass_FPR]) & ~15) + used_nonvolatile_registers[RegClass_GPR] * 4); + nonvolatile_save_size = -non_volatile_save_offset[RegClass_GPR]; + non_volatile_save_offset[RegClass_CRFIELD] = 4; + VRSAVE_save_offset = -1; + non_volatile_save_offset[RegClass_VR] = -1; + + if (copts.altivec_model) { + if (vrsave_mask) { + VRSAVE_save_offset = non_volatile_save_offset[RegClass_GPR] - 4; + nonvolatile_save_size = nonvolatile_save_size + 4; + } + altivec_size = used_nonvolatile_registers[RegClass_VR] * 16; + if (altivec_size > 0) + nonvolatile_save_size = ALIGN(nonvolatile_save_size + altivec_size, frame_alignment); + } + + if (parameter_area_size) + requires_frame = 1; + + compress_data_area(); + local_data_size = ALIGN(local_data_size, frame_alignment); + nonvolatile_save_size = ALIGN(nonvolatile_save_size, frame_alignment); + if (!requires_frame && (local_data_size + nonvolatile_save_size) <= 224) { + CError_ASSERT(1005, !dynamic_align_stack); + linkage_area_size = 0; + frame_size = 0; + genuine_frame_size = local_data_size + nonvolatile_save_size; + } else { + requires_frame = 1; + if (parameter_area_size < 32) + parameter_area_size = 32; + parameter_area_size = ALIGN(parameter_area_size + 24, frame_alignment) - 24; + if (large_stack) { + CError_ASSERT(1019, !large_data_far_size); + large_data_near_size += parameter_area_size; + parameter_area_size = 0; + } + linkage_area_size = 24; + frame_size = nonvolatile_save_size + (altivec_offset = parameter_area_size + 24 + local_data_size); + if (copts.altivec_model && used_nonvolatile_registers[RegClass_VR]) + non_volatile_save_offset[RegClass_VR] = altivec_offset; + frame_size += ALIGN_REMAINDER(frame_size, 16); + frame_size = ALIGN(frame_size, frame_alignment); + genuine_frame_size = frame_size; + } + if (!large_stack && frame_size > 0x7FFF) + CError_ErrorTerm(CErrorStr210); +} + +static void allocate_new_frame(int reg1, int reg2) { + if (dynamic_align_stack) { + CError_ASSERT(1116, reg1 != _CALLER_SP_); + emitpcode(PC_RLWINM, reg1, 1, 0, align_bits(frame_alignment, 1), 31); + if (frame_size > 0x7FFF) { + CError_FATAL(1122); + return; + } + + if (frame_size) + emitpcode(PC_SUBFIC, reg1, reg1, -frame_size); + else + emitpcode(PC_SUBFIC, reg1, reg1, -genuine_frame_size); + + if (reg2) + emitpcode(PC_MR, reg2, 1); + + emitpcode(PC_STWUX, 1, 1, reg1); + } else { + if (frame_size > 0x7FFF) + CError_FATAL(1153); + else + emitpcode(PC_STWU, 1, 1, 0, -frame_size); + + if (reg2) + emitpcode(PC_MR, reg2, 1); + } +} + +void generate_prologue(PCodeBlock *block, Boolean has_varargs) { + PCodeBlock *save_block; + Boolean needs_lr; + Statement *save_statement; + Statement stmt; + UInt32 vrsave_low; + UInt32 vrsave_high; + + save_block = pclastblock; + needs_lr = need_link_register(); + save_statement = current_statement; + stmt.sourceoffset = functionbodyoffset; + current_statement = &stmt; + pclastblock = block; + + if (setup_caller_sp && setup_caller_sp->block) { + if ( + setup_caller_sp->op == PC_MR && + setup_caller_sp->args[1].kind == PCOp_REGISTER && + setup_caller_sp->args[1].arg == RegClass_GPR && + setup_caller_sp->args[1].data.reg.reg == _FP_ + ) + CError_FATAL(1197); + + _CALLER_SP_ = setup_caller_sp->args[0].data.reg.reg; + deletepcode(setup_caller_sp); + setup_caller_sp = NULL; + } else if (_CALLER_SP_ != _FP_) { + _CALLER_SP_ = -1; + } + + if (align_instr1 && align_instr1->block) { + deletepcode(align_instr1); + align_instr1 = NULL; + } + + if (align_instr2 && align_instr2->block) { + deletepcode(align_instr2); + align_instr2 = NULL; + } + + if (loadvrsave && loadvrsave->block) { + deletepcode(loadvrsave); + loadvrsave = NULL; + } + + if (storevrsave && storevrsave->block) { + deletepcode(storevrsave); + storevrsave = NULL; + } + + if (needs_lr) + emitpcode(PC_MFLR, 0); + + if (used_nonvolatile_registers[RegClass_CRFIELD]) { + emitpcode(PC_MFCR, 12); + emitpcode(PC_STW, 12, 1, 0, non_volatile_save_offset[RegClass_CRFIELD]); + } + + if (used_nonvolatile_registers[RegClass_FPR]) + save_nonvolatile_FPRs(1, 0); + if (used_nonvolatile_registers[RegClass_GPR]) + save_nonvolatile_GPRs(1, 0); + if (needs_lr) + emitpcode(PC_STW, 0, 1, 0, 8); + + if (frame_size) { + if (vrsave_mask) { + emitpcode(PC_MFSPR, 0, 256); + emitpcode(PC_STW, 0, 1, 0, VRSAVE_save_offset); + vrsave_register = 0; + } + allocate_new_frame(12, (_CALLER_SP_ > 0 && _CALLER_SP_ != _FP_) ? _CALLER_SP_ : 0); + } else { + CError_ASSERT(1326, !dynamic_align_stack); + if (vrsave_mask) + emitpcode(PC_MFSPR, vrsave_register, 256); + } + + if (vrsave_mask) { + vrsave_high = vrsave_mask >> 16; + vrsave_low = vrsave_mask & 0xFFFF; + if (vrsave_mask == 0xFFFFFFFF) { + emitpcode(PC_LI, 0, -1); + } else { + if (vrsave_high) + emitpcode(PC_ORIS, 0, vrsave_register, vrsave_high); + if (vrsave_low) + emitpcode(PC_ORI, 0, 0, vrsave_low); + } + emitpcode(PC_MTSPR, 256, 0); + } + + if (used_nonvolatile_registers[RegClass_VR]) + save_nonvolatile_VRs(1, 0); + + if (dynamic_stack) + emitpcode(PC_MR, 31, 1); + + if (large_stack) + do_allocate_dynamic_stack_space(1, 11, 0, large_data_near_size); + + block->flags |= fIsProlog; + pclastblock = save_block; + current_statement = save_statement; +} + +void generate_epilogue(PCodeBlock *block, Boolean add_blr) { + PCodeBlock *save_block; + Boolean needs_lr; + Statement *save_statement; + Statement stmt; + + save_block = pclastblock; + needs_lr = need_link_register(); + save_statement = current_statement; + if (!save_statement) { + stmt.sourceoffset = current_linenumber; + current_statement = &stmt; + } + pclastblock = block; + + if (used_nonvolatile_registers[RegClass_VR]) + restore_nonvolatile_VRs(_FP_, 0); + + if (dynamic_align_stack) { + load_store_register(PC_LWZ, 1, 1, NULL, 0); + setpcodeflags(fSideEffects); + if (needs_lr) + load_store_register(PC_LWZ, 0, 1, 0, 8); + } else { + if (needs_lr) + load_store_register(PC_LWZ, 0, _FP_, 0, frame_size + 8); + if (frame_size > 0) { + if (dynamic_stack) { + load_store_register(PC_LWZ, 1, 1, 0, 0); + setpcodeflags(fSideEffects); + } else { + emitpcode(PC_ADDI, 1, 1, 0, frame_size); + setpcodeflags(fSideEffects); + } + } + } + + if (used_nonvolatile_registers[RegClass_CRFIELD]) { + load_store_register(PC_LWZ, 12, 1, NULL, non_volatile_save_offset[RegClass_CRFIELD]); + emitpcode(PC_MTCRF, 255, 12); + } + + if (vrsave_mask) { + if (!requires_frame) { + emitpcode(PC_MTSPR, 256, vrsave_register); + } else { + emitpcode(PC_LWZ, 11, 1, 0, VRSAVE_save_offset); + emitpcode(PC_MTSPR, 256, 11); + } + } + + if (used_nonvolatile_registers[RegClass_FPR]) + restore_nonvolatile_FPRs(1, 0); + if (needs_lr && !use_helper_function(RegClass_GPR)) + emitpcode(PC_MTLR, 0); + + if (used_nonvolatile_registers[RegClass_GPR]) + restore_nonvolatile_GPRs(1, 0); + if (needs_lr && use_helper_function(RegClass_GPR)) + emitpcode(PC_MTLR, 0); + + if (add_blr) { + emitpcode(PC_BLR); + setpcodeflags(fIsVolatile); + } + + block->flags |= fIsEpilogue; + pclastblock = save_block; + current_statement = save_statement; +} + +static void load_base_offset(int dest_reg, int base_reg, SInt32 offset) { + if (offset) + emitpcode(PC_ADDI, dest_reg, base_reg, 0, offset); + else + emitpcode(PC_MR, dest_reg, base_reg); +} + +static void save_nonvolatile_FPRs(int reg, SInt32 offset) { + short i; + SInt32 o; + + o = offset + non_volatile_save_offset[RegClass_FPR]; + + if (!use_helper_function(RegClass_FPR)) { + for (i = 1; i <= used_nonvolatile_registers[RegClass_FPR]; i++) { + emitpcode(PC_STFD, 32 - i, reg, NULL, o + (used_nonvolatile_registers[RegClass_FPR] - i) * 8); + setpcodeflags(fIsVolatile); + } + } else { + load_base_offset(11, reg, o + used_nonvolatile_registers[RegClass_FPR] * 8); + call_helper_function("__save_fpr_%d", RegClass_FPR, EffectRead); + } +} + +static void save_nonvolatile_VRs(int reg, SInt32 offset) { + short i; + SInt32 o; + + o = offset + non_volatile_save_offset[RegClass_VR]; + + if (!use_helper_function(RegClass_VR)) { + for (i = 1; i <= used_nonvolatile_registers[RegClass_VR]; i++) { + emitpcode(PC_LI, 0, o + (used_nonvolatile_registers[RegClass_VR] - i) * 16); + emitpcode(PC_STVX, 32 - i, reg, 0); + setpcodeflags(fIsVolatile); + } + } else { + load_base_offset(0, reg, o + used_nonvolatile_registers[RegClass_VR] * 16); + call_helper_function("__savev%d", RegClass_VR, EffectRead); + } +} + +static void restore_nonvolatile_FPRs(int reg, SInt32 offset) { + short i; + SInt32 o; + + o = offset + non_volatile_save_offset[RegClass_FPR]; + + if (!use_helper_function(RegClass_FPR)) { + for (i = 1; i <= used_nonvolatile_registers[RegClass_FPR]; i++) { + load_store_register(PC_LFD, 32 - i, reg, NULL, o + (used_nonvolatile_registers[RegClass_FPR] - i) * 8); + setpcodeflags(fIsVolatile); + } + } else { + load_base_offset(11, reg, o + used_nonvolatile_registers[RegClass_FPR] * 8); + call_helper_function("__restore_fpr_%d", RegClass_FPR, EffectWrite); + } +} + +static void restore_nonvolatile_VRs(int reg, SInt32 offset) { + short i; + SInt32 o; + + o = offset + non_volatile_save_offset[RegClass_VR]; + + if (!use_helper_function(RegClass_VR)) { + for (i = 1; i <= used_nonvolatile_registers[RegClass_VR]; i++) { + emitpcode(PC_LI, 0, o + (used_nonvolatile_registers[RegClass_VR] - i) * 16); + setpcodeflags(fIsVolatile); + emitpcode(PC_LVX, 32 - i, reg, 0); + setpcodeflags(fIsVolatile); + } + } else { + load_base_offset(0, reg, o + used_nonvolatile_registers[RegClass_VR] * 16); + call_helper_function("__restv%d", RegClass_VR, EffectWrite); + } +} + +static void save_nonvolatile_GPRs(int reg, SInt32 offset) { + int i; + SInt32 o; + + o = offset + non_volatile_save_offset[RegClass_GPR]; + + if (!use_helper_function(RegClass_GPR)) { + if (copts.use_lmw_stmw && ((used_nonvolatile_registers[RegClass_GPR] > 4) || (copts.optimizesize && (used_nonvolatile_registers[RegClass_GPR] > 1)))) { + emitpcode(PC_STMW, used_nonvolatile_registers[RegClass_GPR] - 1, 32 - used_nonvolatile_registers[RegClass_GPR], reg, 0, o); + } else { + for (i = 1; i <= used_nonvolatile_registers[RegClass_GPR]; i++) { + emitpcode(PC_STW, 32 - i, reg, 0, o + (used_nonvolatile_registers[RegClass_GPR] - i) * 4); + } + } + } else { + load_base_offset(11, reg, o + used_nonvolatile_registers[RegClass_GPR] * 4); + call_helper_function("__savegpr_%d", RegClass_GPR, EffectRead); + } +} + +static void restore_nonvolatile_GPRs(int reg, SInt32 offset) { + int i; + SInt32 o; + + o = offset + non_volatile_save_offset[RegClass_GPR]; + + if (!use_helper_function(RegClass_GPR)) { + if (copts.use_lmw_stmw && ((used_nonvolatile_registers[RegClass_GPR] > 4) || (copts.optimizesize && (used_nonvolatile_registers[RegClass_GPR] > 1)))) { + emitpcode(PC_LMW, used_nonvolatile_registers[RegClass_GPR] - 1, 32 - used_nonvolatile_registers[RegClass_GPR], reg, 0, o); + setpcodeflags(fIsVolatile); + } else { + for (i = 1; i <= used_nonvolatile_registers[RegClass_GPR]; i++) { + emitpcode(PC_LWZ, 32 - i, reg, 0, o + (used_nonvolatile_registers[RegClass_GPR] - i) * 4); + setpcodeflags(fIsVolatile); + } + } + } else { + load_base_offset(11, reg, o + used_nonvolatile_registers[RegClass_GPR] * 4); + call_helper_function("__restgpr_%d", RegClass_GPR, EffectWrite); + } +} + +static void do_allocate_dynamic_stack_space(Boolean isConstantSize, int reg1, int reg2, SInt32 size) { + load_store_register(PC_LWZ, reg2, 1, NULL, 0); + if (isConstantSize) { + size = ALIGN(size, frame_alignment); + if (size < 0x8000) { + emitpcode(PC_STWU, reg2, 1, 0, -size); + } else { + emitpcode(PC_LIS, reg1, 0, (short) HIGH_PART(-size)); + if (-size) + emitpcode(PC_ADDI, reg1, reg1, 0, LOW_PART(-size)); + emitpcode(PC_STWUX, reg2, 1, reg1); + setpcodeflags(fIsVolatile | fSideEffects); + } + } else { + emitpcode(PC_STWUX, reg2, 1, reg1); + setpcodeflags(fIsVolatile | fSideEffects); + } +} + +void allocate_dynamic_stack_space(Boolean isConstantSize, int reg1, int reg2, SInt32 size) { + if (copts.altivec_model) + update_frame_align(16); + do_allocate_dynamic_stack_space(isConstantSize, reg1, reg2, size); + add_immediate(reg1, 1, dummylocal, 0); +} + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct Traceback { + UInt8 x0; + UInt8 x1; + UInt8 x2; + UInt8 x3; + UInt8 x4; + UInt8 x5; + + UInt8 x6_0 : 2; + UInt8 x6_1 : 1; // set to 1 + UInt8 x6_2 : 5; + + UInt8 x7_0 : 1; + UInt8 x7_1 : 1; // set to 1 + UInt8 has_dynamic_stack : 1; // set to 1 if dynamic_stack + UInt8 x7_3 : 3; + UInt8 uses_CRs : 1; // set to 1 if CRs used + UInt8 needs_link_register : 1; // set to 1 if link register used + + UInt8 has_frame_size : 1; // set to 1 if frame_size is nonzero + UInt8 x8_1 : 1; // set to 0 + UInt8 used_FPRs : 6; // stores non-volatile FPRs used + + UInt8 x9_0 : 1; // set to 0 + UInt8 x9_1 : 1; // set to 1 if VRs or vrsave used + UInt8 used_GPRs : 6; // stores non-volatile GPRs used + + UInt8 xA; + UInt8 xB; + + SInt32 funcsize; + SInt16 namelen; + char name[0]; +} Traceback; + +typedef struct TracebackExtra { + UInt8 used_VRs : 6; + UInt8 has_vrsave_mask : 1; + UInt8 is_varargs : 1; + UInt8 vec_arg_count : 7; + UInt8 has_vrsave_mask_or_used_VRs : 1; +} TracebackExtra; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +char *generate_traceback(SInt32 funcsize, char *funcname, SInt32 *tbsize, Object *func) { + char *work; + short namelen; + Traceback *buf; + SInt32 bufsize; + + namelen = strlen(funcname); + bufsize = ALIGN(sizeof(Traceback) + namelen + (dynamic_stack ? 1 : 0) + ((used_nonvolatile_registers[RegClass_VR] || vrsave_mask) ? sizeof(TracebackExtra) : 0), 4); + buf = lalloc(bufsize); + memclrw(buf, bufsize); + + buf->x4 = 0; + buf->x5 = copts.cplusplus ? 9 : 0; + buf->x6_1 = 1; + buf->x7_1 = 1; + if (dynamic_stack) + buf->has_dynamic_stack = 1; + if (used_nonvolatile_registers[RegClass_CRFIELD]) + buf->uses_CRs = 1; + if (need_link_register()) + buf->needs_link_register = 1; + if (frame_size) + buf->has_frame_size = 1; + buf->used_FPRs = used_nonvolatile_registers[RegClass_FPR]; + buf->used_GPRs = used_nonvolatile_registers[RegClass_GPR]; + buf->x8_1 = 0; + buf->x9_0 = 0; + buf->x9_1 = (used_nonvolatile_registers[RegClass_VR] || vrsave_mask) != 0; + buf->funcsize = funcsize; + buf->namelen = namelen; + + work = buf->name; + strcpy(work, funcname); + work += namelen; + if (dynamic_stack) { + *(work++) = 31; + } + + if (vrsave_mask || used_nonvolatile_registers[RegClass_VR]) { + TracebackExtra *extra; + Boolean is_varargs; + int vec_count; + FuncArg *args, *scan; + Type *type; + + extra = (TracebackExtra *) work; + vec_count = 0; + args = TYPE_FUNC(func->type)->args; + scan = args; + while (scan && scan != &elipsis) + scan = scan->next; + is_varargs = scan == &elipsis; + while (args) { + if ((type = args->type) && IS_TYPE_VECTOR(type)) + vec_count++; + args = args->next; + } + extra->used_VRs = used_nonvolatile_registers[RegClass_VR]; + extra->has_vrsave_mask = vrsave_mask != 0; + extra->is_varargs = is_varargs; + extra->vec_arg_count = vec_count; + extra->has_vrsave_mask_or_used_VRs = vrsave_mask || used_nonvolatile_registers[RegClass_VR]; + } + + *tbsize = bufsize; + return (char *) buf; +} + +static SInt32 localsbase(void) { + SInt32 size = parameter_area_size; + if (frame_size || dynamic_align_stack) + size += linkage_area_size; + else + size -= genuine_frame_size; + return size; +} + +static SInt32 parametersbase(int flag) { + if (flag) + return 24; + + return frame_size ? (genuine_frame_size + 24) : 24; +} + +void check_dynamic_aligned_frame(void) { + PCode *pc; + + if (used_nonvolatile_registers[RegClass_VR]) { + update_frame_align(16); + requires_frame = 1; + } + + if (frame_alignment > in_param_alignment) { + dynamic_align_stack = 1; + requires_frame = 1; + CError_ASSERT(2091, !has_varargs || _CALLER_SP_ != -1); + CError_ASSERT(2096, _CALLER_SP_ != _FP_); + if (setup_caller_sp && setup_caller_sp->block) { + align_instr1 = makepcode(PC_RLWINM, 12, 1, 0, 5, 31); + insertpcodebefore(setup_caller_sp, align_instr1); + align_instr2 = makepcode(PC_STWUX, 1, 1, 12); + insertpcodeafter(setup_caller_sp, align_instr2); + } + } else { + dynamic_align_stack = 0; + if (setup_caller_sp && setup_caller_sp->block) { + pc = makepcode(PC_MR, _CALLER_SP_, _FP_); + insertpcodebefore(setup_caller_sp, pc); + deletepcode(setup_caller_sp); + setup_caller_sp = pc; + } + _CALLER_SP_ = _FP_; + } + + vrsave_mask = 0; + if (copts.altivec_model) { + vrsave_mask = colored_vrs_as_vrsave(pcbasicblocks); + if (!requires_frame && vrsave_mask) { + vrsave_register = 11; + loadvrsave = makepcode(PC_LWZ, 11, 1, 0, -4); + appendpcode(prologue, loadvrsave); + storevrsave = makepcode(PC_STW, 11, 1, 0, -4); + appendpcode(epilogue, storevrsave); + } + } +} + +void move_varargs_to_memory(void) { + short reg; + + has_varargs = 1; + dummyvaparam = galloc(sizeof(Object)); + memclrw(dummyvaparam, sizeof(Object)); + + dummyvaparam->type = TYPE(&stvoid); + dummyvaparam->otype = OT_OBJECT; + dummyvaparam->name = GetHashNameNode("<vaparam>"); + dummyvaparam->datatype = DLOCAL; + dummyvaparam->u.var.info = CodeGen_GetNewVarInfo(); + dummyvaparam->u.var.uid = 0; + dummyvaparam->u.var.info->noregister = 1; + Registers_GetVarInfo(dummyvaparam)->flags = (Registers_GetVarInfo(dummyvaparam)->flags & ~VarInfoFlag1) | VarInfoFlag1; + + for (reg = last_argument_register[RegClass_GPR] + 1; (int)reg <= 10; reg++) { + emitpcode(PC_STW, reg, local_base_register(dummyvaparam), dummyvaparam, (reg - 3) * 4); + setpcodeflags(fIsPtrOp | fIsArgInit); + } +} + +void assign_arguments_to_memory(Object *func, UInt8 mysteryFlag, Boolean hasVarargs) { + // almost matches except for the not/andc issue + SInt32 pos; + ObjectList *list; + Object *obj; + Type *type; + short reg; + SInt32 chk; + Boolean flag; + + pos = 0; + reg = 2; + + for (list = arguments; list; list = list->next) { + obj = list->object; + type = obj->type; + if (!IS_TYPE_VECTOR(type)) { + obj->datatype = DLOCAL; + obj->u.var.info = CodeGen_GetNewVarInfo(); + if (IS_TYPE_ARRAY(type) || IS_TYPE_NONVECTOR_STRUCT(type) || IS_TYPE_CLASS(type) || + IS_TYPE_12BYTES_MEMBERPOINTER(type)) { + chk = CMach_ArgumentAlignment(type); + if (chk > 4) { + pos = ALIGN(pos, chk); + update_in_param_align(chk); + } + } + obj->u.var.uid = pos; + Registers_GetVarInfo(obj)->flags = (Registers_GetVarInfo(obj)->flags & ~VarInfoFlag1) | VarInfoFlag1; + if (!copts.littleendian && (IS_TYPE_INT(obj->type) || IS_TYPE_ENUM(obj->type)) && obj->type->size < 4) + obj->u.var.uid += 4 - obj->type->size; + pos += type->size; + pos = ALIGN(pos, 4); + } else { + obj->u.var.info = CodeGen_GetNewVarInfo(); + obj->u.var.uid = 0; + obj->datatype = DLOCAL; + flag = 1; + if (reg <= 13) + flag = hasVarargs; + if (flag) { + pos = ALIGN(pos + 24, 16) - 24; + obj->u.var.uid = pos; + pos += 16; + update_in_param_align(16); + Registers_GetVarInfo(obj)->flags = (Registers_GetVarInfo(obj)->flags & ~VarInfoFlag1) | VarInfoFlag1; + } else { + assign_local_memory(obj); + Registers_GetVarInfo(obj)->flags = Registers_GetVarInfo(obj)->flags & ~VarInfoFlag1; + } + reg++; + } + } + + in_parameter_size = (in_parameter_size < pos) ? pos : in_parameter_size; + CError_ASSERT(2408, !dummyvaparam); +} + +SInt32 set_out_param_displ(SInt32 a, Type *type, Boolean flag, SInt32 *outvar, SInt32 b) { + // does not match due to errant andc + SInt32 argAlign; + + if (!flag && !b) { + *outvar = 0; + return a; + } + + if (IS_TYPE_VECTOR(type)) { + update_out_param_align(16); + a = ALIGN(a + 16 + 24, 16) - 24; + } else if (IS_TYPE_ARRAY(type) || IS_TYPE_NONVECTOR_STRUCT(type) || IS_TYPE_CLASS(type) || IS_TYPE_12BYTES_MEMBERPOINTER(type)) { + argAlign = CMach_ArgumentAlignment(type); + if (argAlign > 4) { + a = ALIGN(a + 24, argAlign) - 24; + update_in_param_align(argAlign); + } + } + + *outvar = a; + a = ALIGN(a + b, 4); + return a; +} + +SInt32 out_param_displ_to_offset(SInt32 displ) { + return displ + 24; +} + +Boolean needs_frame(void) { + return (frame_size > 224) || requires_frame; +} + +void update_out_param_size(SInt32 size) { + if (size < 32) + size = 32; + if (parameter_area_size < size) + parameter_area_size = size; +} + +void estimate_out_param_size(SInt32 size) { + if (parameter_area_size_estimate < size) + parameter_area_size_estimate = size; +} + +void update_out_param_align(SInt32 align) { + if (out_param_alignment < align) + out_param_alignment = align; + update_frame_align(align); +} + +void update_in_param_align(SInt32 align) { + if (in_param_alignment < align) + in_param_alignment = align; +} + +void update_frame_align(SInt32 align) { + if (frame_alignment < align) + frame_alignment = align; +} + +SInt32 local_offset_32(Object *obj) { + short align; + SInt32 offset; + + if (obj->u.var.info->flags & VarInfoFlag1) + align = CMach_ArgumentAlignment(obj->type); + else + align = CMach_AllocationAlignment(obj->type, obj->qual); + + offset = obj->u.var.uid; + if (offset > 0x7FFF) + offset = 0x8000 - offset - ALIGN(obj->type->size, align); + + if (obj->u.var.info->flags & VarInfoFlag1) + return offset + parametersbase(local_base_register(obj) != _FP_); + else + return offset + localsbase(); +} + +SInt32 local_offset_lo(Object *obj, SInt32 offset) { + SInt32 combo = offset + local_offset_32(obj); + return LOW_PART(combo); + //return (SInt16) (offset + local_offset_32(obj)); +} + +SInt32 local_offset_ha(Object *obj, SInt32 offset) { + SInt32 combo = offset + local_offset_32(obj); + return HIGH_PART(combo); + //return (SInt16) ((combo >> 16) + ((combo & 0x8000) >> 15)); +} + +SInt32 local_offset_16(Object *obj) { + SInt32 offset32 = local_offset_32(obj); + SInt16 offset16 = (SInt16) offset32; + CError_ASSERT(2662, offset32 == offset16); + return offset16; +} + +Boolean local_is_16bit_offset(Object *obj) { + SInt32 offset32 = local_offset_32(obj); + SInt16 offset16 = (SInt16) offset32; + return offset32 == offset16; +} + +int local_base_register(Object *obj) { + PCode *pc; + + if (obj->u.var.info->flags & VarInfoFlag1) { + if (coloring && _CALLER_SP_ == -1) { + _CALLER_SP_ = used_virtual_registers[RegClass_GPR]++; + pc = makepcode(PC_LWZ, _CALLER_SP_, 1, 0, 0); + setup_caller_sp = pc; + appendpcode(prologue, pc); + } + return _CALLER_SP_; + } else { + return _FP_; + } +} + +static UInt32 align_bits(UInt32 value, UInt8 bitcount) { + UInt32 base = bitcount != 0; + switch (value) { + case 0x0002: return base + 30; + case 0x0004: return base + 29; + case 0x0008: return base + 28; + case 0x0010: return base + 27; + case 0x0020: return base + 26; + case 0x0040: return base + 25; + case 0x0080: return base + 24; + case 0x0100: return base + 23; + case 0x0200: return base + 22; + case 0x0400: return base + 21; + case 0x0800: return base + 20; + case 0x1000: return base + 19; + case 0x2000: return base + 18; + default: + CError_FATAL(2754); + return base + 27; + } +} + +Boolean is_large_frame(void) { + CError_ASSERT(2769, frame_size != -1); + return large_stack; +} + +void no_frame_for_asm(void) { + frame_size = 0; +} + +Boolean can_add_displ_to_local(Object *obj, SInt32 displ) { + if (obj->datatype != DLOCAL) + return 0; + + if (local_offset_32(obj) == (short) local_offset_32(obj)) + if ((displ + local_offset_32(obj)) == (short) (displ + local_offset_32(obj))) + return 1; + + return 0; +} + +SInt32 get_alloca_alignment(void) { + SInt32 align = frame_alignment; + if (copts.altivec_model) + align = ALIGN(align, 16); + + if (!alloca_alignment) + alloca_alignment = align; + else + CError_ASSERT(2825, alloca_alignment == align); + + return align_bits(align, 0); +} + +static Boolean use_helper_function(char rclass) { + if (copts.no_register_save_helpers) + return 0; + + switch (rclass) { + case RegClass_GPR: + if (copts.use_lmw_stmw) + return 0; + return (used_nonvolatile_registers[RegClass_GPR] > 4) || (copts.optimizesize && used_nonvolatile_registers[RegClass_GPR] > 2); + case RegClass_FPR: + return (used_nonvolatile_registers[RegClass_FPR] > 3) || (copts.optimizesize && used_nonvolatile_registers[RegClass_FPR] > 2); + case RegClass_VR: + return (used_nonvolatile_registers[RegClass_VR] > 3) || (copts.optimizesize && used_nonvolatile_registers[RegClass_VR] > 2); + default: + CError_FATAL(2862); + return 0; + } +} + +static Boolean need_link_register(void) { + if (copts.codegen_pic && uses_globals) + return 1; + + if (makes_call) + return 1; + + return use_helper_function(RegClass_FPR) || use_helper_function(RegClass_GPR) || use_helper_function(RegClass_VR); +} + +static void call_helper_function(char *name, char rclass, short effect) { + char str[32]; + Object *func; + NameSpace *save_scope; + PCode *pc; + int extra_args; + PCodeArg *arg; + short i; + + extra_args = 1; + if (rclass == RegClass_VR) + extra_args = 2; + + sprintf(str, name, 32 - used_nonvolatile_registers[rclass]); + + save_scope = cscope_current; + cscope_current = cscope_root; + func = CParser_NewRTFunc(&stvoid, NULL, 2, 0); + cscope_current = save_scope; + + func->name = GetHashNameNodeExport(str); + + pc = makepcode(PC_BL, extra_args + used_nonvolatile_registers[rclass], func, 0); + for (i = 1, arg = &pc->args[1]; i <= used_nonvolatile_registers[rclass]; i++, arg++) { + arg->kind = PCOp_REGISTER; + arg->arg = rclass; + arg->data.reg.reg = n_real_registers[rclass] - i; + arg->data.reg.effect = effect; + } + + if (rclass == RegClass_VR) { + arg[1].kind = PCOp_REGISTER; + arg[1].arg = RegClass_GPR; + arg[1].data.reg.reg = 12; + arg[1].data.reg.effect = EffectWrite; + arg[2].kind = PCOp_REGISTER; + arg[2].arg = RegClass_GPR; + arg[2].data.reg.reg = 0; + arg[2].data.reg.effect = EffectRead; + } else { + arg[1].kind = PCOp_REGISTER; + arg[1].arg = RegClass_GPR; + arg[1].data.reg.reg = 11; + arg[1].data.reg.effect = EffectRead; + } + + appendpcode(pclastblock, pc); + setpcodeflags(fSideEffects); +} + +static SInt32 nearest_power_of_two(SInt32 n) { + SInt32 power = 1; + do { + power <<= 1; + } while (power && power < n); + + CError_ASSERT(2933, power != 0); + return power; +} + +static void compress_data_area(void) { + // doesn't quite match + SInt32 r0; + SInt32 r7; + ObjectList *list; + Object *obj; + PCodeBlock *block; + PCode *pc; + int i; + + compressing_data_area = 1; + + if (large_stack) { + r0 = 0; + } else { + r0 = parameter_area_size; + if (r0 < 32) + r0 = 32; + } + r7 = ALIGN(r0 + 24, frame_alignment) - 24; + local_data_limit = 0x8000 - ALIGN(24 + in_parameter_size + nonvolatile_save_size + r7, frame_alignment); + + if (local_objects_tail[ObjClass0]) { + if (local_objects[ObjClass1]) { + local_objects_tail[ObjClass0]->next = local_objects[ObjClass1]; + local_objects_tail[ObjClass0] = local_objects_tail[ObjClass1]; + } + if (local_objects[ObjClass2]) { + local_objects_tail[ObjClass0]->next = local_objects[ObjClass2]; + local_objects_tail[ObjClass0] = local_objects_tail[ObjClass2]; + } + } else if (local_objects_tail[ObjClass1]) { + local_objects[ObjClass0] = local_objects[ObjClass1]; + local_objects_tail[ObjClass0] = local_objects_tail[ObjClass1]; + if (local_objects[ObjClass2]) { + local_objects_tail[ObjClass0]->next = local_objects[ObjClass2]; + local_objects_tail[ObjClass0] = local_objects_tail[ObjClass2]; + } + } else { + local_objects[ObjClass0] = local_objects[ObjClass2]; + local_objects_tail[ObjClass0] = local_objects_tail[ObjClass2]; + } + + for (list = local_objects[ObjClass0]; list; list = list->next) + Registers_GetVarInfo(list->object)->used = 0; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (pc = block->firstPCode; pc; pc = pc->nextPCode) { + for (i = 0; i < pc->argCount; i++) { + if (pc->args[i].kind == PCOp_MEMORY && pc->args[i].data.mem.obj && pc->args[i].data.mem.obj->datatype == DLOCAL) + Registers_GetVarInfo(pc->args[i].data.mem.obj)->used = 1; + } + } + } + + local_data_size = 0; + large_data_near_size = 0; + large_data_far_size = 0; + + for (list = local_objects[ObjClass0]; list; list = list->next) { + obj = list->object; + if (Registers_GetVarInfo(obj)->used) + assign_local_memory(obj); + } +} + +static void insert_local_object(UInt8 oclass, Object *obj) { + ObjectList *list; + + if (!compressing_data_area) { + list = lalloc(sizeof(ObjectList)); + memclrw(list, sizeof(ObjectList)); + list->object = obj; + if (!local_objects[oclass]) + local_objects[oclass] = list; + if (local_objects_tail[oclass]) + local_objects_tail[oclass]->next = list; + local_objects_tail[oclass] = list; + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/StructMoves.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/StructMoves.c new file mode 100644 index 0000000..7c28b88 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/StructMoves.c @@ -0,0 +1,792 @@ +#include "compiler/StructMoves.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/Registers.h" + +void make_addressable(Operand *opnd, SInt32 offset, int unusedArg) { + int reg; + + if (opnd->optype == OpndType_IndirectSymbol) + coerce_to_addressable(opnd); + + if (opnd->optype != OpndType_IndirectGPR_ImmOffset || (opnd->immOffset + offset) > 0x7FFF) { + reg = used_virtual_registers[RegClass_GPR]++; + load_address(reg, opnd); + opnd->optype = OpndType_IndirectGPR_ImmOffset; + opnd->reg = reg; + opnd->object = NULL; + opnd->immOffset = 0; + } +} + +static void load_displaced_address(Operand *opnd, SInt32 offset) { + int reg; + + reg = used_virtual_registers[RegClass_GPR]++; + if (opnd->optype == OpndType_IndirectSymbol) + coerce_to_addressable(opnd); + + if (opnd->optype == OpndType_IndirectGPR_ImmOffset) { + offset += opnd->immOffset; + if (!FITS_IN_SHORT(offset)) { + add_immediate(reg, opnd->reg, opnd->object, opnd->immOffset); + emitpcode(PC_ADDI, reg, reg, 0, offset - opnd->immOffset); + } else { + add_immediate(reg, opnd->reg, opnd->object, offset); + } + } else if (opnd->optype == OpndType_IndirectGPR_Indexed) { + emitpcode(PC_ADD, reg, opnd->reg, opnd->regOffset); + emitpcode(PC_ADDI, reg, reg, 0, offset); + } else { + CError_FATAL(80); + } + + opnd->optype = OpndType_IndirectGPR_ImmOffset; + opnd->reg = reg; + opnd->object = NULL; + opnd->immOffset = 0; +} + +static void move_block_via_load_store(Operand *dst, Operand *src, SInt32 len, SInt32 align) { + SInt32 step; + SInt32 pos; + int floatReg; + int reg; + + if (src->optype == OpndType_IndirectSymbol) + coerce_to_addressable(src); + if (dst->optype == OpndType_IndirectSymbol) + coerce_to_addressable(dst); + + if (len == 8) { + floatReg = used_virtual_registers[RegClass_FPR]++; + if (src->optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register(PC_LFD, floatReg, src->reg, src->object, src->immOffset); + setpcodeflags(src->flags); + } else if (src->optype == OpndType_IndirectGPR_Indexed) { + emitpcode(PC_LFDX, floatReg, src->reg, src->regOffset); + setpcodeflags(src->flags); + } else { + CError_FATAL(145); + } + + if (dst->optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register(PC_STFD, floatReg, dst->reg, dst->object, dst->immOffset); + setpcodeflags(dst->flags); + } else if (dst->optype == OpndType_IndirectGPR_Indexed) { + emitpcode(PC_STFDX, floatReg, dst->reg, dst->regOffset); + setpcodeflags(dst->flags); + } else { + CError_FATAL(157); + } + + return; + } + + if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) { + SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align; + step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2; + } else { + step = ((UInt32) len > 4) ? 4 : ((UInt32) len <= 2) ? len : 2; + } + + if (step != len) { + if (dst->optype == OpndType_IndirectGPR_Indexed) + make_addressable(dst, len, 0); + if (src->optype == OpndType_IndirectGPR_Indexed) + make_addressable(src, len, 0); + } + + for (pos = 0; len != 0; len -= step, pos += step) { + reg = used_virtual_registers[RegClass_GPR]++; + if (src->optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register( + (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ, + reg, + src->reg, + src->object, + src->immOffset + pos + ); + setpcodeflags(src->flags); + } else if (src->optype == OpndType_IndirectGPR_Indexed) { + emitpcode( + (step == 1) ? PC_LBZX : (step == 2) ? PC_LHZX : PC_LWZX, + reg, + src->reg, + src->regOffset + ); + setpcodeflags(src->flags); + } else { + CError_FATAL(183); + } + + if (dst->optype == OpndType_IndirectGPR_ImmOffset) { + load_store_register( + (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW, + reg, + dst->reg, + dst->object, + dst->immOffset + pos + ); + setpcodeflags(dst->flags); + } else if (dst->optype == OpndType_IndirectGPR_Indexed) { + emitpcode( + (step == 1) ? PC_STBX : (step == 2) ? PC_STHX : PC_STWX, + reg, + dst->reg, + dst->regOffset + ); + setpcodeflags(dst->flags); + } else { + CError_FATAL(195); + } + } +} + +static void move_block_via_load_store_sequence(Operand *dst, Operand *src, SInt32 len, SInt32 align) { + SInt32 pos; + int i; + SInt32 step; + + pos = 0; + make_addressable(dst, len, 0); + make_addressable(src, len, 0); + + if ((align % 8) == 0) { + while (len >= 16) { + int reg1 = used_virtual_registers[RegClass_FPR]++; + int reg2 = used_virtual_registers[RegClass_FPR]++; + load_store_register(PC_LFD, reg1, src->reg, src->object, src->immOffset + pos); + setpcodeflags(src->flags); + load_store_register(PC_LFD, reg2, src->reg, src->object, src->immOffset + pos + 8); + setpcodeflags(src->flags); + + load_store_register(PC_STFD, reg1, dst->reg, dst->object, dst->immOffset + pos); + setpcodeflags(dst->flags); + load_store_register(PC_STFD, reg2, dst->reg, dst->object, dst->immOffset + pos + 8); + setpcodeflags(dst->flags); + + pos += 16; + len -= 16; + } + } + + while (len >= 8) { + if ((align % 8) == 0) { + int reg = used_virtual_registers[RegClass_FPR]++; + + load_store_register(PC_LFD, reg, src->reg, src->object, src->immOffset + pos); + setpcodeflags(src->flags); + + load_store_register(PC_STFD, reg, dst->reg, dst->object, dst->immOffset + pos); + setpcodeflags(dst->flags); + + pos += 8; + len -= 8; + } else { + if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) { + SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align; + step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp > 2) ? 2 : 1; + } else { + step = 4; + } + + for (i = 0; i < 8; i += (step * 2)) { + int reg1 = used_virtual_registers[RegClass_GPR]++; + int reg2 = used_virtual_registers[RegClass_GPR]++; + + load_store_register( + (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ, + reg1, + src->reg, + src->object, + src->immOffset + pos + ); + setpcodeflags(src->flags); + + load_store_register( + (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ, + reg2, + src->reg, + src->object, + src->immOffset + pos + step + ); + setpcodeflags(src->flags); + + load_store_register( + (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW, + reg1, + dst->reg, + dst->object, + dst->immOffset + pos + ); + setpcodeflags(dst->flags); + + load_store_register( + (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW, + reg2, + dst->reg, + dst->object, + dst->immOffset + pos + step + ); + setpcodeflags(dst->flags); + + pos += (step * 2); + len -= (step * 2); + } + } + } + + while (len) { + int reg; + + if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) { + SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align; + step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2; + } else { + step = ((UInt32) len > 4) ? 4 : ((UInt32) len <= 2) ? len : 2; + } + + reg = used_virtual_registers[RegClass_GPR]++; + + load_store_register( + (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ, + reg, + src->reg, + src->object, + src->immOffset + pos + ); + setpcodeflags(src->flags); + + load_store_register( + (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW, + reg, + dst->reg, + dst->object, + dst->immOffset + pos + ); + setpcodeflags(dst->flags); + + len -= step; + pos += step; + } +} + +static void move_block_via_inline_loop(Operand *dst, Operand *src, SInt32 len, SInt32 align) { + PCodeLabel *label; // r25 + SInt32 pos; // r25 + SInt32 step; // r24 + int reg1; // r22 + int reg2; // r23 + SInt32 remainder; // r23 + + label = makepclabel(); + + if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) { + SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align; + step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2; + } else { + step = 4; + } + + load_displaced_address(dst, -step); + load_displaced_address(src, -step); + + CError_ASSERT(377, (len / step) != 0); + + reg1 = used_virtual_registers[RegClass_GPR]++; + load_immediate(reg1, len / (step * 2)); + emitpcode(PC_MTCTR, reg1); + branch_label(label); + + reg1 = used_virtual_registers[RegClass_GPR]++; + reg2 = used_virtual_registers[RegClass_GPR]++; + + load_store_register( + (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ, + reg1, + src->reg, + NULL, + step + ); + setpcodeflags(src->flags); + + load_store_register( + (step == 1) ? PC_LBZU : (step == 2) ? PC_LHZU : PC_LWZU, + reg2, + src->reg, + NULL, + step * 2 + ); + setpcodeflags(src->flags); + + load_store_register( + (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW, + reg1, + dst->reg, + NULL, + step + ); + setpcodeflags(dst->flags); + + load_store_register( + (step == 1) ? PC_STBU : (step == 2) ? PC_STHU : PC_STWU, + reg2, + dst->reg, + NULL, + step * 2 + ); + setpcodeflags(dst->flags); + + branch_decrement_always(PC_BDNZ, label); + + for (remainder = len & 7, pos = step; remainder != 0; remainder -= step, pos += step) { + int reg; + + if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) { + SInt32 tmp = (align == 0) ? 1 : (align > remainder) ? remainder : align; + step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2; + } else { + step = ((UInt32) remainder > 4) ? 4 : ((UInt32) remainder <= 2) ? remainder : 2; + } + + reg = used_virtual_registers[RegClass_GPR]++; + + load_store_register( + (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ, + reg, + src->reg, + NULL, + pos + ); + setpcodeflags(src->flags); + + load_store_register( + (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW, + reg, + dst->reg, + NULL, + pos + ); + setpcodeflags(dst->flags); + } +} + +void move_block(Operand *dst, Operand *src, SInt32 len, SInt32 align) { + Operand myDst; + + myDst = *dst; + + CError_ASSERT(447, myDst.optype >= OpndType_IndirectGPR_ImmOffset); + CError_ASSERT(449, src->optype >= OpndType_IndirectGPR_ImmOffset); + + if (len == 1 || len == 2 || len == 4) + move_block_via_load_store(&myDst, src, len, align); + else if (len == 8 && align == 8) + move_block_via_load_store(&myDst, src, len, align); + else if (len <= 16 || (copts.optimizesize == 0 && len <= 64)) + move_block_via_load_store_sequence(&myDst, src, len, align); + else + move_block_via_inline_loop(&myDst, src, len, align); +} + +static void load_word_of_small_struct(short dstReg, short srcReg, Operand *opnd, SInt32 offset, SInt32 len, SInt32 align) { + short tmpReg; + short extra = 0; + + switch (len) { + case 1: + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWINM, dstReg, tmpReg, 24, 0, 7); + setpcodeflags(opnd->flags); + break; + case 2: + case 3: + if (align > 1) { + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_store_register(PC_LHZ, tmpReg, srcReg, opnd->object, offset); + extra += 2; + setpcodeflags(opnd->flags); + emitpcode(PC_RLWINM, dstReg, tmpReg, 16, 0, 15); + setpcodeflags(opnd->flags); + } else { + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWINM, dstReg, tmpReg, 24, 0, 7); + setpcodeflags(opnd->flags); + + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 1); + extra += 2; + setpcodeflags(opnd->flags); + emitpcode(PC_RLWIMI, dstReg, tmpReg, 16, 8, 15); + setpcodeflags(opnd->flags); + } + if (len == 3) { + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + extra); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWIMI, dstReg, tmpReg, 8, 16, 23); + setpcodeflags(opnd->flags); + } + break; + case 4: + if (align > 2) { + load_store_register(PC_LWZ, dstReg, srcReg, opnd->object, offset); + setpcodeflags(opnd->flags); + } else if (align > 1) { + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_store_register(PC_LHZ, tmpReg, srcReg, opnd->object, offset); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWINM, dstReg, tmpReg, 16, 0, 15); + setpcodeflags(opnd->flags); + + load_store_register(PC_LHZ, tmpReg, srcReg, opnd->object, offset + 2); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWIMI, dstReg, tmpReg, 0, 16, 31); + setpcodeflags(opnd->flags); + } else { + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWINM, dstReg, tmpReg, 24, 0, 7); + setpcodeflags(opnd->flags); + + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 1); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWIMI, dstReg, tmpReg, 16, 8, 15); + setpcodeflags(opnd->flags); + + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 2); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWIMI, dstReg, tmpReg, 8, 16, 23); + setpcodeflags(opnd->flags); + + load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 3); + setpcodeflags(opnd->flags); + emitpcode(PC_RLWIMI, dstReg, tmpReg, 0, 24, 31); + setpcodeflags(opnd->flags); + } + break; + } +} + +void load_small_block_into_reg(short dstReg, Operand *srcOpnd, Type *type, SInt32 align) { + short finalReg; + short tmpReg; + SInt32 absAddress; + + coerce_to_addressable(srcOpnd); + + if (srcOpnd->optype == OpndType_IndirectGPR_Indexed) { + CError_FATAL(557); + + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_address(tmpReg, srcOpnd); + srcOpnd->optype = OpndType_IndirectGPR_ImmOffset; + srcOpnd->reg = tmpReg; + srcOpnd->object = NULL; + srcOpnd->immOffset = 0; + } + + if (copts.misaligned_mem_access) + align = 4; + + switch (srcOpnd->optype) { + case OpndType_GPRPair: + return; + case OpndType_GPR: + return; + case OpndType_GPR_ImmOffset: + finalReg = dstReg ? dstReg : used_virtual_registers[RegClass_GPR]++; + add_immediate(finalReg, srcOpnd->reg, srcOpnd->object, srcOpnd->immOffset); + break; + case OpndType_GPR_Indexed: + finalReg = dstReg ? dstReg : used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_ADD, finalReg, srcOpnd->reg, srcOpnd->regOffset); + break; + case OpndType_Absolute: + finalReg = dstReg ? dstReg : used_virtual_registers[RegClass_GPR]++; + absAddress = srcOpnd->immediate; + if (FITS_IN_SHORT(absAddress)) { + emitpcode(PC_LI, finalReg, absAddress); + } else { + tmpReg = finalReg; + if (copts.optimizationlevel > 1 && absAddress) + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_LIS, tmpReg, 0, HIGH_PART(absAddress)); + if (absAddress) + emitpcode(PC_ADDI, finalReg, tmpReg, 0, LOW_PART(absAddress)); + } + break; + case OpndType_IndirectGPR_ImmOffset: + finalReg = dstReg ? dstReg : used_virtual_registers[RegClass_GPR]++; + load_word_of_small_struct(finalReg, srcOpnd->reg, srcOpnd, srcOpnd->immOffset, type->size, align); + break; + default: + CError_FATAL(606); + } + + srcOpnd->optype = OpndType_GPR; + srcOpnd->reg = finalReg; +} + +void load_small_block_into_reg_pair(short dstRegLo, short dstRegHi, Operand *srcOpnd, Type *type, SInt32 align) { + short finalRegLo; + short finalRegHi; + short tmpRegLo; + short tmpRegHi; + short tmpReg; + SInt32 absAddress; + + finalRegHi = -1; + coerce_to_addressable(srcOpnd); + + if (srcOpnd->optype == OpndType_IndirectGPR_Indexed) { + CError_FATAL(624); + + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_address(tmpReg, srcOpnd); + srcOpnd->optype = OpndType_IndirectGPR_ImmOffset; + srcOpnd->reg = tmpReg; + srcOpnd->object = NULL; + srcOpnd->immOffset = 0; + } + + if (copts.misaligned_mem_access) + align = 4; + + switch (srcOpnd->optype) { + case OpndType_GPRPair: + if (dstRegLo != 0 && dstRegHi == 0) + dstRegHi = used_virtual_registers[RegClass_GPR]++; + if (dstRegHi != 0 && dstRegLo == 0) + dstRegLo = used_virtual_registers[RegClass_GPR]++; + + if (srcOpnd->reg != dstRegLo || srcOpnd->regHi != dstRegHi) { + tmpRegLo = dstRegLo ? dstRegLo : srcOpnd->reg; + tmpRegHi = dstRegHi ? dstRegHi : srcOpnd->regHi; + + if (tmpRegLo != srcOpnd->reg) { + if (tmpRegLo == srcOpnd->regHi) { + CError_ASSERT(657, tmpRegLo != tmpRegHi); + emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi); + emitpcode(PC_MR, tmpRegLo, srcOpnd->reg); + } else { + emitpcode(PC_MR, tmpRegLo, srcOpnd->reg); + if (srcOpnd->regHi != tmpRegHi) + emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi); + } + } else if (tmpRegHi != srcOpnd->regHi) { + if (tmpRegHi == srcOpnd->reg) { + CError_ASSERT(671, tmpRegLo != tmpRegHi); + emitpcode(PC_MR, tmpRegLo, srcOpnd->reg); + emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi); + } else { + emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi); + if (srcOpnd->reg != tmpRegLo) + emitpcode(PC_MR, tmpRegLo, srcOpnd->reg); + } + } + } + + finalRegLo = srcOpnd->reg; + finalRegHi = srcOpnd->regHi; + break; + case OpndType_GPR: + CError_FATAL(688); + break; + case OpndType_GPR_ImmOffset: + CError_FATAL(691); + break; + case OpndType_GPR_Indexed: + CError_FATAL(694); + break; + case OpndType_Absolute: + finalRegLo = dstRegLo ? dstRegLo : used_virtual_registers[RegClass_GPR]++; + absAddress = srcOpnd->immediate; + if (FITS_IN_SHORT(absAddress)) { + emitpcode(PC_LI, finalRegLo, absAddress); + } else { + tmpReg = finalRegLo; + if (copts.optimizationlevel > 1 && absAddress) + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_LIS, tmpReg, 0, HIGH_PART(absAddress)); + if (absAddress) + emitpcode(PC_ADDI, finalRegLo, tmpReg, 0, LOW_PART(absAddress)); + } + + finalRegHi = dstRegHi ? dstRegHi : used_virtual_registers[RegClass_GPR]++; + if (is_unsigned(type) || absAddress >= 0) + load_immediate(finalRegHi, 0); + else + load_immediate(finalRegHi, -1); + + break; + case OpndType_IndirectGPR_ImmOffset: + finalRegLo = dstRegLo ? dstRegLo : used_virtual_registers[RegClass_GPR]++; + finalRegHi = dstRegHi ? dstRegHi : used_virtual_registers[RegClass_GPR]++; + if (srcOpnd->reg == finalRegHi) { + if (srcOpnd->reg == finalRegLo) { + CError_FATAL(726); + } else { + load_word_of_small_struct( + finalRegLo, srcOpnd->reg, srcOpnd, + srcOpnd->immOffset + low_offset, type->size - 4, align); + load_word_of_small_struct( + finalRegHi, srcOpnd->reg, srcOpnd, + srcOpnd->immOffset + high_offset, 4, align); + } + } else { + load_word_of_small_struct( + finalRegHi, srcOpnd->reg, srcOpnd, + srcOpnd->immOffset + high_offset, 4, align); + load_word_of_small_struct( + finalRegLo, srcOpnd->reg, srcOpnd, + srcOpnd->immOffset + low_offset, type->size - 4, align); + } + break; + default: + CError_FATAL(737); + } + + if (finalRegHi == -1) { + CError_FATAL(741); + } else { + srcOpnd->optype = OpndType_GPRPair; + srcOpnd->reg = finalRegLo; + srcOpnd->regHi = finalRegHi; + } +} + +static void store_word_of_small_struct(short srcReg, short dstReg, Operand *opnd, SInt32 offset, SInt32 len, SInt32 align) { + short tmpReg; + short extra = 0; + + switch (len) { + case 1: + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, tmpReg, srcReg, 8, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset); + setpcodeflags(opnd->flags); + break; + case 2: + case 3: + if (align > 1) { + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 16, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STH, tmpReg, dstReg, opnd->object, offset); + extra += 2; + setpcodeflags(opnd->flags); + } else { + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, tmpReg, srcReg, 8, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset); + setpcodeflags(opnd->flags); + + emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + 1); + extra += 2; + setpcodeflags(opnd->flags); + } + if (len == 3) { + emitpcode(PC_RLWINM, tmpReg, srcReg, 24, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + extra); + setpcodeflags(opnd->flags); + } + break; + case 4: + if (align > 2) { + load_store_register(PC_STW, srcReg, dstReg, opnd->object, offset); + setpcodeflags(opnd->flags); + } else if (align > 1) { + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 16, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STH, tmpReg, dstReg, opnd->object, offset); + setpcodeflags(opnd->flags); + + load_store_register(PC_STH, srcReg, dstReg, opnd->object, offset + 2); + setpcodeflags(opnd->flags); + } else { + tmpReg = used_virtual_registers[RegClass_GPR]++; + emitpcode(PC_RLWINM, tmpReg, srcReg, 8, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset); + setpcodeflags(opnd->flags); + + emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + 1); + setpcodeflags(opnd->flags); + + emitpcode(PC_RLWINM, tmpReg, srcReg, 24, 24, 31); + setpcodeflags(opnd->flags); + load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + 2); + setpcodeflags(opnd->flags); + + load_store_register(PC_STB, srcReg, dstReg, opnd->object, offset + 3); + setpcodeflags(opnd->flags); + } + break; + } +} + +void store_small_block_from_reg(short srcReg, Operand *dstOpnd, Type *type, SInt32 align) { + short tmpReg; + + coerce_to_addressable(dstOpnd); + + if (dstOpnd->optype == OpndType_IndirectGPR_Indexed) { + CError_FATAL(839); + + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_address(tmpReg, dstOpnd); + dstOpnd->optype = OpndType_IndirectGPR_ImmOffset; + dstOpnd->reg = tmpReg; + dstOpnd->object = NULL; + dstOpnd->immOffset = 0; + } + + if (copts.misaligned_mem_access) + align = 4; + + store_word_of_small_struct(srcReg, dstOpnd->reg, dstOpnd, dstOpnd->immOffset, type->size, align); +} + +void store_small_block_from_reg_pair(short srcRegLo, short srcRegHi, Operand *dstOpnd, Type *type, SInt32 align) { + short tmpReg; + + coerce_to_addressable(dstOpnd); + + if (dstOpnd->optype == OpndType_IndirectGPR_Indexed) { + CError_FATAL(860); + + tmpReg = used_virtual_registers[RegClass_GPR]++; + load_address(tmpReg, dstOpnd); + dstOpnd->optype = OpndType_IndirectGPR_ImmOffset; + dstOpnd->reg = tmpReg; + dstOpnd->object = NULL; + dstOpnd->immOffset = 0; + } + + if (copts.misaligned_mem_access) + align = 4; + + store_word_of_small_struct( + srcRegLo, dstOpnd->reg, dstOpnd, + dstOpnd->immOffset + low_offset, type->size - 4, align); + store_word_of_small_struct( + srcRegHi, dstOpnd->reg, dstOpnd, + dstOpnd->immOffset + high_offset, 4, align); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Switch.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Switch.c new file mode 100644 index 0000000..4bbd82e --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/Switch.c @@ -0,0 +1,518 @@ +#include "compiler/Switch.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/CInt64.h" +#include "compiler/CParser.h" +#include "compiler/InstrSelection.h" +#include "compiler/ObjGenMachO.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/RegisterInfo.h" +#include "compiler/TOC.h" +#include "compiler/CompilerTools.h" +#include "compiler/objects.h" + +ObjectList *switchtables; +static SwitchCase **caselabels; +static CaseRange *caseranges; +static SInt32 ncases; +static SInt32 nranges_minus1; +static CInt64 min; +static CInt64 max; +static CInt64 first; +static short selector_gpr; +static short selector_gprHi; +static Type *selector_type; +static PCodeLabel *defaultlabel; +static CInt64 range; + +static int compare_cases(const void *a, const void *b) { + const SwitchCase **casea = (const SwitchCase **) a; + const SwitchCase **caseb = (const SwitchCase **) b; + + if (CInt64_Less((*casea)->min, (*caseb)->min)) + return -1; + if (CInt64_Greater((*casea)->min, (*caseb)->min)) + return 1; + return 0; +} + +static void build_case_ranges(Type *type, SwitchCase *cases, CLabel *label) { + SwitchCase **caseptr; + SInt32 i; + SwitchCase *curcase; + CaseRange *currange; + + if (type->size == 8) { + min.lo = 0; + min.hi = 0x80000000; + max.lo = 0xFFFFFFFF; + max.hi = 0x7FFFFFFF; + } else if (type->size == 4) { + CInt64_SetLong(&min, 0x80000000); + CInt64_SetLong(&max, 0x7FFFFFFF); + } else if (is_unsigned(type)) { + min.hi = 0; + min.lo = 0; + max.hi = 0; + max.lo = 0xFFFF; + } else { + CInt64_SetLong(&min, -0x8000); + CInt64_SetLong(&max, 0x7FFF); + } + + caselabels = lalloc(sizeof(SwitchCase *) * ncases); + caseptr = caselabels; + while (cases) { + *caseptr = cases; + cases = cases->next; + ++caseptr; + } + + caseranges = lalloc(((ncases * 2) + 2) * sizeof(CaseRange)); + if (type->size < 8) { + for (i = 0; i < ncases; i++) + CInt64_SetLong(&caselabels[i]->min, caselabels[i]->min.lo); + } + + qsort(caselabels, ncases, sizeof(SwitchCase *), &compare_cases); + + currange = caseranges; + currange->min = min; + currange->range = CInt64_Sub(max, min); + currange->label = label->pclabel; + + for (i = 0; i < ncases; i++) { + curcase = caselabels[i]; + if (CInt64_GreaterEqual(curcase->min, min) && CInt64_LessEqual(curcase->min, max)) { + if (CInt64_Equal(currange->min, min)) + first = curcase->min; + range = CInt64_Sub(curcase->min, first); + + if (CInt64_Greater(curcase->min, currange->min)) { + currange->range = CInt64_Sub(CInt64_Sub(curcase->min, currange->min), cint64_one); + (++currange)->min = curcase->min; + } else if (CInt64_Greater(currange->min, min) && curcase->label->pclabel == currange[-1].label) { + currange[-1].range = CInt64_Add(currange[-1].range, cint64_one); + if (CInt64_Equal(currange->range, cint64_zero)) { + currange--; + } else { + currange->min = CInt64_Add(currange->min, cint64_one); + currange->range = CInt64_Sub(currange->range, cint64_one); + } + continue; + } + + currange->range = cint64_zero; + currange->label = curcase->label->pclabel; + + if (CInt64_Less(curcase->min, max)) { + currange++; + currange->min = CInt64_Add(curcase->min, cint64_one); + currange->range = CInt64_Sub(max, currange->min); + currange->label = label->pclabel; + } + } + } + + nranges_minus1 = currange - caseranges; +} + +static void treecompare(SInt32 start, SInt32 end) { + SInt32 r30; + SInt32 r29; + CaseRange *currange; + int count; + + count = end - start; + CError_ASSERT(175, selector_type->size <= 4); + + r29 = start + (count >> 1) + 1; + currange = caseranges + r29; + + if (CInt64_Equal(currange[-1].range, cint64_zero) && (!(count & 1) || (CInt64_NotEqual(currange->range, cint64_zero) && count > 1))) { + currange--; + r29--; + } + + r30 = r29 - 1; + + if (selector_type->size < 4 && is_unsigned(selector_type)) { + emitpcode(PC_CMPLI, 0, selector_gpr, CInt64_GetULong(&currange->min)); + } else if (FITS_IN_SHORT((SInt32) CInt64_GetULong(&currange->min))) { + emitpcode(PC_CMPI, 0, selector_gpr, CInt64_GetULong(&currange->min)); + } else { + SInt32 value = CInt64_GetULong(&currange->min); + int reg = ALLOC_GPR(); + load_immediate(reg, value); + emitpcode(PC_CMP, 0, selector_gpr, reg); + } + + if (CInt64_Equal(currange->range, cint64_zero) && r29 < end) { + branch_conditional(0, EEQU, 1, currange->label); + r29++; + } + + if (r29 == end) { + if (start == r30) { + if (caseranges[start].label == caseranges[end].label) { + branch_always(caseranges[start].label); + } else { + branch_conditional(0, EGREATEREQU, 1, caseranges[end].label); + branch_always(caseranges[start].label); + } + } else { + branch_conditional(0, EGREATEREQU, 1, caseranges[end].label); + treecompare(start, r30); + } + } else { + if (start == r30) { + branch_conditional(0, ELESS, 1, caseranges[start].label); + treecompare(r29, end); + } else { + PCodeLabel *label = makepclabel(); + branch_conditional(0, EGREATEREQU, 1, label); + treecompare(start, r30); + branch_label(label); + treecompare(r29, end); + } + } +} + +static void I8_treecompare(SInt32 start, SInt32 end) { + SInt32 r30; + SInt32 r29; + CaseRange *currange; + int count; + + count = end - start; + + r29 = start + (count >> 1) + 1; + currange = caseranges + r29; + + if (CInt64_Equal(currange[-1].range, cint64_zero) && (!(count & 1) || (CInt64_NotEqual(currange->range, cint64_zero) && count > 1))) { + currange--; + r29--; + } + + r30 = r29 - 1; + + if (CInt64_Equal(currange->range, cint64_zero) && r29 < end) { + short a = ALLOC_GPR(); + short b = ALLOC_GPR(); + load_immediate(a, currange->min.lo); + load_immediate(b, currange->min.hi); + emitpcode(PC_XOR, a, selector_gpr, a); + emitpcode(PC_XOR, b, selector_gprHi, b); + emitpcode(PC_OR, b, a, b); + emitpcode(PC_CMPI, 0, b, 0); + branch_conditional(0, EEQU, 1, currange->label); + r29++; + } + + if (r29 == end) { + if (start == r30) { + if (caseranges[start].label == caseranges[end].label) { + branch_always(caseranges[start].label); + } else { + short a = ALLOC_GPR(); + short b = ALLOC_GPR(); + short c = ALLOC_GPR(); + short d = ALLOC_GPR(); + load_immediate(a, currange->min.lo); + load_immediate(b, currange->min.hi); + if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) { + emitpcode(PC_XORIS, c, selector_gprHi, 0x8000); + emitpcode(PC_XORIS, d, b, 0x8000); + } else { + c = selector_gprHi; + d = b; + } + emitpcode(PC_SUBFC, a, a, selector_gpr); + emitpcode(PC_SUBFE, b, d, c); + emitpcode(PC_SUBFE, b, a, a); + emitpcode(PC_NEG, b, b); + emitpcode(PC_CMPI, 0, b, 0); + branch_conditional(0, EEQU, 1, caseranges[end].label); + branch_always(caseranges[start].label); + } + } else { + short a = ALLOC_GPR(); + short b = ALLOC_GPR(); + short c = ALLOC_GPR(); + short d = ALLOC_GPR(); + load_immediate(a, currange->min.lo); + load_immediate(b, currange->min.hi); + if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) { + emitpcode(PC_XORIS, c, selector_gprHi, 0x8000); + emitpcode(PC_XORIS, d, b, 0x8000); + } else { + c = selector_gprHi; + d = b; + } + emitpcode(PC_SUBFC, a, a, selector_gpr); + emitpcode(PC_SUBFE, b, d, c); + emitpcode(PC_SUBFE, b, a, a); + emitpcode(PC_NEG, b, b); + emitpcode(PC_CMPI, 0, b, 0); + branch_conditional(0, EEQU, 1, caseranges[end].label); + I8_treecompare(start, r30); + } + } else { + if (start == r30) { + short a = ALLOC_GPR(); + short b = ALLOC_GPR(); + short c = ALLOC_GPR(); + short d = ALLOC_GPR(); + load_immediate(a, currange->min.lo); + load_immediate(b, currange->min.hi); + if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) { + emitpcode(PC_XORIS, c, selector_gprHi, 0x8000); + emitpcode(PC_XORIS, d, b, 0x8000); + } else { + c = selector_gprHi; + d = b; + } + emitpcode(PC_SUBFC, a, selector_gpr, a); + emitpcode(PC_SUBFE, b, c, d); + emitpcode(PC_SUBFE, b, a, a); + emitpcode(PC_NEG, b, b); + emitpcode(PC_CMPI, 0, b, 0); + branch_conditional(0, ENOTEQU, 1, caseranges[end].label); + I8_treecompare(r29, end); + } else { + PCodeLabel *label; + short a = ALLOC_GPR(); + short b = ALLOC_GPR(); + short c = ALLOC_GPR(); + short d = ALLOC_GPR(); + load_immediate(a, currange->min.lo); + load_immediate(b, currange->min.hi); + if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) { + emitpcode(PC_XORIS, c, selector_gprHi, 0x8000); + emitpcode(PC_XORIS, d, b, 0x8000); + } else { + c = selector_gprHi; + d = b; + } + emitpcode(PC_SUBFC, a, a, selector_gpr); + emitpcode(PC_SUBFE, b, d, c); + emitpcode(PC_SUBFE, b, a, a); + emitpcode(PC_NEG, b, b); + emitpcode(PC_CMPI, 0, b, 0); + label = makepclabel(); + branch_conditional(0, EEQU, 1, label); + I8_treecompare(start, r30); + branch_label(label); + I8_treecompare(r29, end); + } + } +} + +static void generate_tree(ENode *expr) { + Operand op; + + memclrw(&op, sizeof(Operand)); + if (TYPE_IS_8BYTES(expr->rtype)) { + GEN_NODE(expr, &op); + coerce_to_register_pair(&op, expr->rtype, 0, 0); + selector_type = expr->rtype; + selector_gpr = op.reg; + selector_gprHi = op.regHi; + I8_treecompare(0, nranges_minus1); + } else { + GEN_NODE(expr, &op); + if (expr->rtype->size < 4) + extend32(&op, expr->rtype, 0); + ENSURE_GPR(&op, expr->rtype, 0); + selector_type = expr->rtype; + selector_gpr = op.reg; + treecompare(0, nranges_minus1); + } +} + +static Object *create_switch_table(void) { + Object *obj; + ObjectList *list; + UInt32 *outptr; + CaseRange *currange; + SInt32 size; + CInt64 value; + + obj = galloc(sizeof(Object)); + list = galloc(sizeof(ObjectList)); + memclrw(obj, sizeof(Object)); + memclrw(list, sizeof(ObjectList)); + + obj->otype = OT_OBJECT; + obj->access = ACCESSPUBLIC; + obj->datatype = DDATA; + obj->name = CParser_GetUniqueName(); + obj->toc = NULL; + obj->sclass = TK_STATIC; + obj->qual = Q_CONST; + obj->flags |= OBJECT_FLAGS_2 | OBJECT_DEFINED; + obj->u.data.linkname = obj->name; + obj->type = NULL; + createIndirect(obj, 0, 0); + obj->type = TYPE(&void_ptr); + + size = CInt64_GetULong(&range) + 1; + obj->u.data.u.switchtable.size = size; + obj->u.data.u.switchtable.data = lalloc(4 * size); + + currange = caseranges; + outptr = (UInt32 *) obj->u.data.u.switchtable.data; + value = cint64_zero; + while (CInt64_LessEqual(value, range)) { + while (CInt64_Greater(CInt64_Add(first, value), CInt64_Add(currange->min, currange->range))) + currange++; + *outptr = CTool_CreateIndexFromPointer(currange->label); + value = CInt64_Add(value, cint64_one); + outptr++; + } + + list->object = obj; + list->next = switchtables; + switchtables = list; + return list->object; +} + +static void generate_table(ENode *expr, SwitchInfo *info) { + Object *table; + SwitchCase *curcase; + short reg; + short reg2; + short reg3; + Operand op1; + Operand op2; + + CInt64 val3 = {0, 3}; + memclrw(&op1, sizeof(Operand)); + memclrw(&op2, sizeof(Operand)); + + if (CInt64_Greater(first, cint64_zero) && CInt64_Less(first, val3)) { + range = CInt64_Add(range, first); + first = cint64_zero; + } + + table = create_switch_table(); + CError_ASSERT(553, !TYPE_IS_8BYTES(expr->rtype)); + + GEN_NODE(expr, &op1); + if (expr->rtype->size < 4) + extend32(&op1, expr->rtype, 0); + ENSURE_GPR(&op1, expr->rtype, 0); + + reg = op1.reg; + if (CInt64_NotEqual(first, cint64_zero)) { + SInt32 value; + reg = ALLOC_GPR(); + value = -CInt64_GetULong(&first); + if (!FITS_IN_SHORT(value)) { + emitpcode(PC_ADDIS, reg, op1.reg, 0, HIGH_PART(value)); + if (value) + emitpcode(PC_ADDI, reg, reg, 0, LOW_PART(value)); + } else { + emitpcode(PC_ADDI, reg, op1.reg, 0, value); + } + } + + if (!FITS_IN_SHORT(CInt64_GetULong(&range))) { + short tmp = ALLOC_GPR(); + load_immediate(tmp, CInt64_GetULong(&range)); + emitpcode(PC_CMPL, 0, reg, tmp); + } else { + emitpcode(PC_CMPLI, 0, reg, CInt64_GetULong(&range)); + } + + branch_conditional(0, EGREATER, 1, defaultlabel); + if (table->toc) { + op2.optype = OpndType_Symbol; + op2.object = table->toc; + indirect(&op2, NULL); + } else { + op2.optype = OpndType_Symbol; + op2.object = table; + } + + if (op2.optype != OpndType_GPR) { + Coerce_to_register(&op2, TYPE(&void_ptr), reg2 = ALLOC_GPR()); + } + + if (op2.optype != OpndType_GPR) { + CError_FATAL(599); + } else { + if (op2.reg != reg2) + emitpcode(PC_MR, reg2, op2.reg); + } + + if (CInt64_Equal(first, cint64_zero)) { + reg = ALLOC_GPR(); + emitpcode(PC_RLWINM, reg, op1.reg, 2, 0, 29); + } else { + emitpcode(PC_RLWINM, reg, reg, 2, 0, 29); + } + + reg3 = reg2; + emitpcode(PC_LWZX, reg3, reg3, reg); + for (curcase = info->cases; curcase; curcase = curcase->next) + pcbranch(pclastblock, curcase->label->pclabel); + pcbranch(pclastblock, info->defaultlabel->pclabel); + emitpcode(PC_MTCTR, reg3); + branch_indirect(table); +} + +void switchstatement(ENode *expr, SwitchInfo *info) { + Boolean use_table; + SwitchCase *swcase; + + use_table = copts.switch_tables; + + ncases = 0; + for (swcase = info->cases; swcase; swcase = swcase->next) { + if (!swcase->label->pclabel) + swcase->label->pclabel = makepclabel(); + ncases++; + } + + CError_ASSERT(656, ncases >= 0 && ncases <= 0x3333332U); + + if (!info->defaultlabel->pclabel) + info->defaultlabel->pclabel = makepclabel(); + defaultlabel = info->defaultlabel->pclabel; + + build_case_ranges(expr->rtype, info->cases, info->defaultlabel); + + if (TYPE_IS_8BYTES(expr->rtype)) { + generate_tree(expr); + return; + } + + if (!use_table || nranges_minus1 < 8 || (nranges_minus1 * 2) < ((range.lo / 2) + 4)) + generate_tree(expr); + else + generate_table(expr, info); +} + +void dumpswitchtables(Object *funcobj) { + Object *table; + ObjectList *list; + SInt32 size; + UInt32 *array; + + for (list = switchtables; list; list = list->next) { + table = list->object; + CError_ASSERT(694, table->otype == OT_OBJECT && table->access == ACCESSPUBLIC && table->datatype == DDATA); + + size = table->u.data.u.switchtable.size; + array = (UInt32 *) table->u.data.u.switchtable.data; + while (size--) { + *array = CTool_EndianConvertWord32(((PCodeLabel *) CTool_ResolveIndexToPointer(*array))->block->codeOffset); + array++; + } + + ObjGen_DeclareSwitchTable(table, funcobj); + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/TOC.c b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/TOC.c new file mode 100644 index 0000000..7af09e3 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/CodeGenerator/TOC.c @@ -0,0 +1,2272 @@ +#include "cos.h" +#include "compiler/TOC.h" +#include "compiler/CDecl.h" +#include "compiler/CError.h" +#include "compiler/CExpr.h" +#include "compiler/CInit.h" +#include "compiler/CInt64.h" +#include "compiler/CFunc.h" +#include "compiler/CMachine.h" +#include "compiler/CMangler.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/Exceptions.h" +#include "compiler/InlineAsm.h" +#include "compiler/InlineAsmPPC.h" +#include "compiler/InstrSelection.h" +#include "compiler/Intrinsics.h" +#include "compiler/ObjGenMachO.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/PPCError.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/enode.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +ObjectList *toclist; +ObjectList *exceptionlist; +void *descriptorlist; +PoolEntry *floatconstpool; +PoolEntry *doubleconstpool; +ObjectList *floatconstlist; +PoolEntry *vectorconstpool; +ObjectList *vectorconstlist; +Object toc0; +Boolean no_descriptors; +Object pic_base; +VarInfo pic_base_varinfo; +short pic_base_reg; +CodeLabelList *codelabellist; + +UInt8 lvslBytes[16][16] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, + 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, + 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, + 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, + 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, + 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, + 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, + 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E +}; + +UInt8 lvsrBytes[16][16] = { + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, + 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, + 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, + 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, + 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, + 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, + 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10 +}; + +// forward decls +static void estimate_func_param_size(ENode *node); + +static int disables_optimizer(ENode *node) { + ENode *funcref = node->data.funccall.funcref; + if (ENODE_IS(funcref, EOBJREF)) { + if (!strcmp(CMangler_GetLinkName(funcref->data.objref)->name, "___setjmp")) + return 1; + if (!strcmp(CMangler_GetLinkName(funcref->data.objref)->name, "___vec_setjmp")) + return 1; + } + return 0; +} + +void setupaddressing(void) { + floatconstlist = NULL; + descriptorlist = NULL; + toclist = NULL; + exceptionlist = NULL; + vectorconstlist = NULL; + vectorconstpool = NULL; + floatconstpool = NULL; + doubleconstpool = NULL; + + no_descriptors = 1; + memclrw(&toc0, sizeof(toc0)); + + pic_base_reg = 0; + memclrw(&pic_base, sizeof(pic_base)); + memclrw(&pic_base_varinfo, sizeof(pic_base_varinfo)); + pic_base.otype = OT_OBJECT; + pic_base.type = (Type *) &void_ptr; + pic_base.datatype = DNONLAZYPTR; + pic_base.u.toc.info = &pic_base_varinfo; +} + +void createNonLazyPointer(Object *obj) { + Object *toc; + ObjectList *list; + + toc = galloc(sizeof(Object)); + obj->toc = toc; + memclrw(toc, sizeof(Object)); + + toc->otype = OT_OBJECT; + toc->name = CParser_GetUniqueName(); + toc->toc = NULL; + toc->section = SECT_NONLAZY_PTRS; + toc->u.toc.info = CodeGen_GetNewVarInfo(); + toc->sclass = TK_STATIC; + toc->qual = Q_CONST; + toc->datatype = DNONLAZYPTR; + toc->flags |= OBJECT_FLAGS_2; + toc->type = CDecl_NewPointerType(obj->type); + toc->u.toc.over_load = obj; + toc->u.toc.linkname = CMangler_GetLinkName(obj); + + list = galloc(sizeof(ObjectList)); + memclrw(list, sizeof(ObjectList)); + list->object = toc; + list->next = toclist; + toclist = list; +} + +void referenceIndirectPointer(Object *obj) { + VarInfo *vi = obj->toc->u.toc.info; + vi->used = 1; + vi->usage += copts.optimizesize ? 1 : curstmtvalue; +} + +Object *createIndirect(Object *obj, Boolean flag1, Boolean flag2) { + CError_ASSERT(622, !copts.no_common || (obj->section != SECT_COMMON_VARS) || (obj->qual & Q_20000)); + + if (CParser_HasInternalLinkage(obj)) + return NULL; + if (ObjGen_IsExported(obj)) + return NULL; + + if (!copts.no_common && obj->datatype == DDATA && obj->section == SECT_DEFAULT && (obj->qual & Q_1000000)) + obj->section = SECT_COMMON_VARS; + + if (copts.codegen_dynamic && (!copts.no_common || !(obj->qual & Q_1000000))) { + if (!obj->toc) + createNonLazyPointer(obj); + else if (flag1) + obj->toc->u.toc.info = CodeGen_GetNewVarInfo(); + + if (flag2) + referenceIndirectPointer(obj); + + return obj->toc; + } else { + return NULL; + } +} + +Object *createfloatconstant(Type *type, Float *data) { + ObjectList *list; + Object *obj; + UInt32 *check; + + for (list = floatconstlist; list; list = list->next) { + obj = list->object; + check = (UInt32 *) obj->u.data.u.floatconst; + if (obj->type == type && check[0] == ((UInt32 *) data)[0] && check[1] == ((UInt32 *) data)[1]) + return obj; + } + + obj = galloc(sizeof(Object)); + memclrw(obj, sizeof(Object)); + obj->otype = OT_OBJECT; + obj->type = type; + obj->name = CParser_GetUniqueName(); + obj->toc = NULL; + obj->u.data.info = NULL; + obj->u.data.linkname = obj->name; + obj->sclass = TK_STATIC; + obj->qual = Q_CONST | Q_INLINE_DATA; + obj->datatype = DDATA; + if (type->size == 8) + obj->section = SECT_8BYTE_LITERALS; + else if (type->size == 4) + obj->section = SECT_4BYTE_LITERALS; + else + CError_FATAL(807); + + obj->flags |= OBJECT_FLAGS_2; + + obj->u.data.u.floatconst = galloc(sizeof(Float)); + *obj->u.data.u.floatconst = *data; + + list = galloc(sizeof(ObjectList)); + memclrw(list, sizeof(ObjectList)); + list->object = obj; + list->next = floatconstlist; + floatconstlist = list; + + ObjGen_DeclareFloatConst(obj); + return obj; +} + +Object *createvectorconstant(Type *type, MWVector128 *data) { + ObjectList *list; + Object *obj; + MWVector128 *check; + + for (list = vectorconstlist; list; list = list->next) { + obj = list->object; + check = obj->u.data.u.vector128const; + if (check->ul[0] == data->ul[0] && check->ul[1] == data->ul[1] && check->ul[2] == data->ul[2] && check->ul[3] == data->ul[3]) + return obj; + } + + obj = galloc(sizeof(Object)); + memclrw(obj, sizeof(Object)); + obj->otype = OT_OBJECT; + obj->type = type; + obj->name = CParser_GetUniqueName(); + obj->toc = NULL; + obj->u.data.info = NULL; + obj->u.data.linkname = obj->name; + obj->sclass = TK_STATIC; + obj->qual = Q_CONST | Q_INLINE_DATA; + obj->datatype = DDATA; + if (type->size == 16) + obj->section = SECT_16BYTE_LITERALS; + else + CError_FATAL(900); + + obj->flags |= OBJECT_FLAGS_2; + + obj->u.data.u.vector128const = galloc(sizeof(MWVector128)); + *obj->u.data.u.vector128const = *data; + + list = galloc(sizeof(ObjectList)); + memclrw(list, sizeof(ObjectList)); + list->object = obj; + list->next = vectorconstlist; + vectorconstlist = list; + + ObjGen_DeclareVectorConst(obj); + return obj; +} + +void DeclarePooledConstants(void) { + PoolEntry *entry; + char *buffer; + SInt32 fsize; + SInt32 dsize; + SInt32 vsize; + + fsize = 0; + for (entry = floatconstpool; entry; entry = entry->next) + fsize += 4; + + if (fsize) { + floatconstpool->object->type = CDecl_NewArrayType(TYPE(&stfloat), fsize); + buffer = galloc(fsize); + for (entry = floatconstpool; entry; entry = entry->next) + memcpy(buffer + entry->offset, entry->buffer, 4); + CInit_DeclareReadOnlyData(floatconstpool->object, buffer, NULL, fsize); + } + + dsize = 0; + for (entry = doubleconstpool; entry; entry = entry->next) + dsize += 8; + + if (dsize) { + doubleconstpool->object->type = CDecl_NewArrayType(TYPE(&stdouble), dsize); + buffer = galloc(dsize); + for (entry = doubleconstpool; entry; entry = entry->next) + memcpy(buffer + entry->offset, entry->buffer, 8); + CInit_DeclareReadOnlyData(doubleconstpool->object, buffer, NULL, dsize); + } + + vsize = 0; + for (entry = vectorconstpool; entry; entry = entry->next) + vsize += 16; + + if (vsize) { + vectorconstpool->object->type = CDecl_NewArrayType(TYPE(&stvectorsignedlong), vsize); + buffer = galloc(vsize); + for (entry = vectorconstpool; entry; entry = entry->next) + memcpy(buffer + entry->offset, entry->buffer, 16); + CInit_DeclareReadOnlyData(vectorconstpool->object, buffer, NULL, vsize); + } +} + +static Object *CreatePooledFloatConst(Type *type, Float *data, SInt32 *pOffset) { + if (type->size == 8u) { + PoolEntry *entry; + void *buffer; + Object *object; + SInt32 offset; + + buffer = galloc(8u); + CMach_InitFloatMem(type, *data, buffer); + if (cparamblkptr->precompile == 1) + CError_Error(CErrorStr180); + + for (entry = doubleconstpool; entry; entry = entry->next) { + if (!memcmp(entry->buffer, buffer, 8u)) + break; + } + + if (!entry) { + if (doubleconstpool) { + object = doubleconstpool->object; + offset = doubleconstpool->offset + 8u; + doubleconstpool->object->type->size += 8u; + } else { + DeclInfo di; + memclrw(&di, sizeof(di)); + di.thetype = CDecl_NewArrayType(TYPE(&stdouble), 8u); + di.name = GetHashNameNodeExport("@doubleBase0"); + di.qual = Q_CONST; + di.storageclass = TK_STATIC; + di.is_extern_c = 1; + di.section = SECT_CONST; + object = CParser_NewGlobalDataObject(&di); + object->nspace = cscope_root; + offset = 0; + } + + entry = galloc(sizeof(PoolEntry)); + entry->next = doubleconstpool; + doubleconstpool = entry; + entry->object = object; + entry->offset = offset; + entry->buffer = galloc(8u); + memcpy(entry->buffer, buffer, 8u); + } + + *pOffset = entry->offset; + return entry->object; + } + + if (type->size == 4u) { + PoolEntry *entry; + void *buffer; + Object *object; + SInt32 offset; + + buffer = galloc(4u); + CMach_InitFloatMem(type, *data, buffer); + if (cparamblkptr->precompile == 1) + CError_Error(CErrorStr180); + + for (entry = floatconstpool; entry; entry = entry->next) { + if (!memcmp(entry->buffer, buffer, 4u)) + break; + } + + if (!entry) { + if (floatconstpool) { + object = floatconstpool->object; + offset = floatconstpool->offset + 4u; + object->type->size += 4u; + } else { + DeclInfo di; + memclrw(&di, sizeof(di)); + di.thetype = CDecl_NewArrayType(TYPE(&stfloat), 4u); + di.name = GetHashNameNodeExport("@floatBase0"); + di.qual = Q_CONST; + di.storageclass = TK_STATIC; + di.is_extern_c = 1; + di.section = SECT_CONST; + object = CParser_NewGlobalDataObject(&di); + object->nspace = cscope_root; + offset = 0; + } + + entry = galloc(sizeof(PoolEntry)); + entry->next = floatconstpool; + floatconstpool = entry; + entry->object = object; + entry->offset = offset; + entry->buffer = galloc(4u); + memcpy(entry->buffer, buffer, 4u); + } + + *pOffset = entry->offset; + return entry->object; + } + + CError_FATAL(1183); + return NULL; +} + +Object *CreateFloatConst(Type *type, Float *data, SInt32 *pOffset) { + *pOffset = 0; + return createfloatconstant(type, data); +} + +static void RewriteFloatConst(ENode *expr) { + Object *obj; + SInt32 n; + ENode *subexpr; + + obj = CreateFloatConst(expr->rtype, &expr->data.floatval, &n); + if (n) { + subexpr = makediadicnode(create_objectrefnode(obj), intconstnode(TYPE(&stunsignedlong), n), EADD); + } else { + subexpr = create_objectrefnode(obj); + } + + expr->type = EINDIRECT; + expr->cost = 1; + expr->flags |= Q_CONST; + expr->data.monadic = subexpr; +} + +static void RewriteVectorConst(ENode *expr) { + PoolEntry *entry; + Object *object; + SInt32 offset; + ENode *inner; + UInt8 data[16]; + + CMach_InitVectorMem(expr->rtype, expr->data.vector128val, data, 1); + + if (cparamblkptr->precompile == 1) + CError_Error(CErrorStr180); + + for (entry = vectorconstpool; entry; entry = entry->next) { + if (!memcmp(entry->buffer, data, 16)) + break; + } + + if (!entry) { + if (vectorconstpool) { + object = vectorconstpool->object; + offset = vectorconstpool->offset + 16; + vectorconstpool->object->type->size += 16; + } else { + DeclInfo di; + memclrw(&di, sizeof(di)); + di.thetype = CDecl_NewArrayType(TYPE(&stvectorsignedlong), 16); + di.name = GetHashNameNodeExport("@vectorBase0"); + di.qual = Q_CONST; + di.storageclass = TK_STATIC; + di.is_extern_c = 1; + di.section = SECT_CONST; + object = CParser_NewGlobalDataObject(&di); + object->nspace = cscope_root; + offset = 0; + } + + entry = galloc(sizeof(PoolEntry)); + entry->next = vectorconstpool; + vectorconstpool = entry; + entry->object = object; + entry->offset = offset; + entry->buffer = galloc(16); + memcpy(entry->buffer, data, 16); + } + + if (entry->offset) { + inner = makediadicnode( + create_objectrefnode(entry->object), + intconstnode(TYPE(&stunsignedlong), entry->offset), + EADD); + } else { + inner = create_objectrefnode(entry->object); + } + + expr->type = EINDIRECT; + expr->cost = 1; + expr->flags |= ENODE_FLAG_CONST; + expr->data.monadic = inner; +} + +static Object *createcodelabel(CLabel *label) { + CodeLabelList *list; + Object *obj; + + for (list = codelabellist; list; list = list->next) { + if (list->label == label) + return list->object; + } + + obj = galloc(sizeof(Object)); + memclrw(obj, sizeof(Object)); + obj->otype = OT_OBJECT; + obj->type = (Type *) &void_ptr; + obj->name = label->uniquename; + obj->toc = NULL; + obj->u.data.info = NULL; // not sure if this is the right union! + obj->sclass = TK_STATIC; + obj->qual = Q_CONST; + obj->datatype = DDATA; + obj->flags |= OBJECT_FLAGS_2 | OBJECT_DEFINED; + + list = galloc(sizeof(CodeLabelList)); + memclrw(list, sizeof(CodeLabelList)); + list->object = obj; + list->label = label; + list->next = codelabellist; + codelabellist = list; + + return obj; +} + +void dumpcodelabels(Object *func) { + CodeLabelList *list; + + for (list = codelabellist; list; list = list->next) + ObjGen_DeclareCodeLabel(list->object, list->label->pclabel->block->codeOffset, func); +} + +static void referenceexception(Object *obj) { + ObjectList *list; + + if (obj && obj->otype == OT_OBJECT && obj->datatype == DLOCAL) { + for (list = exceptionlist; list; list = list->next) { + if (list->object == obj) + return; + } + + list = lalloc(sizeof(ObjectList)); + memclrw(list, sizeof(ObjectList)); + list->object = obj; + list->next = exceptionlist; + exceptionlist = list; + } +} + +static ENodeType invert_relop(ENodeType nodetype) { + switch (nodetype) { + case ELESS: return EGREATEREQU; + case EGREATER: return ELESSEQU; + case ELESSEQU: return EGREATER; + case EGREATEREQU: return ELESS; + case EEQU: return ENOTEQU; + case ENOTEQU: return EEQU; + default: return nodetype; + } +} + +static ENode *COND_to_COMPARE(ENode *cond, ENode *expr1, ENode *expr2) { + SInt32 val1; + SInt32 val2; + SInt32 condval; + ENodeType invop; + + while (expr1->type == ETYPCON && TYPE_FITS_IN_REGISTER(expr1->rtype)) + expr1 = expr1->data.monadic; + while (expr2->type == ETYPCON && TYPE_FITS_IN_REGISTER(expr2->rtype)) + expr2 = expr2->data.monadic; + + if (expr1->type != EINTCONST || !TYPE_FITS_IN_REGISTER(expr1->rtype) || !CInt64_IsInRange(expr1->data.intval, 4)) + return NULL; + if (expr2->type != EINTCONST || !TYPE_FITS_IN_REGISTER(expr2->rtype) || !CInt64_IsInRange(expr2->data.intval, 4)) + return NULL; + + val1 = expr1->data.intval.lo; + val2 = expr2->data.intval.lo; + condval = 0; + switch (val1) { + case 1: + if (val2 != 0) + return NULL; + break; + case 0: + condval = 1; + if (val2 != 1) + return NULL; + break; + default: + return NULL; + } + + while (cond->type == ELOGNOT) { + condval = (condval + 1) & 1; + cond = cond->data.monadic; + } + + if (condval) { + invop = invert_relop(cond->type); + if (invop == cond->type) + return NULL; + cond->type = invop; + } + + return cond; +} + +static ENode *comparewithzero(ENode *expr) { + ENode *expr1; + ENode *expr2; + ENode *tmp; + + expr1 = lalloc(sizeof(ENode)); + memclrw(expr1, sizeof(ENode)); + expr2 = lalloc(sizeof(ENode)); + memclrw(expr2, sizeof(ENode)); + + while (expr->type == EFORCELOAD || expr->type == ETYPCON || expr->type == ECOMMA) { + if (!TYPE_FITS_IN_REGISTER(expr->rtype)) + break; + if (expr->type == ECOMMA) { + expr->data.diadic.right = comparewithzero(expr->data.diadic.right); + return expr; + } + expr = expr->data.monadic; + } + + if (expr->type == ECOND && TYPE_FITS_IN_REGISTER(expr->rtype)) { + tmp = COND_to_COMPARE(expr->data.cond.cond, expr->data.cond.expr1, expr->data.cond.expr2); + if (tmp) + expr = tmp; + } + + if (expr->type >= ELESS && expr->type <= ENOTEQU) + return expr; + + if (IS_TYPE_FLOAT(expr->rtype)) { + static Float float0 = {0.0}; + + expr2->type = EFLOATCONST; + expr2->cost = 0; + expr2->rtype = (expr->rtype->size == 4) ? (Type *) &stfloat : (Type *) &stdouble; + expr2->data.floatval = float0; + } else { + expr2->type = EINTCONST; + expr2->cost = 0; + if (TYPE_IS_8BYTES(expr->rtype)) + expr2->rtype = (Type *) &stsignedlonglong; + else + expr2->rtype = (Type *) &stsignedint; + expr2->data.intval.lo = 0; + expr2->data.intval.hi = 0; + } + + expr1->type = ENOTEQU; + expr1->cost = expr->cost; + expr1->rtype = (Type *) &stsignedint; + expr1->data.diadic.left = expr; + expr1->data.diadic.right = expr2; + return expr1; +} + +static void rewritefunctioncallreturningstruct(ENode *expr) { + ENode *ret_expr; + ENode *copy; + + ret_expr = expr->data.funccall.args->node; + + copy = lalloc(sizeof(ENode)); + memclrw(copy, sizeof(ENode)); + + *copy = *expr; + expr->type = ECOMMA; + expr->data.diadic.left = copy; + expr->data.diadic.right = ret_expr; +} + +static void rewritestrcpy(ENode *expr) { + ENode *int_expr; + ENodeList *list; + + int_expr = lalloc(sizeof(ENode)); + memclrw(int_expr, sizeof(ENode)); + int_expr->type = EINTCONST; + int_expr->cost = 0; + int_expr->flags = 0; + int_expr->rtype = (Type *) &stunsignedlong; + CInt64_SetLong(&int_expr->data.intval, expr->data.funccall.args->next->node->data.string.size); + + list = lalloc(sizeof(ENodeList)); + memclrw(list, sizeof(ENodeList)); + list->next = NULL; + list->node = int_expr; + expr->data.funccall.args->next->next = list; + expr->data.funccall.funcref->data.objref = __memcpy_object; +} + +static SInt32 magnitude(Type *type) { + if (IS_TYPE_FLOAT(type)) + return type->size * 4; + else if (is_unsigned(type)) + return (type->size * 2) + 1; + else + return type->size * 2; +} + +static Type *promote_type(Type *type) { + if (IS_TYPE_ENUM(type)) + type = TYPE_ENUM(type)->enumtype; + if (TYPE_INTEGRAL(type)->integral > stsignedint.integral) + return type; + else + return (Type *) &stsignedint; +} + +static Type *common_type(Type *type1, Type *type2) { + Type *tmp; + + if (IS_TYPE_FLOAT(type1) || IS_TYPE_FLOAT(type2)) { + if (TYPE_INTEGRAL(type1)->integral > TYPE_INTEGRAL(type2)->integral) + return type1; + else + return type2; + } + + type1 = promote_type(type1); + type2 = promote_type(type2); + if (type1 != type2) { + if (TYPE_INTEGRAL(type1)->integral < TYPE_INTEGRAL(type2)->integral) { + tmp = type1; + type1 = type2; + type2 = tmp; + } + + if (type1->size == type2->size && !is_unsigned(type1) && is_unsigned(type2)) { + if (type1 == (Type *) &stsignedlong) { + type1 = (Type *) &stunsignedlong; + } else { + CError_ASSERT(1789, type1 == (Type *) &stsignedlonglong); + type1 = (Type *) &stunsignedlonglong; + } + } + } + + return type1; +} + +static void rewrite_opassign(ENode *expr, ENodeType exprtype) { + ENode *left_sub; + ENode *right; + Type *left_type; + Type *right_type; + ENode *new_expr; + ENode *tmp; + Type *commontype; + Type *promo_left; + Type *promo_right; + + left_sub = expr->data.diadic.left->data.monadic; + right = expr->data.diadic.right; + left_type = expr->data.diadic.left->rtype; + right_type = expr->data.diadic.right->rtype; + + new_expr = lalloc(sizeof(ENode)); + memclrw(new_expr, sizeof(ENode)); + new_expr->type = exprtype; + new_expr->rtype = left_type; + new_expr->data.diadic.left = expr->data.diadic.left; + new_expr->data.diadic.right = right; + + expr->type = EASS; + expr->data.diadic.left = left_sub; + expr->data.diadic.right = new_expr; + + if (left_sub->type != EOBJREF) { + ENode *define; + ENode *reuse; + + define = lalloc(sizeof(ENode)); + memclrw(define, sizeof(ENode)); + define->type = EDEFINE; + define->rtype = left_type; + + reuse = lalloc(sizeof(ENode)); + memclrw(reuse, sizeof(ENode)); + reuse->type = EREUSE; + reuse->rtype = left_type; + reuse->data.monadic = define; + + if (left_sub->type != EBITFIELD) { + define->data.monadic = expr->data.diadic.left; + expr->data.diadic.left = define; + new_expr->data.diadic.left->data.diadic.left = reuse; + } else { + ENode *copy; + define->data.monadic = left_sub->data.diadic.left; + left_sub->data.diadic.left = define; + + copy = lalloc(sizeof(ENode)); + *copy = *left_sub; + copy->data.diadic.left = reuse; + new_expr->data.diadic.left->data.diadic.left = copy; + } + } + + switch (exprtype) { + case EADD: + case ESUB: + if (IS_TYPE_POINTER(left_type)) + break; + if (right->type == EINTCONST && TYPE_FITS_IN_REGISTER(left_type)) + break; + case EAND: + case EXOR: + case EOR: + if (left_type == right_type) + break; + case EMUL: + case EDIV: + case EMODULO: + commontype = common_type(left_type, right_type); + if (left_type != commontype) { + tmp = lalloc(sizeof(ENode)); + memclrw(tmp, sizeof(ENode)); + tmp->type = ETYPCON; + tmp->rtype = left_type; + tmp->data.monadic = expr->data.diadic.right; + expr->data.diadic.right = tmp; + + tmp = lalloc(sizeof(ENode)); + memclrw(tmp, sizeof(ENode)); + tmp->type = ETYPCON; + tmp->rtype = commontype; + tmp->data.monadic = new_expr->data.diadic.left; + new_expr->data.diadic.left = tmp; + } + if (right_type != commontype) { + tmp = lalloc(sizeof(ENode)); + memclrw(tmp, sizeof(ENode)); + tmp->type = ETYPCON; + tmp->rtype = commontype; + tmp->data.monadic = new_expr->data.diadic.right; + new_expr->data.diadic.right = tmp; + } + new_expr->rtype = commontype; + break; + + case ESHL: + case ESHR: + promo_left = promote_type(left_type); + promo_right = promote_type(right_type); + if (left_type != promo_left) { + tmp = lalloc(sizeof(ENode)); + memclrw(tmp, sizeof(ENode)); + tmp->type = ETYPCON; + tmp->rtype = left_type; + tmp->data.monadic = expr->data.diadic.right; + expr->data.diadic.right = tmp; + + tmp = lalloc(sizeof(ENode)); + memclrw(tmp, sizeof(ENode)); + tmp->type = ETYPCON; + tmp->rtype = promo_left; + tmp->data.monadic = new_expr->data.diadic.left; + new_expr->data.diadic.left = tmp; + } + if (right_type != promo_right) { + if (new_expr->data.diadic.right->type == EINTCONST && promo_right == (Type *) &stsignedint) { + new_expr->data.diadic.right->rtype = (Type *) &stsignedint; + } else { + tmp = lalloc(sizeof(ENode)); + memclrw(tmp, sizeof(ENode)); + tmp->type = ETYPCON; + tmp->rtype = promo_right; + tmp->data.monadic = new_expr->data.diadic.right; + new_expr->data.diadic.right = tmp; + } + } + new_expr->rtype = promo_left; + break; + } +} + +static void rewrite_preincdec(ENode *expr) { + ENode *subexpr; // r31 + Type *type; // r28 + ENode *new_expr; // r29 + + subexpr = expr->data.monadic; + type = expr->rtype; + + new_expr = lalloc(sizeof(ENode)); + memclrw(new_expr, sizeof(ENode)); + + if (IS_TYPE_FLOAT(type)) { + new_expr->type = EFLOATCONST; + new_expr->cost = 0; + new_expr->rtype = type; + new_expr->data.floatval = one_point_zero; + } else if (IS_TYPE_POINTER(type)) { + new_expr->type = EINTCONST; + new_expr->cost = 0; + new_expr->rtype = (Type *) &stunsignedlong; + new_expr->data.intval.hi = 0; + new_expr->data.intval.lo = TYPE_POINTER(type)->target->size; + } else { + new_expr->type = EINTCONST; + new_expr->cost = 0; + new_expr->rtype = type; + new_expr->data.intval.hi = 0; + new_expr->data.intval.lo = 1; + } + + expr->type = (expr->type == EPREDEC) ? ESUBASS : EADDASS; + expr->data.diadic.left = subexpr; + expr->data.diadic.right = new_expr; +} + +// Don't know what this would be called in the original, but weh +typedef union signed_vec { + SInt8 sc[16]; + SInt16 ss[8]; + SInt32 sl[4]; +} signed_vec; + +Boolean canoptimizevectorconst(MWVector128 *vecp, Type *type, COVCResult *result) { + // this function is very broken + signed_vec vec; + union { SInt32 lg; SInt8 ch[4]; } conv32; + union { SInt16 sh; SInt8 ch[2]; } conv16; + char flag; + SInt8 first8; + SInt16 first16; + SInt32 first32; + int i; + char ci; + UInt32 l0, l1, l2, l3; + + if (IS_TYPE_VECTOR(type)) { + vec = *((signed_vec *) vecp); + + first8 = vec.sc[0]; + flag = 1; + i = 1; + while (flag && i < 16) + flag = first8 == vec.sc[i++]; + /*flag = 1; + for (i = 1; flag && i < 16; i++) { + flag = first8 == vec.sc[i]; + }*/ + + if (flag && first8 < 16 && first8 > -17) { + if (result) { + result->op1 = PC_VSPLTISB; + result->op2 = -1; + result->arg = first8; + } + return 1; + } + + first16 = vec.ss[0]; + flag = 1; + for (i = 1; flag && i < 8; i++) { + flag = vec.ss[i] == first16; + } + + conv16.sh = first16; + if (flag && conv16.ch[0] == 0 && conv16.ch[1] < 16 && conv16.ch[1] >= 0) { + if (result) { + result->op1 = PC_VSPLTISH; + result->op2 = -1; + result->arg = conv16.ch[1]; + } + return 1; + } + + if (flag && conv16.ch[0] == -1 && (conv16.ch[1] & 0xF0) == 0xF0) { + if (result) { + result->op1 = PC_VSPLTISH; + result->op2 = -1; + result->arg = conv16.ch[1]; + } + return 1; + } + + first32 = vec.sl[0]; + flag = 1; + for (i = 1; flag && i < 4; i++) { + flag = vec.sl[i] == first32; + } + + conv32.lg = first32; + if (flag && conv32.ch[0] == 0 && conv32.ch[1] == 0 && conv32.ch[2] == 0 && conv32.ch[3] < 16 && conv32.ch[3] >= 0) { + if (result) { + result->op1 = PC_VSPLTISW; + result->op2 = -1; + result->arg = conv32.ch[3]; + } + return 1; + } + + if (flag && conv32.ch[0] == -1 && conv32.ch[1] == -1 && conv32.ch[2] == -1 && (conv32.ch[3] & 0xF0) == 0xF0) { + if (result) { + result->op1 = PC_VSPLTISW; + result->op2 = -1; + result->arg = conv32.ch[3]; + } + return 1; + } + + l0 = vec.sl[0]; + l1 = vec.sl[1]; + l2 = vec.sl[2]; + l3 = vec.sl[3]; + for (ci = 0; ci < 16; ci++) { + UInt32 *l; + UInt32 *r; + + l = (UInt32 *) lvslBytes[(char) ci]; + r = (UInt32 *) lvsrBytes[(char) ci]; + if (l0 == l[0] && l1 == l[1] && l2 == l[2] && l3 == l[3]) { + if (result) { + result->op1 = -1; + result->op2 = PC_LVSL; + result->arg = ci; + } + return 1; + } + if (l0 == r[0] && l1 == r[1] && l2 == r[2] && l3 == r[3]) { + if (result) { + result->op1 = -1; + result->op2 = PC_LVSR; + result->arg = ci; + } + return 1; + } + } + } + + return 0; +} + +static SInt32 countindirects(ENode *expr) { + SInt32 tmp1; + SInt32 tmp2; + + switch (expr->type) { + case EINTCONST: + case EFLOATCONST: + case ESTRINGCONST: + case EOBJREF: + case EVECTOR128CONST: + return 0; + case ECOND: + if (expr->data.cond.cond->hascall || expr->data.cond.expr1->hascall || expr->data.cond.expr2->hascall) + return 2; + + if ((tmp1 = countindirects(expr->data.cond.cond)) >= 2) + return 2; + if ((tmp2 = countindirects(expr->data.cond.expr1)) >= 2) + return 2; + if (tmp2 > tmp1) + tmp1 = tmp2; + if ((tmp2 = countindirects(expr->data.cond.expr2)) >= 2) + return 2; + if (tmp2 > tmp1) + tmp1 = tmp2; + return tmp1; + case EFUNCCALL: + case EFUNCCALLP: + return 2; + case EMUL: + case EMULV: + case EDIV: + case EMODULO: + case EADDV: + case ESUBV: + case EADD: + case ESUB: + case ESHL: + case ESHR: + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + case EAND: + case EXOR: + case EOR: + case ELAND: + case ELOR: + case EASS: + case EMULASS: + case EDIVASS: + case EMODASS: + case EADDASS: + case ESUBASS: + case ESHLASS: + case ESHRASS: + case EANDASS: + case EXORASS: + case EORASS: + case ECOMMA: + case EPMODULO: + case EROTL: + case EROTR: + case EBCLR: + case EBTST: + case EBSET: + if ((tmp1 = countindirects(expr->data.diadic.left)) >= 2) + return 2; + if ((tmp2 = countindirects(expr->data.diadic.right)) >= 2) + return 2; + if (tmp2 > tmp1) + tmp1 = tmp2; + return tmp1; + case EPOSTINC: + case EPOSTDEC: + case EPREINC: + case EPREDEC: + case EINDIRECT: + case EMONMIN: + case EBINNOT: + case ELOGNOT: + case EFORCELOAD: + case ETYPCON: + case EBITFIELD: + if (expr->type == EINDIRECT) + return countindirects(expr->data.monadic) + 1; + else + return countindirects(expr->data.monadic); + default: + return 2; + } +} + +static Boolean DetectCondSideAffect(ENode *expr) { + switch (expr->type) { + case EMUL: + case EMULV: + case EDIV: + case EMODULO: + case EADDV: + case ESUBV: + case EADD: + case ESUB: + case ESHL: + case ESHR: + case EAND: + case EXOR: + case EOR: + case ECOMMA: + case EPMODULO: + case EROTL: + case EROTR: + case EBCLR: + case EBTST: + case EBSET: + if (DetectCondSideAffect(expr->data.diadic.left)) + return 1; + return DetectCondSideAffect(expr->data.diadic.right); + case EINDIRECT: + if (expr->data.monadic->type == EINDIRECT) + return 1; + if (expr->data.monadic->type == EOBJREF) { + if (expr->data.monadic->data.objref->datatype != DLOCAL && expr->data.monadic->data.objref->datatype != DDATA) + return 1; + if (IS_TYPE_POINTER(expr->data.monadic->data.objref->type)) + return 1; + return Registers_GetVarInfo(expr->data.monadic->data.objref)->noregister != 0; + } + return 1; + case EMONMIN: + case EBINNOT: + case ELOGNOT: + case EFORCELOAD: + case ETYPCON: + case EBITFIELD: + return DetectCondSideAffect(expr->data.monadic); + case EINTCONST: + case EFLOATCONST: + case ESTRINGCONST: + case EOBJREF: + case EVECTOR128CONST: + return 0; + case EPOSTINC: + case EPOSTDEC: + case EPREINC: + case EPREDEC: + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + case ELAND: + case ELOR: + case EASS: + case EMULASS: + case EDIVASS: + case EMODASS: + case EADDASS: + case ESUBASS: + case ESHLASS: + case ESHRASS: + case EANDASS: + case EXORASS: + case EORASS: + case ECOND: + case EFUNCCALL: + case EFUNCCALLP: + case EMFPOINTER: + case ENULLCHECK: + case EPRECOMP: + case EDEFINE: + case EREUSE: + case EASSBLK: + case ECONDASS: + return 1; + default: + CError_FATAL(2523); + return 1; + } +} + +static UInt8 WeightandSumOps(ENode *expr) { + UInt32 score; + + switch (expr->type) { + case ECOND: + case ECONDASS: + score = WeightandSumOps(expr->data.cond.cond); + score += WeightandSumOps(expr->data.cond.expr1); + score += WeightandSumOps(expr->data.cond.expr2); + break; + case EMUL: + case EMULV: + case EMULASS: + score = WeightandSumOps(expr->data.diadic.left) + 10; + score += WeightandSumOps(expr->data.diadic.right); + break; + case EDIV: + case EMODULO: + case EDIVASS: + case EMODASS: + score = WeightandSumOps(expr->data.diadic.left) + 20; + score += WeightandSumOps(expr->data.diadic.right); + break; + case EADDV: + case ESUBV: + case EADD: + case ESUB: + case ESHL: + case ESHR: + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + case EAND: + case EXOR: + case EOR: + case ELAND: + case ELOR: + case EASS: + case EADDASS: + case ESUBASS: + case ESHLASS: + case ESHRASS: + case EANDASS: + case EXORASS: + case EORASS: + case ECOMMA: + case EPMODULO: + case EROTL: + case EROTR: + case EBCLR: + case EBTST: + case EBSET: + score = WeightandSumOps(expr->data.diadic.left) + 1; + score += WeightandSumOps(expr->data.diadic.right); + break; + case EPOSTINC: + case EPOSTDEC: + case EPREINC: + case EPREDEC: + case EMONMIN: + case EBINNOT: + case ELOGNOT: + score = WeightandSumOps(expr->data.monadic) + 1; + break; + case EINDIRECT: + if (expr->data.monadic->type == EOBJREF && expr->data.monadic->data.objref->datatype == DLOCAL) + if (!Registers_GetVarInfo(expr->data.monadic->data.objref)->noregister) + return 0; + case EFORCELOAD: + case ETYPCON: + case EBITFIELD: + score = WeightandSumOps(expr->data.monadic); + break; + case EOBJREF: + score = 0; + break; + case EINTCONST: + case EFLOATCONST: + case ESTRINGCONST: + case EVECTOR128CONST: + score = 0; + break; + case EFUNCCALL: + case EFUNCCALLP: + score = 5; + break; + default: + score = 255; + } + + if (score >= 255) + score = 255; + return (UInt8) score; +} + +Boolean TOC_use_fsel(ENode *expr) { + ENode *left; + ENode *right; + Type *rtype; + int score1; + int score2; + + left = expr->data.cond.expr1; + right = expr->data.cond.expr2; + rtype = expr->rtype; + + if (!copts.peephole) return 0; + if (!copts.gen_fsel) return 0; + if (left->hascall) return 0; + if (right->hascall) return 0; + if (!IS_TYPE_FLOAT(rtype)) return 0; + if (!IS_TYPE_FLOAT(left->rtype)) return 0; + if (!IS_TYPE_FLOAT(right->rtype)) return 0; + + if (expr->data.cond.cond->type < ELESS || expr->data.cond.cond->type > ENOTEQU) + return 0; + if (!IS_TYPE_FLOAT(expr->data.cond.cond->data.diadic.right->rtype)) + return 0; + if (expr->data.cond.cond->type == ELOGNOT || expr->data.cond.cond->type == ELAND || expr->data.cond.cond->type == ELOR) + return 0; + + if (expr->type == ECONDASS) { + if (left->type != EINDIRECT) + return 0; + if (left->data.monadic->type != EOBJREF) + return 0; + } + + if (DetectCondSideAffect(left)) + return 0; + if (DetectCondSideAffect(right)) + return 0; + + if (expr->type == ECONDASS) + score1 = 1; + else + score1 = WeightandSumOps(left); + score2 = WeightandSumOps(right); + + if (score1 > copts.gen_fsel) + return 0; + else if (score2 > copts.gen_fsel) + return 0; + else + return 1; +} + +Boolean TOC_use_isel(ENode *expr, Boolean flag) { + int opt; + ENode *left; + ENode *right; + Type *rtype; + Object *obj; + int score1; + int score2; + + left = expr->data.cond.expr1; + right = expr->data.cond.expr2; + rtype = expr->rtype; + if (flag) + opt = 10; + else + opt = copts.gen_isel; + + if (!opt) return 0; + if (!copts.peephole) return 0; + if (left->hascall) return 0; + if (right->hascall) return 0; + if (!TYPE_FITS_IN_REGISTER(rtype)) return 0; + if (!TYPE_FITS_IN_REGISTER(left->rtype)) return 0; + if (!TYPE_FITS_IN_REGISTER(right->rtype)) return 0; + + if (expr->data.cond.cond->type < ELESS || expr->data.cond.cond->type > ENOTEQU) + return 0; + if (TYPE_IS_8BYTES(rtype)) + return 0; + + if (flag) { + if (!TYPE_FITS_IN_REGISTER(expr->data.cond.cond->data.diadic.right->rtype)) + return 0; + if (TYPE_IS_8BYTES(expr->data.cond.cond->data.diadic.right->rtype)) + return 0; + } + + if (expr->type == ECONDASS) { + if (left->type != EINDIRECT) + return 0; + if (left->data.monadic->type != EOBJREF) + return 0; + if (flag) { + obj = left->data.monadic->data.objref; + if (obj->datatype != DLOCAL) + return 0; + if ((Registers_GetVarInfo(obj) ? Registers_GetVarInfo(obj)->reg : 0) == 0) + return 0; + if (obj->u.var.info->rclass != RegClass_GPR) + return 0; + } + } + + if (DetectCondSideAffect(left)) + return 0; + if (DetectCondSideAffect(right)) + return 0; + + if (expr->type == ECONDASS) + score1 = 1; + else + score1 = WeightandSumOps(left); + score2 = WeightandSumOps(right); + + if (score1 > opt) + return 0; + else if (score2 > opt) + return 0; + else + return 1; +} + +SInt32 GetSizeSkip(ENode *expr) { + if (expr->type == EASS) + expr = expr->data.diadic.right; + if (expr->type == ETYPCON && expr->data.monadic->rtype->size < expr->rtype->size) + return expr->data.monadic->rtype->size; + else + return expr->rtype->size; +} + +void Optimize64bitMath(ENode *expr) { + ENode *left; // r23 + ENode *right; // r28 + SInt32 leftsize; // r24 + SInt32 rightsize; // r25 + SInt32 totalsize; // r22 + int unsignedflag; // r4 + + CError_ASSERT(2886, TYPE_IS_8BYTES(expr->rtype)); + + left = expr->data.diadic.left; + right = expr->data.diadic.right; + leftsize = GetSizeSkip(left); + totalsize = (leftsize + (rightsize = GetSizeSkip(right))); + unsignedflag = is_unsigned(expr->rtype) != 0; + + switch (totalsize) { + case 2: + case 3: + case 4: + if (unsignedflag) { + left->rtype = (Type *) &stunsignedint; + right->rtype = (Type *) &stunsignedint; + } else { + left->rtype = (Type *) &stsignedint; + right->rtype = (Type *) &stsignedint; + } + break; + case 5: + case 6: + case 8: + case 9: + case 10: + case 12: + if (expr->type != ESUB || leftsize >= rightsize) { + if (leftsize < 4) { + if (unsignedflag) + left->rtype = (Type *) &stunsignedint; + else + left->rtype = (Type *) &stsignedint; + } else { + if (left->type == ETYPCON && left->data.monadic->rtype != (Type *) &stfloat) + expr->data.diadic.left = left->data.monadic; + } + if (rightsize < 4) { + if (unsignedflag) + right->rtype = (Type *) &stunsignedint; + else + right->rtype = (Type *) &stsignedint; + } else { + if (right->type == ETYPCON && right->data.monadic->rtype != (Type *) &stfloat) + expr->data.diadic.right = right->data.monadic; + } + } + break; + case 16: + break; + default: + CError_FATAL(2975); + } +} + +static Boolean OptimizeNestedAssginments(ENode **pexpr, Object *check) { + ENode *expr; + Boolean success1; + Boolean success2; + + expr = *pexpr; + switch (expr->type) { + case EOBJREF: + return check != expr->data.objref; + case EMUL: + case EMULV: + case EDIV: + case EMODULO: + case EADDV: + case ESUBV: + case EADD: + case ESUB: + case ESHL: + case ESHR: + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + case EEQU: + case ENOTEQU: + case EAND: + case EXOR: + case EOR: + case ELAND: + case ELOR: + case EASS: + case EMULASS: + case EDIVASS: + case EMODASS: + case EADDASS: + case ESUBASS: + case ESHLASS: + case ESHRASS: + case EANDASS: + case EXORASS: + case EORASS: + case ECOMMA: + case EPMODULO: + case EROTL: + case EROTR: + case EBCLR: + case EBTST: + case EBSET: + switch (expr->type) { + case EASS: + if (ENODE_IS(expr->data.diadic.left, EOBJREF) && expr->data.diadic.left->data.objref == check) { + *pexpr = expr->data.diadic.right; + return OptimizeNestedAssginments(pexpr, check); + } + break; + case EMULASS: + case EDIVASS: + case EMODASS: + case EADDASS: + case ESUBASS: + case ESHLASS: + case ESHRASS: + case EANDASS: + case EXORASS: + case EORASS: + CError_FATAL(3033); + return 0; + } + if (OptimizeNestedAssginments(&expr->data.diadic.right, check)) + return OptimizeNestedAssginments(&expr->data.diadic.left, check); + else + return 0; + break; + case EPOSTINC: + case EPOSTDEC: + case EPREINC: + case EPREDEC: + case EINDIRECT: + case EMONMIN: + case EBINNOT: + case ELOGNOT: + case EFORCELOAD: + case ETYPCON: + case EBITFIELD: + return OptimizeNestedAssginments(&expr->data.monadic, check); + case EINTCONST: + case EFLOATCONST: + case ESTRINGCONST: + case EVECTOR128CONST: + return 1; + case ECOND: + success2 = OptimizeNestedAssginments(&expr->data.cond.expr2, check); + success1 = OptimizeNestedAssginments(&expr->data.cond.expr1, check); + if (!success2 || !success1) + return 0; + return OptimizeNestedAssginments(&expr->data.cond.cond, check) == 0; + case ECONDASS: + if (!OptimizeNestedAssginments(&expr->data.cond.expr2, check)) + return 0; + if (OptimizeNestedAssginments(&expr->data.cond.cond, check)) + return OptimizeNestedAssginments(&expr->data.cond.expr1, check) == 0; + else + return 0; + case EFUNCCALL: + case EFUNCCALLP: + case EMFPOINTER: + case ENULLCHECK: + case EPRECOMP: + case EDEFINE: + case EREUSE: + case EASSBLK: + return 0; + default: + CError_FATAL(3083); + return 0; + } +} + +static void expandTOCexpression(ENode *expr, Type *type, int ignored) { + Object *obj; + Object *tmpobj; + ENode *cond; + ENode *tmpexpr; + ENode *newexpr; + ENodeList *list; + + expr->ignored = ignored; + switch (expr->type) { + case EINTCONST: + expr->hascall = 0; + break; + case EFLOATCONST: + uses_globals = 1; + RewriteFloatConst(expr); + expandTOCexpression(expr, NULL, 0); + break; + case EVECTOR128CONST: + if (!canoptimizevectorconst(&expr->data.vector128val, expr->rtype, NULL)) { + uses_globals = 1; + RewriteVectorConst(expr); + expandTOCexpression(expr, NULL, 0); + } + break; + case ESTRINGCONST: + uses_globals = 1; + CInit_RewriteString(expr, 1); + expandTOCexpression(expr, NULL, 0); + break; + case EOBJREF: + obj = expr->data.objref; + CError_ASSERT(3203, obj->datatype != DALIAS); + if (obj->datatype == DFUNC || obj->datatype == DVFUNC) + uses_globals = 1; + if (obj->datatype == DDATA) { + uses_globals = 1; + if (createIndirect(obj, 0, 1)) { + tmpexpr = lalloc(sizeof(ENode)); + memclrw(tmpexpr, sizeof(ENode)); + tmpexpr->type = EOBJREF; + tmpexpr->cost = 0; + tmpexpr->data.objref = obj->toc; + tmpexpr->rtype = CDecl_NewPointerType(expr->rtype); + + expr->type = EINDIRECT; + expr->cost = 1; + expr->data.monadic = tmpexpr; + } + } + expr->hascall = 0; + break; + case ECONDASS: + expr->ignored = 0; + case ECOND: + if (!ENODE_IS_RANGE(expr->data.cond.cond, ELESS, ENOTEQU)) + expr->data.cond.cond = comparewithzero(expr->data.cond.cond); + expandTOCexpression(expr->data.cond.expr1, NULL, ignored); + expandTOCexpression(expr->data.cond.expr2, NULL, ignored); + if (TOC_use_fsel(expr)) { + cond = expr->data.cond.cond; + if (ENODE_IS(cond->data.diadic.right, EFLOATCONST) && CMach_FloatIsZero(cond->data.diadic.right->data.floatval)) { + expandTOCexpression(cond->data.diadic.left, NULL, 0); + } else if (ENODE_IS(cond->data.diadic.left, EFLOATCONST) && CMach_FloatIsZero(cond->data.diadic.left->data.floatval)) { + expandTOCexpression(cond->data.diadic.right, NULL, 0); + } else { + expandTOCexpression(expr->data.cond.cond, NULL, 0); + } + } else { + expandTOCexpression(expr->data.cond.cond, NULL, 0); + } + expr->hascall = expr->data.cond.cond->hascall | expr->data.cond.expr1->hascall | expr->data.cond.expr2->hascall; + break; + case EFUNCCALL: + case EFUNCCALLP: + if (is_intrinsic_function_call(expr)) { + expr->hascall = 0; + if ((expr->data.funccall.funcref->data.objref->u.func.u.intrinsicid & 0xFFFFu) == Intrinsic_008) { + if (copts.altivec_model) + update_frame_align(16); + dynamic_stack = 1; + requires_frame = 1; + } else if ((expr->data.funccall.funcref->data.objref->u.func.u.intrinsicid & 0xFFFFu) == Intrinsic_035) { + if (expr->data.funccall.args->next->node->type == ESTRINGCONST) { + rewritestrcpy(expr); + } else { + requires_frame = 1; + makes_call = 1; + expr->hascall = 1; + } + } else if ((expr->data.funccall.funcref->data.objref->u.func.u.intrinsicid & 0xFFFFu) == Intrinsic_036) { + if (expr->data.funccall.args->next->next->node->type != EINTCONST) { + requires_frame = 1; + makes_call = 1; + expr->hascall = 1; + } + } + } else { + requires_frame = 1; + makes_call = 1; + expr->hascall = 1; + } + + if (disables_optimizer(expr)) { + disable_optimizer |= 1; + if (copts.disable_registers) + disable_optimizer |= 2; + } + + if (ENODE_IS(expr->data.funccall.funcref, EINDIRECT) && IS_TYPE_FUNC(expr->data.funccall.funcref->rtype)) + *expr->data.funccall.funcref = *expr->data.funccall.funcref->data.monadic; + + if (ENODE_IS(expr->data.funccall.funcref, EOBJREF)) { + expr->data.funccall.funcref->hascall = 0; + if (expr->data.funccall.funcref->data.objref->datatype == DVFUNC && (expr->data.funccall.funcref->flags & ENODE_FLAG_80)) { + tmpobj = galloc(sizeof(Object)); + *tmpobj = *expr->data.funccall.funcref->data.objref; + tmpobj->datatype = DFUNC; + expr->data.funccall.funcref->data.objref = tmpobj; + } + } else { + expandTOCexpression(expr->data.funccall.funcref, NULL, 0); + } + + for (list = expr->data.funccall.args; list; list = list->next) + expandTOCexpression(list->node, NULL, 0); + + if (expr->hascall) + estimate_func_param_size(expr); + + if (is_intrinsic_function_call(expr)) { + for (list = expr->data.funccall.args; list; list = list->next) + expr->hascall |= list->node->hascall; + } + + if (CMach_PassResultInHiddenArg(TYPE_FUNC(expr->data.funccall.functype)->functype)) + rewritefunctioncallreturningstruct(expr); + + break; + case ECOMMA: + expandTOCexpression(expr->data.diadic.left, NULL, 1); + expandTOCexpression(expr->data.diadic.right, NULL, ignored); + expr->hascall = expr->data.diadic.left->hascall | expr->data.diadic.right->hascall; + break; + case ELAND: + case ELOR: + if (!ENODE_IS(expr->data.diadic.left, ELOGNOT) && !ENODE_IS2(expr->data.diadic.left, ELAND, ELOR) && !ENODE_IS_RANGE(expr->data.diadic.left, ELESS, ENOTEQU)) + expr->data.diadic.left = comparewithzero(expr->data.diadic.left); + if (!ENODE_IS(expr->data.diadic.right, ELOGNOT) && !ENODE_IS2(expr->data.diadic.right, ELAND, ELOR) && !ENODE_IS_RANGE(expr->data.diadic.right, ELESS, ENOTEQU)) + expr->data.diadic.right = comparewithzero(expr->data.diadic.right); + expandTOCexpression(expr->data.diadic.left, NULL, 0); + expandTOCexpression(expr->data.diadic.right, NULL, 0); + expr->hascall = expr->data.diadic.left->hascall | expr->data.diadic.right->hascall; + break; + case EDIVASS: + if (!IS_TYPE_FLOAT(expr->rtype) && IS_TYPE_FLOAT(expr->data.diadic.right->rtype)) + uses_globals = 1; + rewrite_opassign(expr, EDIV); + goto opassign_common; + case EMULASS: + if (!IS_TYPE_FLOAT(expr->rtype) && IS_TYPE_FLOAT(expr->data.diadic.right->rtype)) + uses_globals = 1; + rewrite_opassign(expr, EMUL); + goto opassign_common; + case EADDASS: + if (!IS_TYPE_FLOAT(expr->rtype) && IS_TYPE_FLOAT(expr->data.diadic.right->rtype)) + uses_globals = 1; + rewrite_opassign(expr, EADD); + goto opassign_common; + case ESUBASS: + if (!IS_TYPE_FLOAT(expr->rtype) && IS_TYPE_FLOAT(expr->data.diadic.right->rtype)) + uses_globals = 1; + rewrite_opassign(expr, ESUB); + goto opassign_common; + case EMODASS: + if (!IS_TYPE_FLOAT(expr->rtype) && IS_TYPE_FLOAT(expr->data.diadic.right->rtype)) + uses_globals = 1; + rewrite_opassign(expr, EMODULO); + goto opassign_common; + case ESHLASS: + rewrite_opassign(expr, ESHL); + goto opassign_common; + case ESHRASS: + rewrite_opassign(expr, ESHR); + goto opassign_common; + case EANDASS: + rewrite_opassign(expr, EAND); + goto opassign_common; + case EXORASS: + rewrite_opassign(expr, EXOR); + goto opassign_common; + case EORASS: + rewrite_opassign(expr, EOR); + goto opassign_common; + case EASS: + if (ENODE_IS(expr->data.diadic.left, EINDIRECT)) + expr->data.diadic.left = expr->data.diadic.left->data.monadic; + opassign_common: + expandTOCexpression(expr->data.diadic.left, NULL, 0); + expandTOCexpression(expr->data.diadic.right, NULL, 0); + expr->hascall = expr->data.diadic.left->hascall | expr->data.diadic.right->hascall; + break; + case EEQU: + case ENOTEQU: + if (ENODE_IS(expr->data.diadic.right, EINTCONST) && expr->data.diadic.right->data.intval.lo == 0 && expr->data.diadic.right->data.intval.hi == 0) { + for (tmpexpr = expr->data.diadic.left; ENODE_IS2(tmpexpr, EFORCELOAD, ETYPCON); tmpexpr = tmpexpr->data.monadic) { + if (!TYPE_FITS_IN_REGISTER(tmpexpr->rtype)) + break; + } + if (ENODE_IS(tmpexpr, ELOGNOT) && TYPE_FITS_IN_REGISTER(tmpexpr->data.monadic->rtype)) { + if (ENODE_IS(expr, EEQU)) + expr->type = ENOTEQU; + else + expr->type = EEQU; + expr->data.diadic.left = tmpexpr->data.monadic; + expandTOCexpression(expr, NULL, 0); + break; + } + if (ENODE_IS(tmpexpr, EEQU)) { + if (ENODE_IS(expr, EEQU)) + tmpexpr->type = ENOTEQU; + *expr = *tmpexpr; + expandTOCexpression(expr, NULL, 0); + break; + } + if (ENODE_IS(tmpexpr, ENOTEQU)) { + if (ENODE_IS(expr, EEQU)) + tmpexpr->type = EEQU; + *expr = *tmpexpr; + expandTOCexpression(expr, NULL, 0); + break; + } + if (ENODE_IS(tmpexpr, ECOND)) { + newexpr = COND_to_COMPARE(tmpexpr->data.cond.cond, tmpexpr->data.cond.expr1, tmpexpr->data.cond.expr2); + if (newexpr) { + *tmpexpr = *newexpr; + expandTOCexpression(expr, NULL, 0); + break; + } + } + } + case EDIV: + if (ENODE_IS(expr, EDIV) && ENODE_IS(expr->data.diadic.right, EFLOATCONST) && CMach_FloatIsPowerOf2(expr->data.diadic.right->data.floatval)) { + expr->type = EMUL; + expr->data.diadic.right->data.floatval = CMach_FloatReciprocal(expr->data.diadic.right->data.floatval); + } + case EMODULO: + case ESHL: + case ESHR: + case ELESS: + case EGREATER: + case ELESSEQU: + case EGREATEREQU: + expandTOCexpression(expr->data.diadic.left, NULL, 0); + expandTOCexpression(expr->data.diadic.right, NULL, 0); + expr->hascall = expr->data.diadic.left->hascall | expr->data.diadic.right->hascall; + if (TYPE_IS_8BYTES(expr->rtype)) { + if (ENODE_IS2(expr, ESHL, ESHR) && !ENODE_IS(expr->data.diadic.right, EINTCONST)) { + expr->hascall = 1; + requires_frame = 1; + makes_call = 1; + } + if (ENODE_IS2(expr, EDIV, EMODULO)) { + if (ENODE_IS(expr->data.diadic.right, EINTCONST)) { + if (I8_log2n(((SInt64) expr->data.diadic.right->data.intval.hi << 32) + expr->data.diadic.right->data.intval.lo) <= 0) { + expr->hascall = 1; + requires_frame = 1; + makes_call = 1; + } + } else { + expr->hascall = 1; + requires_frame = 1; + makes_call = 1; + } + } + } + break; + case ESUB: + if (ENODE_IS(expr->data.diadic.right, EINTCONST)) { + expr->type = EADD; + expr->data.diadic.right->data.intval = CInt64_Neg(expr->data.diadic.right->data.intval); + } + case EMUL: + case EADD: + case EAND: + case EXOR: + case EOR: + expandTOCexpression(expr->data.diadic.left, type, 0); + expandTOCexpression(expr->data.diadic.right, type, 0); + expr->hascall = expr->data.diadic.left->hascall | expr->data.diadic.right->hascall; + if (ENODE_IS3(expr, EMUL, EADD, ESUB) && TYPE_IS_8BYTES(expr->rtype)) + Optimize64bitMath(expr); + if (type) { + if ( + ENODE_IS(expr->data.diadic.left, ETYPCON) && + IS_TYPE_INT_OR_ENUM(expr->data.diadic.left->rtype) && + IS_TYPE_INT_OR_ENUM(expr->data.diadic.left->data.monadic->rtype) && + expr->data.diadic.left->data.monadic->rtype->size >= type->size && + !TYPE_IS_8BYTES(expr->data.diadic.left->data.monadic->rtype) + ) + expr->data.diadic.left = expr->data.diadic.left->data.monadic; + + if ( + ENODE_IS(expr->data.diadic.right, ETYPCON) && + IS_TYPE_INT_OR_ENUM(expr->data.diadic.right->rtype) && + IS_TYPE_INT_OR_ENUM(expr->data.diadic.right->data.monadic->rtype) && + expr->data.diadic.right->data.monadic->rtype->size >= type->size && + !TYPE_IS_8BYTES(expr->data.diadic.right->data.monadic->rtype) + ) + expr->data.diadic.right = expr->data.diadic.right->data.monadic; + + expr->rtype = type; + } + break; + + case ETYPCON: + tmpexpr = expr->data.monadic; + if ((IS_TYPE_INT_OR_ENUM(expr->rtype) && expr->rtype->size < 4) && IS_TYPE_INT_OR_ENUM(tmpexpr->rtype) && !TYPE_IS_8BYTES(tmpexpr->rtype)) { + expandTOCexpression(tmpexpr, expr->rtype, 0); + } else { + expandTOCexpression(tmpexpr, NULL, expr->rtype->type == TYPEVOID); + } + + expr->hascall = tmpexpr->hascall; + if (IS_TYPE_INT_OR_ENUM(tmpexpr->rtype) && IS_TYPE_FLOAT(expr->rtype)) + uses_globals = 1; + + if ((TYPE_IS_8BYTES(tmpexpr->rtype) && IS_TYPE_FLOAT(expr->rtype)) || (TYPE_IS_8BYTES(expr->rtype) && IS_TYPE_FLOAT(tmpexpr->rtype))) { + uses_globals = 1; + expr->hascall = 1; + requires_frame = 1; + makes_call = 1; + } + + if (IS_TYPE_FLOAT(tmpexpr->rtype)) { + if (is_unsigned(expr->rtype) && expr->rtype->size == 4) { + expr->hascall = 1; + requires_frame = 1; + makes_call = 1; + } else { + uses_globals = 1; + } + } + + if (IS_TYPE_VECTOR(expr->rtype) && !IS_TYPE_VECTOR(tmpexpr->rtype)) + PPCError_Error(PPCErrorStr114); + break; + + case EPOSTINC: + if (!expr->ignored) { + if (IS_TYPE_FLOAT(expr->data.monadic->rtype) && is_unsigned(expr->rtype)) + uses_globals = 1; + expandTOCexpression(expr->data.monadic, NULL, 0); + expr->hascall = expr->data.monadic->hascall; + break; + } + expr->type = EPREINC; + case EPREINC: + rewrite_preincdec(expr); + rewrite_opassign(expr, EADD); + goto opassign_common; + + case EPOSTDEC: + if (!expr->ignored) { + if (IS_TYPE_FLOAT(expr->data.monadic->rtype) && is_unsigned(expr->rtype)) + uses_globals = 1; + expandTOCexpression(expr->data.monadic, NULL, 0); + expr->hascall = expr->data.monadic->hascall; + break; + } + expr->type = EPREDEC; + case EPREDEC: + rewrite_preincdec(expr); + rewrite_opassign(expr, ESUB); + goto opassign_common; + + case ELOGNOT: + if (!ENODE_IS(expr->data.monadic, ELOGNOT) && !ENODE_IS2(expr->data.monadic, ELAND, ELOR) && !ENODE_IS_RANGE(expr->data.monadic, ELESS, ENOTEQU)) + expr->data.monadic = comparewithzero(expr->data.monadic); + case EMONMIN: + case EBINNOT: + tmpexpr = expr->data.monadic; + expandTOCexpression(tmpexpr, type, 0); + expr->hascall = tmpexpr->hascall; + if (type && ENODE_IS(expr->data.monadic, ETYPCON)) { + if (IS_TYPE_INT_OR_ENUM(expr->data.monadic->rtype) && IS_TYPE_INT_OR_ENUM(expr->data.monadic->data.monadic->rtype)) { + if (expr->data.monadic->data.monadic->rtype->size >= type->size) { + expr->data.monadic = expr->data.monadic->data.monadic; + expr->rtype = type; + } + } + } + break; + + case EINDIRECT: + case EFORCELOAD: + case EBITFIELD: + tmpexpr = expr->data.monadic; + expandTOCexpression(tmpexpr, NULL, 0); + expr->hascall = tmpexpr->hascall; + break; + + case EDEFINE: + tmpexpr = expr->data.monadic; + expandTOCexpression(tmpexpr, NULL, 0); + expr->hascall = tmpexpr->hascall; + break; + + case EREUSE: + expr->hascall = expr->data.monadic->hascall; + break; + + case ENULLCHECK: + expandTOCexpression(expr->data.diadic.left, NULL, 0); + expandTOCexpression(expr->data.diadic.right, NULL, 0); + expr->hascall = expr->data.diadic.left->hascall | expr->data.diadic.right->hascall; + break; + + case EPRECOMP: + expr->hascall = 0; + break; + + case ELABEL: + obj = createcodelabel(expr->data.label); + newexpr = lalloc(sizeof(ENode)); + memclrw(newexpr, sizeof(ENode)); + newexpr->type = EOBJREF; + newexpr->cost = 0; + newexpr->data.objref = obj; + newexpr->rtype = CDecl_NewPointerType(obj->type); + + expr->type = EINDIRECT; + expr->cost = 1; + expr->data.monadic = newexpr; + expr->hascall = 0; + break; + } +} + +static void checkexceptionreferences(ExceptionAction *action) { + for (; action; action = action->prev) { + switch (action->type) { + case EAT_DESTROYLOCAL: + referenceexception(action->data.destroy_local.local); + break; + case EAT_DESTROYLOCALCOND: + referenceexception(action->data.destroy_local_cond.local); + referenceexception(action->data.destroy_local_cond.cond); + break; + case EAT_DESTROYLOCALOFFSET: + referenceexception(action->data.destroy_local_offset.local); + break; + case EAT_DESTROYLOCALPOINTER: + referenceexception(action->data.destroy_local_pointer.pointer); + break; + case EAT_DESTROYLOCALARRAY: + referenceexception(action->data.destroy_local_array.localarray); + break; + case EAT_DESTROYBASE: + referenceexception(action->data.destroy_member.objectptr); // wrong union? + break; + case EAT_DESTROYPARTIALARRAY: + referenceexception(action->data.destroy_partial_array.arraypointer); + referenceexception(action->data.destroy_partial_array.arraycounter); + referenceexception(action->data.destroy_partial_array.element_size); + break; + case EAT_DESTROYMEMBER: + referenceexception(action->data.destroy_member.objectptr); + break; + case EAT_DESTROYMEMBERCOND: + referenceexception(action->data.destroy_member_cond.objectptr); + referenceexception(action->data.destroy_member_cond.cond); + break; + case EAT_DESTROYMEMBERARRAY: + referenceexception(action->data.destroy_member_array.objectptr); + break; + case EAT_DELETEPOINTER: + case EAT_DELETELOCALPOINTER: + referenceexception(action->data.delete_pointer.pointerobject); + break; + case EAT_DELETEPOINTERCOND: + referenceexception(action->data.delete_pointer_cond.pointerobject); + referenceexception(action->data.delete_pointer_cond.cond); + break; + case EAT_CATCHBLOCK: + referenceexception(action->data.catch_block.catch_object); + referenceexception(action->data.catch_block.catch_info_object); + break; + case EAT_ACTIVECATCHBLOCK: + referenceexception(action->data.active_catch_block.catch_info_object); + break; + } + } +} + +void expandTOCreferences(Statement **stmts) { + Statement *stmt; + IAOperand *op; + int i; + InlineAsm *ia; + VarInfo *vi; + + codelabellist = NULL; + exceptionlist = NULL; + + for (stmt = *stmts; stmt; stmt = stmt->next) { + curstmtvalue = stmt->value; + if (stmt->flags & StmtFlag_1) { + has_catch_blocks = 1; + dynamic_stack = 1; + requires_frame = 1; + } + + switch (stmt->type) { + case ST_EXPRESSION: + expandTOCexpression(stmt->expr, NULL, 1); + if (stmt->expr->type == ETYPCON && IS_TYPE_VOID(stmt->expr->rtype)) + stmt->expr = stmt->expr->data.monadic; + break; + case ST_GOTOEXPR: + expandTOCexpression(stmt->expr, NULL, 0); + break; + case ST_IFGOTO: + case ST_IFNGOTO: + if (stmt->expr->type < ELESS || stmt->expr->type > ENOTEQU) + stmt->expr = comparewithzero(stmt->expr); + expandTOCexpression(stmt->expr, NULL, 0); + break; + case ST_RETURN: + if (!stmt->expr) + continue; + expandTOCexpression( + stmt->expr, NULL, + IS_TYPE_ARRAY(stmt->expr->rtype) || IS_TYPE_NONVECTOR_STRUCT(stmt->expr->rtype) || IS_TYPE_CLASS(stmt->expr->rtype) || + IS_TYPE_12BYTES_MEMBERPOINTER(stmt->expr->rtype)); + break; + case ST_SWITCH: + uses_globals = 1; + expandTOCexpression(stmt->expr, NULL, 0); + break; + case ST_ENDCATCHDTOR: + requires_frame = 1; + makes_call = 1; + break; + case ST_ASM: + if ((ia = (InlineAsm *) stmt->expr)) { + if (ia->flags & IAFlag1) { + if (ia->opcode == IADirective_FrFree) + requires_frame = 1; + } else { + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + if (op->type == IAOpnd_Reg) { + if (!op->u.reg.object) { + if (op->u.reg.num == INVALID_PIC_REG) + uses_globals = 1; + else if (op->u.reg.effect & EffectWrite) + asm_used_register(op->u.reg.rclass, op->u.reg.num); + } else if ((vi = Registers_GetVarInfo(op->u.reg.object))) { + vi->flags |= VarInfoFlag40; + } + } else if (op->type == IAOpnd_3) { + uses_globals = 1; + } + } + + if (ia->flags & IAFlag2) + makes_call = 1; + } + } + break; + } + + checkexceptionreferences(stmt->dobjstack); + } +} + +void resetTOCvarinfo(void) { + ObjectList *list; + + for (list = toclist; list; list = list->next) + list->object->u.toc.info = CodeGen_GetNewVarInfo(); +} + +Boolean needdescriptor(void) { + // completely unused, dunno what args this might take + return 0; +} + +Object *createstaticinitobject(void) { + char buf[100]; + char *p; + Str255 fname; + TypeFunc *tfunc; + Object *obj; + + COS_FileGetFSSpecInfo(&cparamblkptr->sourcefile, NULL, NULL, fname); + sprintf(buf, "__sinit_%*.*s", -fname[0], fname[0], &fname[1]); + for (p = &buf[1]; *p; p++) { + if (*p == '.') + *p = '_'; + } + + tfunc = galloc(sizeof(TypeFunc)); + memclrw(tfunc, sizeof(TypeFunc)); + tfunc->type = TYPEFUNC; + tfunc->functype = &stvoid; + tfunc->args = NULL; + tfunc->flags = FUNC_DEFINED; + + obj = galloc(sizeof(Object)); + memclrw(obj, sizeof(Object)); + obj->otype = OT_OBJECT; + obj->type = (Type *) tfunc; + obj->name = GetHashNameNodeExport(buf); + obj->sclass = TK_STATIC; + obj->datatype = DFUNC; + + return obj; +} + +static void estimate_func_param_size(ENode *node) { + SInt32 work; + ENodeList *list; + SInt32 align; + + work = 0; + for (list = node->data.funccall.args; list; list = list->next) { + align = ~7 & (CMach_ArgumentAlignment(list->node->rtype) + 7); + work += ~(align - 1) & (list->node->rtype->size + align - 1); + } + + estimate_out_param_size(work); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c new file mode 100644 index 0000000..8223bf4 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c @@ -0,0 +1,747 @@ +#include "compiler/Alias.h" +#include "compiler/CClass.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/CMachine.h" +#include "compiler/CodeGen.h" +#include "compiler/CopyPropagation.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/RegisterInfo.h" +#include "compiler/UseDefChains.h" +#include "compiler/ValueNumbering.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +static Alias *aliases; +static int n_aliases; +static int n_gathered_aliases; +static Alias *alias_hash[997]; +Alias *worst_case; +Object worst_case_obj; + +static TypePointer worst_case_memory_type = { + TYPEARRAY, + 0xFFFFFF, + TYPE(&stchar) +}; + +static Boolean is_safe_const(Object *obj) { + Type *type; + + type = obj->type; + while (IS_TYPE_ARRAY(type)) + type = TPTR_TARGET(type); + + if (TYPE_FITS_IN_REGISTER(type) || IS_TYPE_VECTOR(type) || IS_TYPE_FLOAT(type) || IS_TYPE_STRUCT(type)) + return is_const_object(obj); + + if (IS_TYPE_CLASS(type)) + return is_const_object(obj) && CClass_IsPODClass(TYPE_CLASS(type)); + + return 0; +} + +void initialize_aliases(void) { + int i; + + memclrw(&worst_case_obj, sizeof(Object)); + worst_case_obj.otype = OT_OBJECT; + worst_case_obj.type = TYPE(&worst_case_memory_type); + worst_case_obj.datatype = DDATA; + worst_case_obj.name = GetHashNameNodeExport("@worst_case@"); + + aliases = NULL; + n_aliases = 0; + n_gathered_aliases = 0; + for (i = 0; i < 997; i++) + alias_hash[i] = NULL; + + worst_case = make_alias_set(); + add_alias_member(worst_case, make_alias(&worst_case_obj, 0, 0)); +} + +static UInt32 hash_alias(Object *object, SInt32 offset, SInt32 size) { + return (UInt32) (object->name->hashval * offset * size) % 997; +} + +static Alias *create_alias(AliasType type, Object *object, SInt32 offset, SInt32 size, Boolean addToHash) { + Alias *alias; + UInt32 hash; + + alias = lalloc(sizeof(Alias)); + memclrw(alias, sizeof(Alias)); + alias->type = type; + alias->index = n_aliases++; + alias->next = aliases; + aliases = alias; + alias->object = object; + alias->offset = offset; + alias->size = size; + + if (addToHash) { + hash = hash_alias(object, offset, size); + alias->hashNext = alias_hash[hash]; + alias_hash[hash] = alias; + } + + return alias; +} + +static Alias *lookup_alias(Object *object, SInt32 offset, SInt32 size) { + Alias *scan; + + for (scan = alias_hash[hash_alias(object, offset, size)]; scan; scan = scan->hashNext) { + if (scan->object == object && scan->offset == offset && scan->size == size) + return scan; + } + + return NULL; +} + +Alias *make_alias(Object *object, SInt32 offset, SInt32 size) { + Alias *alias; + Alias *alias2; + + if (!offset && !size) + size = object->type->size; + + alias = lookup_alias(object, offset, size); + if (!alias) { + if (offset > 0 || size != object->type->size) { + alias2 = make_alias(object, 0, object->type->size); + alias = create_alias(AliasType1, object, offset, size, 1); + add_alias_member(alias2, alias); + } else { + alias = create_alias(AliasType0, object, offset, size, 1); + } + + switch (object->datatype) { + case DLOCAL: + case DNONLAZYPTR: + break; + default: + if (!is_safe_const(object)) + add_alias_member(worst_case, make_alias(object, 0, 0)); + } + } + + if (offset > object->type->size) + return NULL; + else + return alias; +} + +Alias *make_alias_set(void) { + return create_alias(AliasType2, NULL, 0, 0, 0); +} + +void add_alias_member(Alias *parent, Alias *child) { + AliasMember *member; + + if (child->type == AliasType2) { + for (member = child->parents; member; member = member->nextParent) + add_alias_member(parent, member->child); + } else { + if (parent == worst_case && child->type == AliasType1) + child = make_alias(child->object, 0, 0); + + for (member = parent->parents; member; member = member->nextParent) { + if (member->child == child) + return; + } + + member = lalloc(sizeof(AliasMember)); + member->parent = parent; + member->child = child; + member->nextParent = parent->parents; + parent->parents = member; + member->nextChild = child->children; + child->children = member; + } +} + +Alias *make_alias_set_from_IR(void) { + CError_FATAL(333); + return NULL; +} + +static Boolean aliases_overlap(Alias *a, Alias *b) { + return ( + a->offset == b->offset || + (a->offset > b->offset && a->offset < (b->offset + b->size)) || + (b->offset > a->offset && b->offset < (a->offset + a->size)) + ); +} + +static int is_address_load(PCode *pcode) { + Object *obj; + + switch (pcode->op) { + case PC_LWZ: + if (pcode->args[2].kind == PCOp_MEMORY && pcode->args[2].data.mem.obj->datatype == DNONLAZYPTR) + return 1; + break; + case PC_LBZU: + case PC_LBZUX: + case PC_LHZU: + case PC_LHZUX: + case PC_LHAU: + case PC_LHAUX: + case PC_LWZU: + case PC_LWZUX: + case PC_STBU: + case PC_STBUX: + case PC_STHU: + case PC_STHUX: + case PC_STWU: + case PC_STWUX: + return 1; + case PC_ADDI: + case PC_ADDIS: + if (pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) { + if (pcode->args[2].kind == PCOp_MEMORY) { + obj = pcode->args[2].data.mem.obj; + if (obj->datatype == DLOCAL && !is_safe_const(obj)) + add_alias_member(worst_case, make_alias(obj, 0, 0)); + return 0; + } + } else { + return 1; + } + break; + case PC_ADD: + return 1; + } + + return 0; +} + +static int addresspropagatestouse(int candidateID, int useID) { + PCode *candidate_pcode; // r30 + PCode *use_pcode; // r29 + int reg; // r28 + short reg2; + Object *object; // r27 + SInt32 offset; // r26 + Alias *alias; // r25 + Boolean flag24; // r24 + SInt32 size; // r23 + Alias *aliasSet; // r22 + int i; + PCode *scan; + PCodeArg *op; + + candidate_pcode = Candidates[candidateID].pcode; + use_pcode = Uses[useID].pcode; + flag24 = 0; + size = 1; + reg = candidate_pcode->args[0].data.reg.reg; + + if (candidate_pcode->alias && (candidate_pcode->alias->type == AliasType0 || candidate_pcode->alias->type == AliasType1)) { + object = candidate_pcode->alias->object; + offset = candidate_pcode->alias->offset; + if (offset == 0 && candidate_pcode->alias->size == object->type->size) + flag24 = 1; + } else if (candidate_pcode->args[2].kind == PCOp_MEMORY) { + object = candidate_pcode->args[2].data.mem.obj; + if (candidate_pcode->op == PC_ADDIS) + offset = candidate_pcode->args[2].data.mem.offset << 16; + else + offset = candidate_pcode->args[2].data.mem.offset; + } else { + return 0; + } + + CError_ASSERT(478, object->otype == OT_OBJECT); + + if ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) { + reg = candidate_pcode->args[1].data.reg.reg; + offset = 0; + flag24 = 1; + } else if (candidate_pcode->op == PC_LWZ) { + if (object->datatype != DNONLAZYPTR) + return 0; + + object = object->u.var.realObj; + CError_ASSERT(495, object->otype == OT_OBJECT); + offset = 0; + } else if (candidate_pcode->op == PC_ADDI) { + if (!candidate_pcode->alias && object) + candidate_pcode->alias = make_alias(object, offset, 1); + } else if (candidate_pcode->op == PC_ADDIS) { + if (!candidate_pcode->alias && object) + candidate_pcode->alias = make_alias(object, offset, 1); + } else if (candidate_pcode->op == PC_ADD) { + offset = 0; + flag24 = 1; + } else { + CError_FATAL(509); + } + + if ( + !(use_pcode->flags & (fIsRead | fIsWrite)) && + use_pcode->op != PC_ADDI && + use_pcode->op != PC_ADD && + use_pcode->op != PC_ADDIS + ) { + if (object->datatype == DLOCAL && !is_safe_const(object)) + add_alias_member(worst_case, make_alias(object, 0, 0)); + return 1; + } + + if ( + (use_pcode->flags & (fIsWrite | fPCodeFlag40000)) && + use_pcode->args[0].kind == PCOp_REGISTER && + use_pcode->args[0].arg == RegClass_GPR && + use_pcode->args[0].data.reg.reg == reg && + object->datatype == DLOCAL && + !is_safe_const(object) + ) + add_alias_member(worst_case, make_alias(object, 0, 0)); + + if (use_pcode->argCount < 3) + return 1; + + CError_ASSERT(543, use_pcode->args[1].kind == PCOp_REGISTER); + + if (candidate_pcode->block == use_pcode->block && precedes(candidate_pcode, use_pcode)) { + for (scan = candidate_pcode->nextPCode; scan && scan != use_pcode; scan = scan->nextPCode) { + for (op = scan->args, i = scan->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg) + return 1; + } + } + } else { + if (!bitvectorgetbit(candidateID, propinfo[use_pcode->block->blockIndex].vec8)) { + if (bitvectorgetbit(candidate_pcode->defID, usedefinfo[use_pcode->block->blockIndex].defvec8)) { + for (scan = use_pcode->block->firstPCode; scan && scan != use_pcode; scan = scan->nextPCode) { + for (op = scan->args, i = scan->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg) + return 1; + } + } + } else { + return 1; + } + } + + for (scan = use_pcode->block->firstPCode; scan; scan = scan->nextPCode) { + if (scan == use_pcode) + break; + for (op = scan->args, i = scan->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg) + return 1; + } + } + } + + CError_ASSERT(598, object != NULL); + + if (use_pcode->op == PC_ADDI || use_pcode->op == PC_ADD || use_pcode->op == PC_ADDIS) { + if (use_pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR] && !is_safe_const(object)) + add_alias_member(worst_case, make_alias(object, 0, 0)); + } + + if (use_pcode->flags & (fIsRead | fIsWrite)) + size = nbytes_loaded_or_stored_by(use_pcode); + + if (use_pcode->args[2].kind == PCOp_REGISTER) { + if (use_pcode->args[1].data.reg.reg == 0) { + if (use_pcode->args[2].data.reg.reg == reg) + alias = make_alias(object, offset, size); + } else { + if (use_pcode->args[1].data.reg.reg == reg) + reg2 = use_pcode->args[2].data.reg.reg; + else if (use_pcode->args[2].data.reg.reg == reg) + reg2 = use_pcode->args[1].data.reg.reg; + else + return 1; + + for (scan = use_pcode->prevPCode; scan; scan = scan->prevPCode) { + if (scan->op == PC_LI && scan->args[0].data.reg.reg == reg2) + break; + + for (i = 0; i < scan->argCount; i++) { + if (scan->args[i].kind == PCOp_REGISTER && + scan->args[i].arg == RegClass_GPR && + scan->args[i].data.reg.reg == reg2 && + (scan->args[i].data.reg.effect & EffectWrite)) { + scan = NULL; + break; + } + } + + if (!scan) + break; + } + + if (scan) { + offset += scan->args[1].data.mem.offset; + alias = make_alias(object, offset, size); + } else { + alias = make_alias(object, 0, 0); + } + } + } else { + if (use_pcode->args[1].kind != PCOp_REGISTER || + use_pcode->args[1].arg != RegClass_GPR || + use_pcode->args[1].data.reg.reg != reg) + return 1; + + if (use_pcode->args[1].data.reg.effect & EffectWrite) { + alias = make_alias(object, 0, 0); + } else if (use_pcode->args[2].kind == PCOp_IMMEDIATE) { + if (use_pcode->op == PC_ADDIS) { + offset += use_pcode->args[2].data.imm.value << 16; + alias = make_alias(object, offset, 1); + } else { + offset += use_pcode->args[2].data.imm.value; + alias = make_alias(object, offset, size); + } + } else { + return 1; + } + } + + if (flag24) + alias = make_alias(object, 0, 0); + + if (!alias) + return 1; + + if (!use_pcode->alias) { + if ( + use_pcode->op == PC_ADDI || + use_pcode->op == PC_ADD || + use_pcode->op == PC_ADDIS || + ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) + ) + recursive_propagation = 1; + } + + if (use_pcode->alias) { + if (use_pcode->alias == worst_case) { + add_alias_member(worst_case, make_alias(object, 0, 0)); + } else if (use_pcode->alias == alias) { + return 1; + } else if (use_pcode->alias->type == AliasType0 || use_pcode->alias->type == AliasType1) { + if (object == use_pcode->alias->object) { + use_pcode->alias = make_alias(object, 0, 0); + } else { + aliasSet = make_alias_set(); + if ( + use_pcode->op == PC_ADDI || + use_pcode->op == PC_ADD || + use_pcode->op == PC_ADDIS || + ((use_pcode->flags & (fIsRead | fIsWrite)) && (use_pcode->flags & fUpdatesPtr)) + ) { + if (alias->type == AliasType2) + add_alias_member(worst_case, alias); + else + add_alias_member(worst_case, make_alias(use_pcode->alias->object, 0, 0)); + } + add_alias_member(aliasSet, use_pcode->alias); + add_alias_member(aliasSet, alias); + use_pcode->alias = aliasSet; + } + } else { + add_alias_member(use_pcode->alias, alias); + } + } else { + use_pcode->alias = alias; + } + + propagated_instructions = 1; + return 1; +} + +static void finishpropagatealiases(int id) { + propagated_instructions = 1; +} + +static Propagation alias_prop = { + &is_address_load, + &addresspropagatestouse, + &finishpropagatealiases, + "ALIAS", + "ALIASES", + "A%" PRId32, + 1 +}; + +static void propagatealiasinfo(Object *proc) { + propagateinstructions(proc, &alias_prop, (copts.optimizationlevel >= 4) ? 4 : 1, 1); +} + +void gather_alias_info(void) { + UInt32 *myvec; // r31 + Alias *alias; // r22 + AliasMember *member; + AliasMember *member2; + PCodeBlock *block; // r21 + PCode *pcode; // r20 + PCodeArg *op; // r19 + RegUseOrDef *list; // r18 + int i; // r17 + Alias *alias_choice; // r16 + int aliases_idx; // r15 (helper in r23) + PCode *defpcode; // r14 + Alias *alias_array[3]; + UseOrDef *def; + int defID; + + if (coloring) { + propagatealiasinfo(gFunction); + myvec = oalloc(4 * ((number_of_Defs + 31) >> 5)); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs); + + for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) { + if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + if (!pcode->alias) { + pcode->alias = worst_case; + } else { + if ((pcode->alias->type == AliasType0 || pcode->alias->type == AliasType1) && + pcode->alias->size == nbytes_loaded_or_stored_by(pcode)) { + pcode->flags &= ~fIsPtrOp; + } else { + pcode->flags |= fIsPtrOp; + } + + if (pcode->alias != worst_case) { + aliases_idx = 0; + alias_choice = NULL; + op = pcode->args; + for (i = 0; i < pcode->argCount; i++, op++) { + if ( + (!(pcode->flags & (fIsWrite | fPCodeFlag40000)) || op != pcode->args) && + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectRead) + ) { + alias_array[aliases_idx] = NULL; + if (aliases_idx >= 2) { + alias_choice = worst_case; + break; + } + alias_array[aliases_idx] = pcode->alias; + + for (list = reg_Defs[RegClass_GPR][op->data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(list->id, myvec)) { + defpcode = Defs[list->id].pcode; + if (!defpcode->alias || !is_address_load(defpcode) || defpcode->alias == worst_case) { + alias_array[aliases_idx] = worst_case; + break; + } + } + } + + aliases_idx++; + } + } + + if (!alias_choice) { + if (aliases_idx > 0) { + alias_choice = alias_array[0]; + if (aliases_idx == 2) { + if (alias_array[0] != worst_case) { + if (alias_array[1] != worst_case) + alias_choice = worst_case; + } else if (alias_array[1] != worst_case) { + alias_choice = alias_array[1]; + } + } + } + + if (alias_choice == worst_case) { + pcode->flags |= fIsPtrOp; + if (pcode->alias->type == AliasType2) + add_alias_member(worst_case, pcode->alias); + else + add_alias_member(worst_case, make_alias(pcode->alias->object, 0, 0)); + } + + if (alias_choice) + pcode->alias = alias_choice; + } + } + } + } else { + if ((pcode->flags & fIsCall) && !pcode->alias) + pcode->alias = worst_case; + } + + for (def = &Defs[defID = pcode->defID]; defID < number_of_Defs && def->pcode == pcode; defID++, def++) { + if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) { + for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next) + bitvectorclearbit(list->id, myvec); + } + bitvectorsetbit(defID, myvec); + } + } + } + + freeoheap(); + } else { + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) { + if ((pcode->flags & (fIsRead | fIsWrite | fIsCall | fPCodeFlag20000 | fPCodeFlag40000)) && !pcode->alias) + pcode->alias = worst_case; + } + } + } + + if (n_gathered_aliases != n_aliases) { + for (alias = aliases; alias; alias = alias->next) { + if (alias->type == AliasType2) { + alias->vec24 = lalloc(4 * ((n_aliases + 31) >> 5)); + bitvectorinitialize(alias->vec24, n_aliases, 0); + for (member = alias->parents; member; member = member->nextParent) { + bitvectorsetbit(member->child->index, alias->vec24); + for (member2 = member->child->parents; member2; member2 = member2->nextParent) + bitvectorsetbit(member2->child->index, alias->vec24); + } + } + } + n_gathered_aliases = n_aliases; + } +} + +static Boolean may_alias_alias(Alias *a, Alias *b) { + switch ((a->type * 3) + b->type) { + case (AliasType0 * 3) + AliasType0: + return a == b; + case (AliasType0 * 3) + AliasType1: + case (AliasType1 * 3) + AliasType0: + return a->object == b->object; + case (AliasType1 * 3) + AliasType1: + return (a->object == b->object) && aliases_overlap(a, b); + case (AliasType0 * 3) + AliasType2: + case (AliasType1 * 3) + AliasType2: + return bitvectorgetbit(a->index, b->vec24) != 0; + case (AliasType2 * 3) + AliasType0: + case (AliasType2 * 3) + AliasType1: + return bitvectorgetbit(b->index, a->vec24) != 0; + case (AliasType2 * 3) + AliasType2: + return (a == b) || !bitvectorintersectionisempty(a->vec24, b->vec24, n_aliases); + default: + CError_FATAL(1054); + return 1; + } +} + +Boolean may_alias(PCode *a, PCode *b) { + return may_alias_alias(a->alias, b->alias); +} + +Boolean uniquely_aliases(PCode *a, PCode *b) { + if (may_alias_alias(a->alias, b->alias)) { + if ( + a->alias->type != AliasType2 && + b->alias->type != AliasType2 && + a->alias && + b->alias && + a->alias->size == nbytes_loaded_or_stored_by(a) && + b->alias->size == nbytes_loaded_or_stored_by(b) + ) + return 1; + } + + return 0; +} + +Boolean may_alias_worst_case(PCode *pcode) { + return may_alias_alias(pcode->alias, worst_case); +} + +Boolean may_alias_object(PCode *pcode, Object *object) { + return may_alias_alias(pcode->alias, make_alias(object, 0, 0)); +} + +void initialize_alias_values(void) { + Alias *alias; + + for (alias = aliases; alias; alias = alias->next) { + alias->valuenumber = nextvaluenumber++; + alias->valuepcode = NULL; + } +} + +void update_alias_value(Alias *alias, PCode *pcode) { + AliasMember *member; + AliasMember *member2; + AliasMember *member3; + + switch (alias->type) { + case AliasType0: + killmemory(alias, pcode); + for (member = alias->children; member; member = member->nextChild) { + CError_ASSERT(1152, member->parent->type == AliasType2); + killmemory(member->parent, NULL); + } + for (member = alias->parents; member; member = member->nextParent) { + CError_ASSERT(1157, member->child->type == AliasType1); + killmemory(member->child, NULL); + for (member2 = member->child->children; member2; member2 = member2->nextChild) { + if (member2->parent != alias) { + CError_ASSERT(1163, member2->parent->type == AliasType2); + killmemory(member2->parent, NULL); + } + } + } + break; + + case AliasType1: + killmemory(alias, pcode); + for (member = alias->children; member; member = member->nextChild) { + killmemory(member->parent, NULL); + if (member->parent->type == AliasType0) { + for (member2 = member->parent->parents; member2; member2 = member2->nextParent) { + if (member2->child != alias && aliases_overlap(alias, member2->child)) { + killmemory(member2->child, NULL); + } + } + } + } + break; + + case AliasType2: + killmemory(alias, NULL); + for (member = alias->parents; member; member = member->nextParent) { + killmemory(member->child, NULL); + for (member2 = member->child->children; member2; member2 = member2->nextChild) { + if (member2->parent != alias) + killmemory(member2->parent, NULL); + } + for (member3 = member->child->parents; member3; member3 = member3->nextParent) { + killmemory(member3->child, NULL); + for (member2 = member3->child->children; member2; member2 = member2->nextChild) { + if (member2->parent != member->child) + killmemory(member2->parent, NULL); + } + } + } + break; + } +} + +void update_all_alias_values(void) { + Alias *alias; + + for (alias = aliases; alias; alias = alias->next) + killmemory(alias, NULL); +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c new file mode 100644 index 0000000..8ce2962 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c @@ -0,0 +1,906 @@ +#include "compiler/CodeMotion.h" +#include "compiler/Alias.h" +#include "compiler/BitVectors.h" +#include "compiler/LoopDetection.h" +#include "compiler/LoopOptimization.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/UseDefChains.h" +#include "compiler/RegisterInfo.h" + +int movedloopinvariantcode; +int unswitchedinvariantcode; + +static int isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec, int flag1, int flag2) { + PCodeArg *op; + RegUseOrDef *list; + int i; + + if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + if (pcode->alias) { + if (pcode->alias->type == AliasType2 || (pcode->flags & (fIsVolatile | fSideEffects))) + return 0; + + if (pcode->flags & fIsRead) { + for (list = findobjectusedef(pcode->alias->object)->defs; list; list = list->next) { + if ( + may_alias(pcode, Defs[list->id].pcode) && + bitvectorgetbit(list->id, vec) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + + if (pcode->flags & fIsWrite) { + for (list = findobjectusedef(pcode->alias->object)->uses; list; list = list->next) { + if ( + may_alias(pcode, Uses[list->id].pcode) && + bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + } else { + return 0; + } + } + + if ((pcode->flags & fIsWrite) && !bitvectorgetbit(pcode->block->blockIndex, loop->vec2C)) + return 0; + + op = pcode->args; + i = pcode->argCount; + while (i--) { + switch (op->kind) { + case PCOp_MEMORY: + if ((pcode->flags & fIsRead) && ((pcode->flags == 0) & 0x40)) { + for (list = findobjectusedef(op->data.mem.obj)->defs; list; list = list->next) { + if ( + may_alias(pcode, Defs[list->id].pcode) && + bitvectorgetbit(list->id, vec) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + if (pcode->flags & fIsWrite) { + for (list = findobjectusedef(op->data.mem.obj)->uses; list; list = list->next) { + if ( + may_alias(pcode, Uses[list->id].pcode) && + bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + break; + case PCOp_REGISTER: + if (op->data.reg.effect & (EffectRead | EffectWrite)) { + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) { + if (op->data.reg.reg == _FP_) + break; + if (op->data.reg.reg == _CALLER_SP_) + break; + if (op->data.reg.reg == 2) + break; + } + if (op->data.reg.reg < n_real_registers[op->arg]) { + if (op->arg == RegClass_CRFIELD) { + if (!flag2 || (op->data.reg.effect & EffectRead)) + return 0; + } else if (op->arg == RegClass_SPR) { + if (!flag1) + return 0; + } else { + return 0; + } + } else if (op->data.reg.effect & EffectRead) { + if (flag1 && op->kind == PCOp_REGISTER && op->arg == RegClass_SPR) + break; + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) { + if (op->data.reg.reg == _FP_) + break; + if (op->data.reg.reg == _CALLER_SP_) + break; + if (op->data.reg.reg == 2) + break; + } + + for (list = reg_Defs[op->arg][op->data.reg.reg]; list; list = list->next) { + if ( + bitvectorgetbit(list->id, vec) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + } + break; + } + op++; + } + + return 1; +} + +static int isuniquedefinition(PCode *pcode, Loop *loop) { + RegUseOrDef *list; + int defID; + UseOrDef *def; + + defID = pcode->defID; + def = &Defs[defID]; + if (defID >= number_of_Defs) + return 0; + if (def->pcode != pcode) + return 0; + if ((defID + 1) < number_of_Defs && def[1].pcode == pcode) + return 0; + + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) && + list->id != defID + ) + return 0; + } + } else if (def->v.kind == PCOp_MEMORY) { + for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) { + if ( + may_alias(pcode, Defs[list->id].pcode) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) && + list->id != defID + ) + return 0; + } + } else { + CError_FATAL(292); + } + + return 1; +} + +static int uniquelyreachesuse(int defID, int useID) { + UseOrDef *def; + UseOrDef *use; + RegUseOrDef *list; + PCode *pcode; + + def = &Defs[defID]; + use = &Uses[useID]; + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) { + if ( + list->id != defID && + bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8) + ) + break; + } + } else if (def->v.kind == PCOp_MEMORY) { + for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) { + if ( + may_alias(def->pcode, Defs[list->id].pcode) && + list->id != defID && + bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8) + ) + break; + } + } + + if (!list) + return 1; + + if (def->pcode->block == use->pcode->block) { + for (pcode = use->pcode->prevPCode; pcode; pcode = pcode->prevPCode) { + if (pcode == def->pcode) + return 1; + } + } + + return 0; +} + +static int uniquelyreachesalluses(int defID, Loop *loop) { + UseOrDef *def; + RegUseOrDef *list; + + def = &Defs[defID]; + + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Uses[def->v.arg][def->v.u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) || + (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id)) + ) + return 0; + } + } else if (def->v.kind == PCOp_MEMORY) { + for (list = findobjectusedef(def->v.u.object)->uses; list; list = list->next) { + if (may_alias(def->pcode, Uses[list->id].pcode)) { + if ( + bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) || + (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id)) + ) + return 0; + } + } + } else { + CError_FATAL(382); + } + + return 1; +} + +static int isliveonexit(TinyValue *v, Loop *loop) { + RegUseOrDef *list; + UInt32 *vec; + + vec = usedefinfo[loop->preheader->blockIndex].usevec1C; + + if (v->kind == PCOp_REGISTER) { + for (list = reg_Uses[v->arg][v->u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(list->id, vec) && + !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 1; + } + } else if (v->kind == PCOp_MEMORY) { + for (list = findobjectusedef(v->u.object)->uses; list; list = list->next) { + if ( + bitvectorgetbit(list->id, vec) && + !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 1; + } + } + + return 0; +} + +static int dominatesallexits(PCode *pcode, Loop *loop) { + return bitvectorgetbit(pcode->block->blockIndex, loop->vec28) != 0; +} + +static int maymove(PCode *pcode, Loop *loop) { + short reg; + + if (!isuniquedefinition(pcode, loop)) + return 0; + if (!uniquelyreachesalluses(pcode->defID, loop)) + return 0; + if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop)) + return 0; + + if (loop->bodySize > 25) { + switch (pcode->op) { + case PC_LI: + if ( + pcode->nextPCode && + pcode->nextPCode->op == PC_LVX && + (pcode->nextPCode->flags & fIsConst) + ) { + reg = pcode->args[0].data.reg.reg; + if (pcode->nextPCode->args[1].data.reg.reg == reg || + pcode->nextPCode->args[2].data.reg.reg == reg) + return 1; + } + case PC_VSPLTISB: + case PC_VSPLTISH: + case PC_VSPLTISW: + return 0; + default: + if (!bitvectorgetbit(pcode->block->blockIndex, loop->vec2C)) + return 0; + } + } + + return 1; +} + +static void moveinvariantcomputation(PCode *pcode, Loop *loop) { + ObjectUseDef *oud; + BlockList *blocklist; + RegUseOrDef *list; + UseOrDef *def; + int defID; + + defID = pcode->defID; + def = &Defs[defID]; + deletepcode(pcode); + insertpcodebefore(loop->preheader->lastPCode, pcode); + loop->bodySize--; + movedloopinvariantcode = 1; + + if (def->v.kind == PCOp_REGISTER) { + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) + bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8); + bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8); + } + } else if (def->v.kind == PCOp_MEMORY) { + oud = findobjectusedef(def->v.u.object); + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (list = oud->defs; list; list = list->next) { + if (uniquely_aliases(pcode, Defs[list->id].pcode)) + bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8); + } + bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8); + } + } else { + CError_FATAL(545); + } +} + +static int srawi_addze_maymove(PCode *pcode, Loop *loop) { + RegUseOrDef *list; + UseOrDef *def; + int defID; + int nextDefID; + + defID = pcode->defID; + nextDefID = pcode->nextPCode->defID; + + def = &Defs[defID]; + if (defID >= number_of_Defs) + return 0; + if (def->pcode != pcode) + return 0; + if ((defID + 1) < number_of_Defs && def[1].pcode == pcode) + return 0; + + if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) { + for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) && + list->id != defID && + list->id != nextDefID + ) + return 0; + } + } else { + CError_FATAL(582); + } + + if (!uniquelyreachesalluses(pcode->defID, loop)) + return 0; + if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop)) + return 0; + if (!dominatesallexits(pcode->nextPCode, loop) && isliveonexit(&Defs[pcode->nextPCode->defID].v, loop)) + return 0; + + return 1; +} + +static int srawi_addze_isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec) { + static PCode *oldNextInstr; + PCode *nextInstr; + + nextInstr = pcode->nextPCode; + if ( + pcode->op == PC_ADDZE && + oldNextInstr == pcode + ) { + oldNextInstr = NULL; + return 1; + } else if ( + pcode->op == PC_SRAWI && + nextInstr && + nextInstr->op == PC_ADDZE && + pcode->args[0].data.reg.reg == nextInstr->args[0].data.reg.reg && + nextInstr->args[0].data.reg.reg == nextInstr->args[1].data.reg.reg && + !(pcode->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) && + !(nextInstr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) && + isloopinvariant(pcode, loop, vec, 1, 0) && + srawi_addze_maymove(pcode, loop) + ) { + oldNextInstr = nextInstr; + return 1; + } else { + oldNextInstr = NULL; + return 0; + } +} + +static void removeblockfromloop(Loop *loop, PCodeBlock *block) { + BlockList *list; + BlockList **ptr; + + bitvectorclearbit(block->blockIndex, loop->memberblocks); + bitvectorclearbit(block->blockIndex, loop->vec24); + bitvectorclearbit(block->blockIndex, loop->vec28); + bitvectorclearbit(block->blockIndex, loop->vec2C); + loop->bodySize -= block->pcodeCount; + + ptr = &loop->blocks; + while ((list = *ptr)) { + if (list->block == block) + *ptr = list->next; + else + ptr = &list->next; + } +} + +static void changesuccessor(PCodeBlock *block, PCodeBlock *from, PCodeBlock *to) { + PCLink **ptr; + PCLink *link; + + for (link = block->successors; link; link = link->nextLink) { + if (link->block == from) + link->block = to; + } + + ptr = &from->predecessors; + while ((link = *ptr)) { + if (link->block == block) { + *ptr = link->nextLink; + link->nextLink = to->predecessors; + to->predecessors = link; + } else { + ptr = &link->nextLink; + } + } +} + +static void movesuccessor(PCodeBlock *to, PCodeBlock *from, PCodeBlock *block) { + PCLink **ptr; + PCLink *link; + + for (link = block->predecessors; link; link = link->nextLink) { + if (link->block == from) + link->block = to; + } + + ptr = &from->successors; + while ((link = *ptr)) { + if (link->block == block) { + *ptr = link->nextLink; + link->nextLink = to->successors; + to->successors = link; + } else { + ptr = &link->nextLink; + } + } +} + +static void movecmptopreheader(Loop *loop, PCodeBlock *block, PCode *pc1, PCode *pc2, PCodeArg *op) { + PCodeBlock *preheader; + PCode *pc3; + + preheader = loop->preheader; + if (PCODE_FLAG_SET_F(pc1) & fRecordBit) { + moveinvariantcomputation(pc1, loop); + } else { + deletepcode(pc1); + insertpcodebefore(loop->preheader->lastPCode, pc1); + loop->bodySize--; + movedloopinvariantcode = 1; + } + loop->preheader = NULL; + + insertpreheaderblock(loop); + + pc3 = preheader->lastPCode; + CError_ASSERT(775, pc3->op == PC_B); + deletepcode(pc3); + deletepcode(pc2); + appendpcode(preheader, pc2); + movesuccessor(preheader, block, op->data.label.label->block); +} + +static PCodeBlock *appendheadercopy(Loop *loop, PCodeBlock *block1, PCodeBlock *block2, PCodeBlock *block3) { + PCodeBlock *newblock1; + PCodeBlock *newblock2; + PCLink *link; + PCode *scan; + + newblock1 = lalloc(sizeof(PCodeBlock)); + newblock2 = lalloc(sizeof(PCodeBlock)); + + newblock1->labels = NULL; + newblock1->predecessors = newblock1->successors = NULL; + newblock1->firstPCode = newblock1->lastPCode = NULL; + newblock1->pcodeCount = 0; + newblock1->loopWeight = loop->body->loopWeight; + newblock1->flags = 0; + newblock1->blockIndex = pcblockcount++; + + newblock2->labels = NULL; + newblock2->predecessors = newblock2->successors = NULL; + newblock2->firstPCode = newblock2->lastPCode = NULL; + newblock2->pcodeCount = 0; + newblock2->loopWeight = loop->body->loopWeight; + newblock2->flags = 0; + newblock2->blockIndex = pcblockcount++; + + newblock1->nextBlock = newblock2; + newblock2->prevBlock = newblock1; + newblock1->prevBlock = block1; + newblock2->nextBlock = block1->nextBlock; + block1->nextBlock = newblock1; + newblock2->nextBlock->prevBlock = newblock2; + + pclabel(newblock1, makepclabel()); + pclabel(newblock2, makepclabel()); + + changesuccessor(block1, block1->successors->block, newblock1); + + link = lalloc(sizeof(PCLink)); + link->block = newblock2; + link->nextLink = newblock1->successors; + newblock1->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = newblock2->predecessors; + newblock2->predecessors = link; + + appendpcode(newblock2, makepcode(PC_B, block2->nextBlock->labels)); + pcbranch(newblock2, block2->nextBlock->labels); + pccomputepredecessors1(newblock2); + + for (scan = block2->firstPCode; scan; scan = scan->nextPCode) + appendpcode(newblock1, copypcode(scan)); + + pcbranch(newblock1, block3->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = block3->predecessors; + block3->predecessors = link; + + addblocktoloop(loop, newblock1); + if (bitvectorgetbit(block2->blockIndex, loop->vec28)) + bitvectorsetbit(newblock1->blockIndex, loop->vec28); + if (bitvectorgetbit(block2->blockIndex, loop->vec2C)) + bitvectorsetbit(newblock1->blockIndex, loop->vec2C); + + for (loop = loop->parent; loop; loop = loop->parent) { + addblocktoloop(loop, newblock1); + if (bitvectorgetbit(block2->blockIndex, loop->vec28)) + bitvectorsetbit(newblock1->blockIndex, loop->vec28); + if (bitvectorgetbit(block2->blockIndex, loop->vec2C)) + bitvectorsetbit(newblock1->blockIndex, loop->vec2C); + + addblocktoloop(loop, newblock2); + if (bitvectorgetbit(block2->blockIndex, loop->vec28)) + bitvectorsetbit(newblock2->blockIndex, loop->vec28); + if (bitvectorgetbit(block2->blockIndex, loop->vec2C)) + bitvectorsetbit(newblock2->blockIndex, loop->vec2C); + } + + return newblock1; +} + +static BlockList *findswitchpath(Loop *loop, PCodeBlock *block) { + BlockList *head; + BlockList *tail; + BlockList *node; + PCodeBlock *scan; + + head = NULL; + tail = NULL; + + for (scan = block; scan && scan != loop->body; scan = scan->successors->block) { + if (!bitvectorgetbit(scan->blockIndex, loop->memberblocks)) + return NULL; + if (scan->successors && scan->successors->nextLink) + return NULL; + + node = oalloc(sizeof(BlockList)); + node->block = scan; + node->next = NULL; + if (head) { + tail->next = node; + tail = node; + } else { + head = node; + tail = node; + } + } + + return head; +} + +static void simpleunswitchloop(Loop *loop) { + PCode *pc29; + PCodeArg *op27; + UInt32 *myvec; + PCodeBlock *block26; + PCode *pc25; // r25 + BlockList *path2_24; + PCodeArg *op23; + PCode *pc23; // r23 + BlockList *scanlist; // r23 + BlockList *bestpath1; // r23 + BlockList *bestpath2; // r22 + PCodeBlock *headercopy; // r22 + Loop *newloop; // r21 + PCodeBlock *preheader21; + BlockList *path20; + PCode *scan20; + PCode *lastpcode; + int i; + BlockList *pathiter1; + BlockList *pathiter2; + + if (!(lastpcode = loop->body->lastPCode)) + return; + if (lastpcode->op != PC_BT && lastpcode->op != PC_BF) + return; + if (lastpcode->args[2].kind != PCOp_LABEL) + return; + if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks)) + return; + if (loop->x57) + return; + if (loop->x4D) + return; + if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks)) + return; + + for (block26 = pcbasicblocks; block26; block26 = block26->nextBlock) { + if (bitvectorgetbit(block26->blockIndex, loop->memberblocks)) + break; + } + + if (!block26) + return; + + myvec = oalloc(4 * ((number_of_Defs + 31) >> 5)); + bitvectorcopy(myvec, usedefinfo[block26->blockIndex].defvec8, number_of_Defs); + for (pc25 = loop->preheader->nextBlock->firstPCode; pc25; pc25 = pc25->nextPCode) { + if (!(PCODE_FLAG_SET_F(pc25) & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects | fRecordBit))) { + if (isloopinvariant(pc25, loop, myvec, 0, 1)) + break; + } + } + + if (!pc25 || pc25->argCount < 1) + return; + + if ( + pc25->argCount < 1 || + pc25->args[0].kind != PCOp_REGISTER || + pc25->args[0].arg != RegClass_CRFIELD + ) + return; + + pc29 = pc25->block->lastPCode; + if ( + !pc29 || + !(pc29->flags & fIsBranch) || + pc29->args[0].kind != PCOp_REGISTER || + pc29->args[0].arg != RegClass_CRFIELD + ) + return; + + if (pc29->args[0].data.reg.reg != pc25->args[0].data.reg.reg) + return; + + op27 = NULL; + for (i = 0; i < pc29->argCount; i++) { + if (pc29->args[i].kind == PCOp_LABEL) + op27 = &pc29->args[i]; + } + + if (op27) { + preheader21 = loop->preheader; + + path20 = findswitchpath(loop, block26->nextBlock); + if (!path20) + return; + + path2_24 = findswitchpath(loop, op27->data.label.label->block); + if (!path2_24) + return; + + bestpath1 = NULL; + bestpath2 = NULL; + for (pathiter1 = path20; pathiter1; pathiter1 = pathiter1->next) { + for (pathiter2 = path2_24; pathiter2; pathiter2 = pathiter2->next) { + if (pathiter1->block == pathiter2->block) { + bestpath1 = pathiter1; + break; + } + } + if (bestpath1) + break; + bestpath2 = pathiter1; + } + + CError_ASSERT(1192, bestpath2->block); + + if (bestpath2->block->lastPCode && bestpath2->block->lastPCode->op == PC_B) + deletepcode(bestpath2->block->lastPCode); + + while (bestpath1) { + for (scan20 = bestpath1->block->firstPCode; scan20; scan20 = scan20->nextPCode) { + if (scan20->op != PC_B) + appendpcode(bestpath2->block, copypcode(scan20)); + } + bestpath1 = bestpath1->next; + } + + headercopy = appendheadercopy(loop, bestpath2->block, loop->body, block26); + movecmptopreheader(loop, block26, pc25, pc29, op27); + + if (block26->pcodeCount) { + if (path2_24->block->firstPCode) { + pc23 = path2_24->block->firstPCode; + for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) { + if (scan20->op != PC_B) + insertpcodebefore(pc23, copypcode(scan20)); + } + } else { + for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) { + if (scan20->op != PC_B) + appendpcode(path2_24->block, copypcode(scan20)); + } + } + } + + op23 = NULL; + for (i = 0; i < loop->body->lastPCode->argCount; i++) { + if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) + op23 = &loop->body->lastPCode->args[i]; + } + + CError_ASSERT(1250, op23 != NULL); + + changesuccessor(loop->body, op23->data.label.label->block, path2_24->block); + op23->data.label.label = path2_24->block->labels; + + op23 = NULL; + for (i = 0; i < preheader21->lastPCode->argCount; i++) { + if (preheader21->lastPCode->args[i].kind == PCOp_LABEL) + op23 = &preheader21->lastPCode->args[i]; + } + + CError_ASSERT(1267, op23 != NULL); + + changesuccessor(preheader21, op23->data.label.label->block, loop->body); + op23->data.label.label = loop->body->labels; + + op23 = NULL; + for (i = 0; i < loop->preheader->lastPCode->argCount; i++) { + if (loop->preheader->lastPCode->args[i].kind == PCOp_LABEL) + op23 = &loop->preheader->lastPCode->args[i]; + } + + CError_ASSERT(1284, op23 != NULL); + + changesuccessor(loop->preheader, op23->data.label.label->block, headercopy); + op23->data.label.label = headercopy->labels; + + newloop = lalloc(sizeof(Loop)); + newloop->parent = loop->parent; + newloop->children = NULL; + newloop->nextSibling = loop->nextSibling; + loop->nextSibling = newloop; + newloop->body = loop->body; + newloop->preheader = NULL; + newloop->blocks = NULL; + newloop->basicInductionVars = NULL; + newloop->footer = NULL; + newloop->pc18 = NULL; + newloop->loopWeight = loop->loopWeight; + + bitvectorinitialize(newloop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(newloop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(newloop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(newloop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + + removeblockfromloop(loop, newloop->body); + addblocktoloop(newloop, newloop->body); + + bitvectorsetbit(newloop->body->blockIndex, newloop->vec24); + bitvectorsetbit(newloop->body->blockIndex, newloop->vec2C); + bitvectorsetbit(newloop->body->blockIndex, newloop->vec28); + + for (scanlist = path2_24; scanlist; scanlist = scanlist->next) { + removeblockfromloop(loop, scanlist->block); + addblocktoloop(newloop, scanlist->block); + bitvectorsetbit(scanlist->block->blockIndex, newloop->vec2C); + } + + newloop->preheader = NULL; + insertpreheaderblock(newloop); + analyzeloop(newloop); + + loop->body = headercopy; + + for (scanlist = loop->blocks; scanlist; scanlist = scanlist->next) + bitvectorsetbit(scanlist->block->blockIndex, loop->vec2C); + + bitvectorsetbit(headercopy->blockIndex, loop->vec24); + analyzeloop(loop); + + unswitchedinvariantcode = 1; + } +} + +static void simpleunswitchloops(Loop *loop) { + while (loop) { + if (loop->children) + simpleunswitchloops(loop->children); + else if (!loop->x4F) + simpleunswitchloop(loop); + loop = loop->nextSibling; + } +} + +static void moveinvariantsfromloop(Loop *loop) { + RegUseOrDef *list; + BlockList *blocklist; + PCode *instr; + PCode *nextInstr; + UInt32 *myvec; + UseOrDef *def; + int defID; + int flag; + PCodeBlock *block; + + myvec = oalloc(4 * ((number_of_Defs + 31) >> 5)); + do { + flag = 0; + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + block = blocklist->block; + bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs); + for (instr = block->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + if (!(instr->flags & fIsBranch) && instr->argCount) { + if ( + !(instr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) && + isloopinvariant(instr, loop, myvec, 0, 0) && + maymove(instr, loop) + ) { + moveinvariantcomputation(instr, loop); + flag = 1; + } else if (srawi_addze_isloopinvariant(instr, loop, myvec)) { + moveinvariantcomputation(instr, loop); + flag = 1; + } + + for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) { + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) + bitvectorclearbit(list->id, myvec); + } else if (def->v.kind == PCOp_MEMORY) { + if (def->v.arg == PCOpMemory0) { + for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) { + if (uniquely_aliases(instr, Defs[list->id].pcode)) + bitvectorclearbit(list->id, myvec); + } + } + } else { + CError_FATAL(1434); + } + + bitvectorsetbit(defID, myvec); + } + } + } + } + } while (flag); +} + +static void moveinvariantsfromloops(Loop *loop) { + while (loop) { + if (loop->children) + moveinvariantsfromloops(loop->children); + moveinvariantsfromloop(loop); + loop = loop->nextSibling; + } +} + +void moveloopinvariantcode(void) { + unswitchedinvariantcode = 0; + movedloopinvariantcode = 0; + if (loopsinflowgraph) { + moveinvariantsfromloops(loopsinflowgraph); + simpleunswitchloops(loopsinflowgraph); + } + freeoheap(); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c new file mode 100644 index 0000000..2b40453 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c @@ -0,0 +1,643 @@ +#include "compiler/ConstantPropagation.h" +#include "compiler/Alias.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" + +int propagatedconstants; +static int changed; +static PCode **defininginstruction; +static PCode **vrdefininginstruction; + +static void computedefininginstructions(PCodeBlock *block) { + RegUseOrDef *list; + PCode *instr; + int i; + + for (i = 0; i < used_virtual_registers[RegClass_GPR]; i++) { + instr = NULL; + for (list = reg_Defs[RegClass_GPR][i]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) { + if (instr == NULL) { + instr = Defs[list->id].pcode; + } else { + instr = NULL; + break; + } + } + } + defininginstruction[i] = instr; + } + + for (i = 0; i < used_virtual_registers[RegClass_VR]; i++) { + instr = NULL; + for (list = reg_Defs[RegClass_VR][i]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) { + if (instr == NULL) { + instr = Defs[list->id].pcode; + } else { + instr = NULL; + break; + } + } + } + vrdefininginstruction[i] = instr; + } +} + +static PCode *isstackoperand(PCodeArg *op, SInt16 *resultValue, SInt16 addend) { + PCode *instr; + + if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_ADDI) { + if ( + instr->args[2].kind == PCOp_MEMORY && + (instr->args[1].data.reg.reg == _FP_ || instr->args[1].data.reg.reg == _CALLER_SP_) && + instr->args[2].data.mem.obj->datatype == DLOCAL + ) + { + if (can_add_displ_to_local(instr->args[2].data.mem.obj, addend)) { + *resultValue = instr->args[2].data.mem.offset; + return instr; + } else { + return NULL; + } + } else { + return NULL; + } + } else { + return NULL; + } +} + +static int isconstantoperand(PCodeArg *op, SInt16 *resultValue) { + PCode *instr; + + if ( + (instr = defininginstruction[op->data.reg.reg]) && + instr->op == PC_LI && + instr->args[1].kind == PCOp_IMMEDIATE + ) + { + *resultValue = instr->args[1].data.imm.value; + return 1; + } else { + return 0; + } +} + +static int isuint16constantoperand(PCodeArg *op, SInt16 *resultValue) { + PCode *instr; + + if ( + (instr = defininginstruction[op->data.reg.reg]) && + instr->op == PC_LI && + instr->args[1].kind == PCOp_IMMEDIATE && + FITS_IN_USHORT(instr->args[1].data.imm.value) + ) + { + *resultValue = instr->args[1].data.imm.value; + return 1; + } else { + return 0; + } +} + +static int isvectorconstantoperand(PCodeArg *op, SInt16 *resultValue, Opcode *resultNewOp) { + PCode *instr; + + if ( + (instr = vrdefininginstruction[op->data.reg.reg]) && + (instr->op == PC_VSPLTISB || instr->op == PC_VSPLTISH || instr->op == PC_VSPLTISW) && + instr->args[1].kind == PCOp_IMMEDIATE + ) + { + *resultValue = instr->args[1].data.imm.value; + *resultNewOp = instr->op; + return 1; + } else { + return 0; + } +} + +static int isunsignedloadoperand(PCodeArg *op) { + PCode *instr; + + if ((instr = defininginstruction[op->data.reg.reg])) { + if (instr->flags & fIsRead) { + if (instr->op >= PC_LHZ && instr->op <= PC_LHZUX) + return 2; + if (instr->op >= PC_LBZ && instr->op <= PC_LBZUX) + return 1; + } else if (instr->op == PC_RLWINM) { + int var3 = instr->args[3].data.imm.value; + int var4 = instr->args[4].data.imm.value; + if (var4 == 31) { + if (var3 == 24) + return 1; + if (var3 == 16) + return 2; + } + } + } + + return 0; +} + +static int ismaskedoperand(PCodeArg *op, UInt32 *resultMask) { + PCode *instr; + UInt32 mask; + + if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_RLWINM) { + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) { + mask = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) & + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } else { + mask = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) | + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } + *resultMask = mask; + return 1; + } + + return 0; +} + +static int issignedloadoperand(PCodeArg *op) { + PCode *instr; + + if ((instr = defininginstruction[op->data.reg.reg])) { + if (instr->flags & fIsRead) { + if (instr->op >= PC_LHA && instr->op <= PC_LHAUX) + return 2; + } else if (instr->op == PC_EXTSB) { + return 1; + } else if (instr->op == PC_EXTSH) { + return 2; + } + } + + return 0; +} + +static void propagateconstantstoblock(PCodeBlock *block) { + PCode *instr; + SInt16 immAddend; + SInt16 value1; + SInt16 valueU16; + Opcode newOpcode; + SInt16 value2; + UInt32 mask; + UInt32 mask2; + int loadSize; + PCodeArg *op; + int i; + + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + switch (instr->op) { + case PC_MR: + if (isconstantoperand(&instr->args[1], &value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } + break; + case PC_VMR: + if (isvectorconstantoperand(&instr->args[1], &value1, &newOpcode)) { + change_opcode(instr, newOpcode); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } + break; + case PC_RLWINM: + if ( + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + instr->args[2].data.imm.value == 0 && + instr->args[4].data.imm.value == 31 + ) + { + if (isconstantoperand(&instr->args[1], &value1)) { + if ( + (instr->args[3].data.imm.value == 16 && value1 == (value1 & 0x7FFF)) || + (instr->args[3].data.imm.value == 24 && value1 == (value1 & 0xFF)) + ) + { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + } + + loadSize = isunsignedloadoperand(&instr->args[1]); + if ( + (loadSize == 2 && instr->args[3].data.imm.value <= 16) || + (loadSize == 1 && instr->args[3].data.imm.value <= 24) + ) + { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + + if (ismaskedoperand(&instr->args[1], &mask)) { + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) { + mask2 = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) & + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } else { + mask2 = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) | + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } + if (mask == (mask & mask2)) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } + } + break; + + case PC_EXTSH: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if (isconstantoperand(&instr->args[1], &value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + + loadSize = issignedloadoperand(&instr->args[1]); + if (loadSize == 1 || loadSize == 2) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + break; + + case PC_EXTSB: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if ( + isconstantoperand(&instr->args[1], &value1) && + value1 >= -128 && + value1 <= 127 + ) + { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + + loadSize = issignedloadoperand(&instr->args[1]); + if (loadSize == 1) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + break; + + case PC_ADDI: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + immAddend = instr->args[2].data.imm.value; + if ( + isconstantoperand(&instr->args[1], &value1) && + FITS_IN_SHORT(immAddend + value1) + ) + { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = immAddend + value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + break; + + case PC_ADD: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if (isconstantoperand(&instr->args[2], &value1)) { + if (value1 == 0) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + } else { + change_opcode(instr, PC_ADDI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + } + propagatedconstants = 1; + changed = 1; + immAddend = value1; + } + + if (isconstantoperand(&instr->args[1], &value1)) { + if (instr->op == PC_ADDI || instr->op == PC_MR) { + if (FITS_IN_SHORT(immAddend + value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = immAddend + value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } else { + instr->args[1] = instr->args[2]; + if (value1 == 0) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + } else { + change_opcode(instr, PC_ADDI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + } + propagatedconstants = 1; + changed = 1; + } + } + + if (changed) { + if (instr->op == PC_MR) { + PCode *stackInstr; + if ((stackInstr = isstackoperand(&instr->args[1], &value1, 0))) { + change_opcode(instr, PC_ADDI); + instr->flags = stackInstr->flags; + instr->args[1] = stackInstr->args[1]; + instr->args[2] = stackInstr->args[2]; + change_num_operands(instr, 3); + propagatedconstants = 1; + changed = 1; + } + } else if (instr->op == PC_ADDI && instr->args[2].kind == PCOp_IMMEDIATE) { + PCode *stackInstr; + SInt16 addend = instr->args[2].data.imm.value; + if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) { + change_opcode(instr, PC_ADDI); + instr->flags = stackInstr->flags; + instr->args[1] = stackInstr->args[1]; + instr->args[2] = stackInstr->args[2]; + instr->args[2].data.imm.value = value1 + addend; + if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) + instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, 1); + propagatedconstants = 1; + changed = 1; + } + } + } + break; + + case PC_OR: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + value1 = 0; + immAddend = 0; + if (isconstantoperand(&instr->args[2], &value1)) { + if (isuint16constantoperand(&instr->args[2], &valueU16)) { + if (valueU16 != 0) { + change_opcode(instr, PC_ORI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = valueU16; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } else { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + value1 = valueU16; + } else if (value1 == 0) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + immAddend = value1; + } + + if (isconstantoperand(&instr->args[1], &value1)) { + if (instr->op == PC_ORI || instr->op == PC_MR) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = immAddend | value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } else if (isuint16constantoperand(&instr->args[1], &valueU16)) { + if (valueU16 != 0) { + change_opcode(instr, PC_ORI); + instr->args[1] = instr->args[2]; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = valueU16; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } else { + change_opcode(instr, PC_MR); + instr->args[1] = instr->args[2]; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } else if (value1 == 0) { + change_opcode(instr, PC_MR); + instr->args[1] = instr->args[2]; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } + break; + + case PC_SUBF: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if (isconstantoperand(&instr->args[1], &value1) && FITS_IN_SHORT(-value1)) { + if (isconstantoperand(&instr->args[2], &value2) && FITS_IN_SHORT(value2 - value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value2 - value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + } else if (value1 == 0) { + change_opcode(instr, PC_MR); + instr->args[1] = instr->args[2]; + change_num_operands(instr, 2); + } else { + change_opcode(instr, PC_ADDI); + instr->args[1] = instr->args[2]; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = -value1; + instr->args[2].data.imm.obj = NULL; + } + propagatedconstants = 1; + changed = 1; + value2 = value1; + } else if (isconstantoperand(&instr->args[2], &value1) && FITS_IN_SHORT(-value1)) { + if (value1 == 0) { + change_opcode(instr, PC_NEG); + change_num_operands(instr, 2); + } else { + instr->flags = opcodeinfo[PC_SUBFIC].flags | (instr->flags & ~opcodeinfo[PC_SUBF].flags); + change_opcode(instr, PC_SUBFIC); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + instr->args[3].kind = PCOp_REGISTER; + instr->args[3].arg = RegClass_SPR; + instr->args[3].data.reg.reg = 0; + instr->args[3].data.reg.effect = EffectWrite; + change_num_operands(instr, 4); + } + propagatedconstants = 1; + changed = 1; + } + + break; + + case PC_LBZ: + case PC_LHZ: + case PC_LHA: + case PC_LWZ: + case PC_STB: + case PC_STH: + case PC_STW: + case PC_LFS: + case PC_LFD: + case PC_STFS: + case PC_STFD: + if (instr->args[2].kind == PCOp_IMMEDIATE) { + PCode *stackInstr; + SInt16 addend = instr->args[2].data.imm.value; + + if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) { + instr->args[1] = stackInstr->args[1]; + instr->args[2] = stackInstr->args[2]; + instr->args[2].data.imm.value = value1 + addend; + if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) + instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, + nbytes_loaded_or_stored_by(instr)); + propagatedconstants = 1; + changed = 1; + } + } + break; + + case PC_LBZX: + case PC_LHZX: + case PC_LHAX: + case PC_LWZX: + case PC_STBX: + case PC_STHX: + case PC_STWX: + case PC_LFSX: + case PC_LFDX: + case PC_STFSX: + case PC_STFDX: + if (isconstantoperand(&instr->args[2], &value1)) { + instr->op -= 2; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } else if (isconstantoperand(&instr->args[1], &value1)) { + instr->op -= 2; + instr->args[1] = instr->args[2]; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } + + break; + } + + for (i = 0, op = instr->args; i < instr->argCount; i++, op++) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) + ) + { + defininginstruction[op->data.reg.reg] = instr; + } + else if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_VR && + (op->data.reg.effect & EffectWrite) + ) + { + vrdefininginstruction[op->data.reg.reg] = instr; + } + } + } +} + +void propagateconstants(void) { + PCodeBlock *block; + int i; + + propagatedconstants = 0; + computeusedefchains(0); + defininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_GPR]); + vrdefininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_VR]); + + do { + changed = 0; + for (i = 0; i < pcblockcount; i++) { + if ((block = depthfirstordering[i])) { + computedefininginstructions(block); + propagateconstantstoblock(block); + } + } + } while (changed); + + freeoheap(); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c new file mode 100644 index 0000000..6bb2d51 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c @@ -0,0 +1,885 @@ +#include "compiler/LoopDetection.h" +#include "compiler/CFunc.h" +#include "compiler/PCode.h" +#include "compiler/TOC.h" +#include "compiler/UseDefChains.h" +#include "compiler/CompilerTools.h" +#include "compiler/BitVectors.h" +#include "compiler/enode.h" +#include "compiler/objects.h" + +Loop *loopsinflowgraph; +int loopdetection_nblocks; +static UInt32 **dominators; +static BlockList *loopheaders; +static int nloopheaders; +static PCodeBlock **loopstack; +BitVector *LoopTemp; +struct LoopList *LoopList_First; + +static void computedominators(void) { + int i; + PCodeBlock *block; + int blockCount; + int flag; + UInt32 *myvec; + PCLink *link; + + blockCount = pcblockcount; + flag = 1; + + dominators = oalloc(sizeof(UInt32 *) * pcblockcount); + for (i = 0; i < pcblockcount; i++) + dominators[i] = oalloc(4 * ((blockCount + 31) >> 5)); + + myvec = oalloc(4 * ((blockCount + 31) >> 5)); + + bitvectorinitialize(dominators[pcbasicblocks->blockIndex], blockCount, 0); + //dominators[pcbasicblocks->blockIndex][0] |= 1; + bitvectorsetbit(0, dominators[pcbasicblocks->blockIndex]); + + for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) + bitvectorinitialize(dominators[block->blockIndex], blockCount, 0xFFFFFFFF); + + computedepthfirstordering(); + + while (flag) { + flag = 0; + for (i = 0; i < pcblockcount; i++) { + block = depthfirstordering[i]; + if (block && block->blockIndex != pcbasicblocks->blockIndex) { + bitvectorcopy(myvec, dominators[block->predecessors->block->blockIndex], blockCount); + for (link = block->predecessors->nextLink; link; link = link->nextLink) + bitvectorintersect(myvec, dominators[link->block->blockIndex], blockCount); + //myvec[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31); + bitvectorsetbit(block->blockIndex, myvec); + + if (bitvectorchanged(dominators[block->blockIndex], myvec, blockCount)) + flag = 1; + } + } + } +} + +static BlockList *findloopheaders(void) { + PCodeBlock *block; + PCLink *link; + BlockList *list; + + loopheaders = NULL; + nloopheaders = 0; + + for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) { + for (link = block->predecessors; link; link = link->nextLink) { + //if ((1 << (block->blockIndex & 31)) & dominators[link->block->blockIndex][block->blockIndex >> 5]) + if (bitvectorgetbit(block->blockIndex, dominators[link->block->blockIndex])) + break; + } + + if (link) { + list = oalloc(sizeof(BlockList)); + list->block = block; + list->next = loopheaders; + loopheaders = list; + nloopheaders++; + } + } + + return loopheaders; +} + +void addblocktoloop(Loop *loop, PCodeBlock *block) { + BlockList *list = lalloc(sizeof(BlockList)); + + //loop->memberblocks[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31); + bitvectorsetbit(block->blockIndex, loop->memberblocks); + + list->block = block; + list->next = loop->blocks; + loop->blocks = list; +} + +static void findnaturalloop(Loop *loop) { + BlockList *list; + BlockList *list2; + PCLink *link; + PCodeBlock *block; + int i; + + i = 0; + addblocktoloop(loop, loop->body); + for (link = loop->body->predecessors; link; link = link->nextLink) { + if (bitvectorgetbit(loop->body->blockIndex, dominators[link->block->blockIndex]) && link->block != loop->body) { + addblocktoloop(loop, link->block); + loopstack[i++] = link->block; + } + } + + while (i) { + link = loopstack[--i]->predecessors; + while (link) { + if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) { + addblocktoloop(loop, link->block); + loopstack[i++] = link->block; + } + link = link->nextLink; + } + } + + for (list = loop->blocks; list; list = list->next) { + block = list->block; + for (link = block->successors; link; link = link->nextLink) { + if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(block->blockIndex, loop->vec24); + break; + } + } + } + + for (list = loop->blocks; list; list = list->next) { + for (list2 = loop->blocks; list2; list2 = list2->next) { + if (bitvectorgetbit(list2->block->blockIndex, loop->vec24) && + !bitvectorgetbit(list->block->blockIndex, dominators[list2->block->blockIndex])) + break; + } + + if (!list2) + bitvectorsetbit(list->block->blockIndex, loop->vec28); + } + + for (list = loop->blocks; list; list = list->next) { + for (link = loop->body->predecessors; link; link = link->nextLink) { + if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks) && + !bitvectorgetbit(list->block->blockIndex, dominators[link->block->blockIndex])) + break; + } + + if (!link) + bitvectorsetbit(list->block->blockIndex, loop->vec2C); + } +} + +static void addlooptolist(Loop *loop, Loop **list) { + Loop **scan; + Loop *scanloop; + + scan = list; + while ((scanloop = *scan)) { + if (bitvectorgetbit(loop->body->blockIndex, scanloop->memberblocks)) { + loop->parent = scanloop; + addlooptolist(loop, &scanloop->children); + return; + } + + if (bitvectorgetbit(scanloop->body->blockIndex, loop->memberblocks)) { + *scan = scanloop->nextSibling; + scanloop->parent = loop; + scanloop->nextSibling = loop->children; + loop->children = scanloop; + } else { + scan = &scanloop->nextSibling; + } + } + + loop->nextSibling = *list; + *list = loop; +} + +static void findnaturalloops(void) { + Loop *loop; + int size; + + loopdetection_nblocks = pcblockcount + 5 * nloopheaders; + loopstack = oalloc(sizeof(PCodeBlock *) * pcblockcount); + while (loopheaders) { + loop = lalloc(sizeof(Loop)); + loop->parent = loop->nextSibling = loop->children = NULL; + loop->body = loopheaders->block; + loop->preheader = NULL; + loop->blocks = NULL; + loop->basicInductionVars = NULL; + loop->footer = NULL; + loop->pc18 = NULL; + loop->loopWeight = loop->body->loopWeight; + + bitvectorinitialize(loop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(loop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(loop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(loop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + + findnaturalloop(loop); + addlooptolist(loop, &loopsinflowgraph); + + loopheaders = loopheaders->next; + } +} + +static PCodeBlock *makepreheaderblock(void) { + PCodeLabel *label; + PCodeBlock *block; + + label = makepclabel(); + block = lalloc(sizeof(PCodeBlock)); + block->nextBlock = NULL; + block->prevBlock = NULL; + block->labels = NULL; + block->successors = NULL; + block->predecessors = NULL; + block->firstPCode = block->lastPCode = NULL; + block->pcodeCount = 0; + block->flags = 0; + block->blockIndex = pcblockcount++; + pclabel(block, label); + return block; +} + +static void insertpreheaderbefore(PCodeBlock *a, PCodeBlock *b) { + a->nextBlock = b; + a->prevBlock = b->prevBlock; + b->prevBlock->nextBlock = a; + b->prevBlock = a; +} + +void insertpreheaderblock(Loop *loop) { + PCodeBlock *preheader; + PCodeBlock *block29; + PCodeBlock *block28; + PCode *pcode27; + PCLink *link; // r26 + PCLink **linkptr; // r25 + PCodeLabel *newlabel; // r23 + PCLink *innerlink; + PCodeBlock *block; + PCodeArg *arg; + int i; + + preheader = loop->preheader = makepreheaderblock(); + block29 = NULL; + block28 = loop->body; + + if (!block28->labels) + pclabel(block28, makepclabel()); + + appendpcode(preheader, makepcode(PC_B, block28->labels)); + preheader->loopWeight = loop->parent ? loop->parent->loopWeight : 1; + + linkptr = &block28->predecessors; + while ((link = *linkptr)) { + if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) { + linkptr = &link->nextLink; + } else { + if (link->block->pcodeCount) { + pcode27 = link->block->lastPCode; + if (pcode27->op == PC_B) { + CError_ASSERT(462, pcode27->args[0].kind == PCOp_LABEL); + if (pcode27->args[0].data.label.label->block == block28) + pcode27->args[0].data.label.label = preheader->labels; + } else if (pcode27->op == PC_BT || pcode27->op == PC_BF) { + CError_ASSERT(474, pcode27->args[2].kind == PCOp_LABEL); + if (pcode27->args[2].data.label.label->block == block28) + pcode27->args[2].data.label.label = preheader->labels; + } else if (pcode27->op == PC_BCTR) { + if (pcode27->argCount > 1 && pcode27->args[1].kind == PCOp_MEMORY) { + Object *obj = pcode27->args[1].data.mem.obj; + UInt32 *array = (UInt32 *) obj->u.data.u.switchtable.data; + int i; + for (i = 0; i < obj->u.data.u.switchtable.size; i++) { + if (((PCodeLabel *) CTool_ResolveIndexToPointer(array[i]))->block == block28) + array[i] = CTool_CreateIndexFromPointer(preheader->labels); + } + } else { + CodeLabelList *cll; + for (cll = codelabellist; cll; cll = cll->next) { + if (cll->label->pclabel->block == block28) + cll->label->pclabel = preheader->labels; + } + } + } else { + CError_ASSERT(505, link->block->nextBlock == block28); + } + } + + for (innerlink = link->block->successors; innerlink; innerlink = innerlink->nextLink) { + if (innerlink->block == block28) + innerlink->block = preheader; + } + + *linkptr = link->nextLink;; + link->nextLink = preheader->predecessors; + preheader->predecessors = link; + } + } + + if (!bitvectorgetbit(block28->prevBlock->blockIndex, loop->memberblocks)) { + insertpreheaderbefore(preheader, block28); + + if ( + (!block28->nextBlock || !bitvectorgetbit(block28->nextBlock->blockIndex, loop->memberblocks)) && + block28->lastPCode && + (block28->lastPCode->flags & fIsBranch) && + block28->lastPCode->op != PC_BDNZ + ) { + i = block28->lastPCode->argCount; + arg = block28->lastPCode->args; + while (i && arg->kind != PCOp_LABEL) { + arg++; + i--; + } + + if (i && arg->kind == PCOp_LABEL && arg->data.label.label->block == block28) { + block29 = makepreheaderblock(); + insertpreheaderbefore(block29, block28); + newlabel = makepclabel(); + pclabel(block29, newlabel); + arg->data.label.label = newlabel; + + link = lalloc(sizeof(PCLink)); + link->block = block28; + link->nextLink = block29->predecessors; + block29->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = block28; + link->nextLink = block29->successors; + block29->successors = link; + + for (link = block28->successors; link; link = link->nextLink) { + if (link->block == block28) + link->block = block29; + } + for (link = block28->predecessors; link; link = link->nextLink) { + if (link->block == block28) + link->block = block29; + } + + bitvectorsetbit(block29->blockIndex, loop->vec2C); + addblocktoloop(loop, block29); + } + } + } else { + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (bitvectorgetbit(block->blockIndex, loop->memberblocks)) + break; + } + insertpreheaderbefore(preheader, block); + } + + link = lalloc(sizeof(PCLink)); + link->block = preheader; + link->nextLink = block28->predecessors; + block28->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = block28; + link->nextLink = preheader->successors; + preheader->successors = link; + + for (loop = loop->parent; loop; loop = loop->parent) { + addblocktoloop(loop, preheader); + if (bitvectorgetbit(block28->blockIndex, loop->vec28)) { + bitvectorsetbit(preheader->blockIndex, loop->vec28); + if (block29) + bitvectorsetbit(block29->blockIndex, loop->vec28); + } + if (bitvectorgetbit(block28->blockIndex, loop->vec2C)) { + bitvectorsetbit(preheader->blockIndex, loop->vec2C); + if (block29) + bitvectorsetbit(block29->blockIndex, loop->vec2C); + } + } +} + +static void insertpreheaderblocks(Loop *loop) { + while (loop) { + if (loop->children) + insertpreheaderblocks(loop->children); + insertpreheaderblock(loop); + loop = loop->nextSibling; + } +} + +void findloopsinflowgraph(void) { + loopsinflowgraph = NULL; + computedominators(); + if (findloopheaders()) { + findnaturalloops(); + insertpreheaderblocks(loopsinflowgraph); + } + freeoheap(); +} + +static int checklooplimits(SInt32 opcode, SInt32 condition, SInt32 c, SInt32 d, SInt32 addend, SInt32 *result) { + if (opcode == PC_BT) { + if (condition == 0) { + if (addend <= 0) + return 0; + if (c < d) + *result = (d - c + addend - 1) / addend; + else + *result = 0; + } else if (condition == 1) { + if (addend >= 0) + return 0; + if (c > d) + *result = (c - d - addend - 1) / -addend; + else + *result = 0; + } else { + return 0; + } + } else { + if (condition == 0) { + if (addend >= 0) + return 0; + if (c >= d) + *result = (c - d - addend) / -addend; + else + *result = 0; + } else if (condition == 1) { + if (addend <= 0) + return 0; + if (c <= d) + *result = (d - c + addend) / addend; + else + *result = 0; + } else if (c < d) { + if (addend <= 0) + return 0; + if ((d - c) % addend) + return 0; + *result = (d - c) / addend; + } else if (c > d) { + if (addend >= 0) + return 0; + if ((c - d) % -addend) + return 0; + *result = (c - d) / -addend; + } else { + *result = 0; + } + } + + return 1; +} + +static int checkunsignedlooplimits(SInt32 opcode, SInt32 condition, UInt32 c, UInt32 d, SInt32 addend, UInt32 *result) { + if (opcode == PC_BT) { + if (condition == 0) { + if (addend <= 0) + return 0; + if (c < d) + *result = (d - c + addend - 1) / addend; + else + *result = 0; + } else if (condition == 1) { + if (addend >= 0) + return 0; + if (c > d) + *result = (c - d - addend - 1) / -addend; + else + *result = 0; + } else { + return 0; + } + } else { + if (condition == 0) { + if (addend >= 0) + return 0; + if (c >= d) + *result = (c - d - addend) / -addend; + else + *result = 0; + } else if (condition == 1) { + if (addend <= 0) + return 0; + if (c <= d) + *result = (d - c + addend) / addend; + else + *result = 0; + } else if (c < d) { + if (addend <= 0) + return 0; + if ((d - c) % addend) + return 0; + *result = (d - c) / addend; + } else if (c > d) { + if (addend >= 0) + return 0; + if ((c - d) % -addend) + return 0; + *result = (c - d) / -addend; + } else { + *result = 0; + } + } + + return (*result & 0x80000000) == 0; +} + +static int checkunknownloop(int a, int b, int c, unsigned char *op) { + if (a == PC_BT) { + if (b == 0) { + if (c <= 0) + return 0; + *op = ELESS; + } else if (b == 1) { + if (c >= 0) + return 0; + *op = EGREATER; + } else { + return 0; + } + } else { + if (b == 0) { + if (c >= 0) + return 0; + *op = EGREATEREQU; + } else if (b == 1) { + if (c <= 0) + return 0; + *op = ELESSEQU; + } else if (c == 1) { + *op = ENOTEQU; + } else if (c == -1) { + *op = ENOTEQU; + } else { + return 0; + } + } + + return 1; +} + +static void checkcountingloop(Loop *loop) { + RegUseOrDef *list; + PCode *lastpcode; + PCode *prevpcode; + PCode *pc8; + PCode *check; + short op12; + short reg11; + SInt16 reg4; + short reg11b; + Loop *child; + + if (!(lastpcode = loop->body->lastPCode)) + return; + if (lastpcode->op != PC_BT && lastpcode->op != PC_BF) + return; + if (lastpcode->args[2].kind != PCOp_LABEL) + return; + + if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks)) + return; + if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks)) + return; + + reg11 = lastpcode->args[0].data.reg.reg; + reg4 = lastpcode->args[1].data.imm.value; + prevpcode = lastpcode->prevPCode; + if (!prevpcode) + return; + + op12 = prevpcode->op; + if (op12 == PC_ADDI && prevpcode->args[2].kind == PCOp_IMMEDIATE) { + pc8 = prevpcode; + prevpcode = prevpcode->prevPCode; + if (!prevpcode) + return; + + op12 = prevpcode->op; + if (pc8->args[0].data.reg.reg != pc8->args[1].data.reg.reg) + return; + if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI) + return; + if (prevpcode->args[1].data.reg.reg != pc8->args[0].data.reg.reg) + return; + if ((loop->step = pc8->args[2].data.imm.value) == 0) + return; + } + + if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI) + return; + + if (prevpcode->args[0].data.reg.reg != reg11) + return; + + reg11b = prevpcode->args[1].data.reg.reg; + if (reg11b < 32) + return; + + if (loop->preheader->nextBlock != lastpcode->args[2].data.label.label->block) + return; + + if (op12 == PC_CMPI) { + if (prevpcode->prevPCode) + return; + loop->upper = prevpcode->args[2].data.imm.value; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if (op12 == PC_CMPLI) { + if (prevpcode->prevPCode) + return; + loop->upper = prevpcode->args[2].data.imm.value & 0xFFFF; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if (op12 == PC_CMP || op12 == PC_CMPL) { + if (prevpcode->prevPCode) { + if ( + prevpcode->prevPCode->op == PC_LI && + prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + !prevpcode->prevPCode->prevPCode + ) { + loop->upper = prevpcode->prevPCode->args[1].data.imm.value; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + prevpcode->prevPCode->op == PC_LIS && + prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + !prevpcode->prevPCode->prevPCode + ) { + loop->upper = prevpcode->prevPCode->args[1].data.imm.value << 16; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + prevpcode->prevPCode->op == PC_ADDI && + prevpcode->prevPCode->args[2].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + prevpcode->prevPCode->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg && + prevpcode->prevPCode->prevPCode && + prevpcode->prevPCode->prevPCode->op == PC_LIS && + prevpcode->prevPCode->prevPCode->args[1].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + !prevpcode->prevPCode->prevPCode->prevPCode + ) { + loop->upper = prevpcode->prevPCode->args[2].data.imm.value + + (prevpcode->prevPCode->prevPCode->args[1].data.imm.value << 16); + loop->upperType = LOOP_BOUND_CONSTANT; + } else { + return; + } + } else { + pc8 = NULL; + for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) + return; + } + for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) { + if (!pc8) { + pc8 = Defs[list->id].pcode; + if ( + pc8->op == PC_LI && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->upper = pc8->args[1].data.imm.value; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_LIS && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->upper = pc8->args[1].data.imm.value << 16; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_ADDI && + pc8->args[2].kind == PCOp_IMMEDIATE && + pc8->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg && + pc8->prevPCode && + pc8->prevPCode->op == PC_LIS && + pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE && + pc8->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg + ) { + loop->upper = pc8->args[2].data.imm.value + + (pc8->prevPCode->args[1].data.imm.value << 16); + loop->upperType = LOOP_BOUND_CONSTANT; + } else { + loop->upperType = LOOP_BOUND_VARIABLE; + break; + } + } else { + loop->upperType = LOOP_BOUND_VARIABLE; + break; + } + } + } + + if (loop->upperType == LOOP_BOUND_INDETERMINATE) + loop->upperType = LOOP_BOUND_VARIABLE; + } + } + + pc8 = NULL; + + for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) { + check = Defs[list->id].pcode; + if (bitvectorgetbit(check->block->blockIndex, loop->memberblocks)) { + if (!pc8) { + pc8 = check; + if (check->op != PC_ADDI) + return; + if (check->args[1].data.reg.reg != reg11b) + return; + if (check->args[2].kind != PCOp_IMMEDIATE) + return; + if ((loop->step = check->args[2].data.imm.value) == 0) + return; + } else { + return; + } + } + } + + if (!pc8) + return; + + if (pc8->block != prevpcode->block && !bitvectorgetbit(prevpcode->block->blockIndex, loop->vec2C)) + return; + + if (loop->children) { + for (child = loop->children; child; child = child->nextSibling) { + if (bitvectorgetbit(pc8->block->blockIndex, child->memberblocks)) + return; + } + } + + loop->pc18 = pc8; + + pc8 = NULL; + + for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) { + if (!pc8) { + pc8 = Defs[list->id].pcode; + if ( + pc8->op == PC_LI && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->lower = pc8->args[1].data.imm.value; + loop->lowerType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_LIS && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->lower = pc8->args[1].data.imm.value << 16; + loop->lowerType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_ADDI && + pc8->args[2].kind == PCOp_IMMEDIATE && + pc8->args[1].data.reg.reg == reg11b && + pc8->prevPCode && + pc8->prevPCode->op == PC_LIS && + pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE && + pc8->prevPCode->args[0].data.reg.reg == reg11b + ) { + loop->lower = pc8->args[2].data.imm.value + + (pc8->prevPCode->args[1].data.imm.value << 16); + loop->lowerType = LOOP_BOUND_CONSTANT; + } else { + loop->lowerType = LOOP_BOUND_VARIABLE; + break; + } + } else { + loop->lowerType = LOOP_BOUND_INDETERMINATE; + break; + } + } + } + + if (loop->lowerType == LOOP_BOUND_INDETERMINATE) + loop->lowerType = LOOP_BOUND_VARIABLE; + + if (loop->lowerType == LOOP_BOUND_CONSTANT && loop->upperType == LOOP_BOUND_CONSTANT) { + if (op12 == PC_CMP || op12 == PC_CMPI) { + if (!checklooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, &loop->iterationCount)) + return; + } else { + if (!checkunsignedlooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, (UInt32 *) &loop->iterationCount)) + return; + } + loop->isKnownCountingLoop = 1; + } else if (loop->lowerType != LOOP_BOUND_INDETERMINATE || loop->upperType != LOOP_BOUND_INDETERMINATE) { + if (!checkunknownloop(lastpcode->op, reg4, loop->step, &loop->unknownCondition)) + return; + loop->isUnknownCountingLoop = 1; + } +} + +void analyzeForCountableLoops(Loop *loop) { + if (!loop) + return; + + while (loop) { + if (loop->children) + analyzeForCountableLoops(loop->children); + checkcountingloop(loop); + loop = loop->nextSibling; + } +} + +void analyzeloop(Loop *loop) { + BlockList *list; + PCodeBlock *block; + PCode *pcode; + + loop->bodySize = 0; + loop->x4D = 0; + loop->x4E = 0; + loop->x4F = 1; + loop->isKnownCountingLoop = 0; + loop->isUnknownCountingLoop = 0; + loop->lowerType = LOOP_BOUND_INDETERMINATE; + loop->upperType = LOOP_BOUND_INDETERMINATE; + loop->iterationCount = -1; + loop->x57 = 0; + loop->x52 = 0; + + for (list = loop->blocks; list; list = list->next) { + block = list->block; + if (!loop->children) + block->flags |= fPCBlockFlag2000; + loop->bodySize += block->pcodeCount; + + if (block != loop->body) { + if (!block->successors || !block->predecessors || block->successors->nextLink || block->predecessors->nextLink) + loop->x4F = 0; + } + + if ((block->flags & fPCBlockFlag4000) == fPCBlockFlag4000) + loop->x52 = 1; + + for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) { + if (PCODE_FLAG_SET_T(pcode) & fLink) + loop->x4D = 1; + + if (pcode->op == PC_BCTRL || pcode->op == PC_BCTR || pcode->op == PC_BCCTR || pcode->op == PC_MTCTR || pcode->op == PC_MFCTR) { + loop->x4E = 1; + } else if (pcode->flags & fIsRead) { + if (pcode->op == PC_LBZX || pcode->op == PC_LHZX || pcode->op == PC_LHAX || pcode->op == PC_LWZX || pcode->op == PC_LFSX || pcode->op == PC_LFDX) + loop->x53 = 1; + } else if (pcode->flags & fIsWrite) { + if (pcode->op == PC_STBX || pcode->op == PC_STHX || pcode->op == PC_STWX || pcode->op == PC_STFSX || pcode->op == PC_STFDX) + loop->x54 = 1; + } else { + if (pcode->op == PC_EIEIO || pcode->op == PC_SYNC || pcode->op == PC_ISYNC) + loop->x57 = 1; + } + } + } + + if (!loop->children && !loop->x4D && loop->bodySize < 32) { + for (list = loop->blocks; list; list = list->next) + list->block->flags |= fPCBlockFlag2000; + } +} + +static void analyzeloops(Loop *loop) { + while (loop) { + if (loop->children) + analyzeloops(loop->children); + analyzeloop(loop); + loop = loop->nextSibling; + } +} + +void analyzeloopsinflowgraph(void) { + if (loopsinflowgraph) + analyzeloops(loopsinflowgraph); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c new file mode 100644 index 0000000..b2aef1e --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c @@ -0,0 +1,1553 @@ +#include "compiler/LoopOptimization.h" +#include "compiler/CFunc.h" +#include "compiler/CParser.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/LoopDetection.h" +#include "compiler/PCode.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +int optimizedloops; +int optimizedloop_full_unroll; +int optimizedloop_trans_regs; +static UInt32 *liveonexit; +static UInt32 *inductionvariables; +static int last_virtual_GPR; + +static int ispowerof2(SInt32 value) { + int bit = getbit(value); + return (bit > 0 && bit < 31) ? bit : 0; +} + +static void insertupdateinstructions(Loop *loop) { + // nothing +} + +static void computeliveonexit(Loop *loop) { + UInt32 *usevec; + UInt32 *defvec; + BlockList *blocklist; + RegUseOrDef *list; + int gpr; + + bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0); + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24)) + bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses); + } + + bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0); + if (loop->preheader) + bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs); + + bitvectorinitialize(liveonexit, last_virtual_GPR, 0); + + for (gpr = 32; gpr < last_virtual_GPR; gpr++) { + for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) { + if (bitvectorgetbit(list->id, defvec)) { + if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(gpr, liveonexit); + break; + } + } + } + + for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) { + if (bitvectorgetbit(list->id, usevec)) { + if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(gpr, liveonexit); + break; + } + } + } + } +} + +static void eliminateinductionvariables(Loop *loop) { + BlockList *blocklist; + PCode *instr; + PCode *nextInstr; + PCodeArg *op; + int i; + + bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF); + + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) { + if ( + instr->op != PC_ADDI || + instr->args[0].data.reg.reg < 32 || + instr->args[0].data.reg.reg >= last_virtual_GPR || + instr->args[1].data.reg.reg != instr->args[0].data.reg.reg + ) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg >= n_real_registers[RegClass_GPR] && + op->data.reg.reg < last_virtual_GPR + ) + bitvectorclearbit(op->data.reg.reg, inductionvariables); + op++; + } + } + } + } + + if (loop->parent) { + for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg >= n_real_registers[RegClass_GPR] && + op->data.reg.reg < last_virtual_GPR + ) + bitvectorsetbit(op->data.reg.reg, liveonexit); + op++; + } + } + } + + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + if ( + instr->op == PC_ADDI && + instr->args[0].data.reg.reg >= 32 && + instr->args[0].data.reg.reg < last_virtual_GPR && + instr->args[1].data.reg.reg == instr->args[0].data.reg.reg && + bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) && + !bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit) + ) { + deletepcode(instr); + optimizedloops = 1; + } + } + } +} + +static void skiplooptest(Loop *loop) { + PCodeBlock *preheader; + PCodeLabel *label; + PCLink **ptr; + PCLink *link; + PCode *lastInstr; + PCode *instr; + + preheader = loop->preheader; + lastInstr = loop->body->lastPCode; + CError_ASSERT(340, lastInstr->args[2].kind == PCOp_LABEL); + + label = lastInstr->args[2].data.label.label; + preheader->lastPCode->args[0].data.label.label = label; + preheader->successors->block = label->block; + + ptr = &loop->body->predecessors; + while ((link = *ptr)) { + if (link->block == preheader) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + link->nextLink = label->block->predecessors; + label->block->predecessors = link; + + while (1) { + instr = loop->body->firstPCode; + CError_ASSERT(369, instr); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + for (instr = instr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) + insertpcodebefore(loop->preheader->lastPCode, copypcode(instr)); +} + +static void unrollloop(Loop *loop) { + PCodeBlock *newBlock; + int i; + int factor; + PCode *instr; + PCodeBlock *block; + PCode *firstInstr; + PCode *nextInstr; + + for (factor = copts.unroll_factor_limit; factor > 1; factor--) { + if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit) + break; + } + + if (factor == 1) + return; + if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4) + return; + + newBlock = oalloc(sizeof(PCodeBlock)); + newBlock->firstPCode = newBlock->lastPCode = NULL; + for (i = 0; i < factor - 1; i++) { + firstInstr = loop->body->firstPCode; + CError_ASSERT(448, firstInstr); + if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) + CError_FATAL(450); + + for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) + appendpcode(newBlock, copypcode(instr)); + + for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) + appendpcode(newBlock, copypcode(instr)); + } + } + } + + block = loop->body->predecessors->block; + for (instr = newBlock->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + appendpcode(block, instr); + } + loop->iterationCount /= factor; +} + +void pccomputepredecessors1(PCodeBlock *block) { + PCLink *succ; + PCLink *pred; + + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!succ->block) { + CError_FATAL(496); + } else { + for (pred = succ->block->predecessors; pred; pred = pred->nextLink) { + if (pred->block == block) + break; + } + + if (!pred) { + pred = lalloc(sizeof(PCLink)); + pred->block = block; + pred->nextLink = succ->block->predecessors; + succ->block->predecessors = pred; + } + } + } +} + +static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) { + PCodeBlock *newBlock; + PCodeBlock *nextBlock; + PCodeLabel *label; + PCLink *prev; + PCLink *link; + + label = makepclabel(); + newBlock = lalloc(sizeof(PCodeBlock)); + + nextBlock = block->nextBlock; + newBlock->nextBlock = nextBlock; + block->nextBlock = newBlock; + + newBlock->prevBlock = block; + nextBlock->prevBlock = newBlock; + + newBlock->labels = NULL; + newBlock->predecessors = newBlock->successors = NULL; + newBlock->firstPCode = newBlock->lastPCode = NULL; + newBlock->pcodeCount = 0; + newBlock->loopWeight = loopWeight; + newBlock->flags = 0; + newBlock->blockIndex = pcblockcount++; + pclabel(newBlock, label); + + prev = NULL; + for (link = block->successors; link; link = link->nextLink) { + if (link->block == nextBlock) { + if (!prev) + block->successors = link->nextLink; + else + prev->nextLink = link->nextLink; + link->nextLink = NULL; + newBlock->successors = link; + break; + } + prev = link; + } + + prev = NULL; + for (link = nextBlock->predecessors; link; link = link->nextLink) { + if (link->block == block) { + if (!prev) + nextBlock->predecessors = link->nextLink; + else + prev->nextLink = link->nextLink; + link->nextLink = NULL; + newBlock->predecessors = link; + break; + } + prev = link; + } + + link = lalloc(sizeof(PCLink)); + link->block = newBlock; + link->nextLink = block->successors; + block->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = newBlock; + link->nextLink = nextBlock->predecessors; + nextBlock->predecessors = link; + + return newBlock; +} + +static void unrollloopconditional(Loop *loop) { + PCodeBlock *lastBlock; + PCodeLabel *label24; + PCode *instr; + PCode *instrCopy; + int outputBlockCount; + int inputBlockCount; + PCodeBlock **blocks; + PCodeBlock **blocks2; + PCodeBlock **blocks3; + PCode *firstInstr; + int factor; + PCodeBlock *block; + PCLink *link; + PCLink *prev; + int i; + int j; + int k; + int instructionCount; + + label24 = NULL; + outputBlockCount = 0; + instructionCount = 0; + + for (factor = copts.unroll_factor_limit; factor > 1; factor--) { + if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit) + break; + } + + if (factor == 1) + return; + + inputBlockCount = 0; + for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) { + inputBlockCount++; + if (!bitvectorgetbit(block->blockIndex, loop->memberblocks)) + instructionCount += block->pcodeCount; + } + + if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8) + return; + + blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *)); + blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *)); + blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *))); + memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *)); + memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *)); + memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *))); + + block = loop->preheader->nextBlock; + for (i = 0; i < inputBlockCount; i++) { + blocks[i] = block; + block = block->nextBlock; + } + + lastBlock = blocks[inputBlockCount - 1]; + for (i = 0; i < factor - 1; i++) { + for (j = 0; j < inputBlockCount; j++) { + blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight); + blocks3[outputBlockCount++] = blocks2[j]; + lastBlock = blocks2[j]; + } + if (label24) { + pclabel(blocks2[0], label24); + label24 = NULL; + } + + for (j = 0; j < inputBlockCount; j++) { + for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & fIsBranch) { + PCodeArg *op; + int opID; + instrCopy = copypcode(instr); + op = NULL; + for (opID = 0; opID < instr->argCount; opID++) { + if (instr->args[opID].kind == PCOp_LABEL) { + op = &instr->args[opID]; + break; + } + } + + if (op) { + if (op->data.label.label->block == loop->body) { + if (!label24) + label24 = makepclabel(); + instrCopy->args[opID].data.label.label = label24; + } else { + for (k = 0; k < inputBlockCount; k++) { + if (op->data.label.label->block == blocks[k]) { + instrCopy->args[opID].data.label.label = blocks2[k]->labels; + break; + } + } + } + } + + appendpcode(blocks2[j], instrCopy); + if (op) + pcbranch(blocks2[j], instrCopy->args[opID].data.label.label); + } else { + appendpcode(blocks2[j], copypcode(instr)); + } + } + } + + firstInstr = loop->body->firstPCode; + CError_ASSERT(762, firstInstr != NULL); + if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) + CError_FATAL(764); + + for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) + appendpcode(blocks2[inputBlockCount - 1], copypcode(instr)); + + for (j = 0; j < inputBlockCount; j++) { + for (link = blocks[j]->successors; link; link = link->nextLink) { + if (link->block == blocks[j]->nextBlock) + break; + } + + if (!link) { + for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks2[j]->nextBlock) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks2[j]->successors = link->nextLink; + } else { + prev = link; + } + } + + for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks2[j]) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks2[j]->nextBlock->predecessors = link->nextLink; + } else { + prev = link; + } + } + } + } + } + + if (label24) + pclabel(loop->body, label24); + + for (i = 0; i < inputBlockCount; i++) { + for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & fIsBranch) { + PCodeArg *op; + int opID; + op = NULL; + for (opID = 0; opID < instr->argCount; opID++) { + if (instr->args[opID].kind == PCOp_LABEL) { + op = &instr->args[opID]; + break; + } + } + + if (op && op->data.label.label->block == loop->body) { + instr->args[opID].data.label.label = blocks3[0]->labels; + + for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) { + if (link->block == loop->body) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks[i]->successors = link->nextLink; + } else { + prev = link; + } + } + + for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks[i]) { + if (prev) + prev->nextLink = link->nextLink; + else + loop->body->predecessors = link->nextLink; + } else { + prev = link; + } + } + + link = blocks[i]->successors; + while (link && link->block != blocks3[0]) + link = link->nextLink; + + if (!link) { + pcbranch(blocks[i], op->data.label.label); + pccomputepredecessors1(blocks[i]); + } + } + } + } + } + + for (i = 0; i < outputBlockCount; i++) + pccomputepredecessors1(blocks3[i]); + + loop->iterationCount /= factor; +} + +static void unrollunknownBDNZ(Loop *loop) { + int factor; // r29 + PCodeBlock *preheader; // r17 + PCodeBlock *blockA; // r23 + PCodeBlock *postheader; // r22 + PCodeBlock *newblock1; // r26 + PCodeBlock *newblock2; // r31 + PCodeBlock *newblock3; // r19 + PCodeBlock *newblock4; // r20 + PCodeBlock *newblock5; // r24 + PCode *mtctr; // r21 + int mtctr_reg; // r27 + PCode *instr28; // r28 + PCode *instr6; // r6 + PCodeBlock *block; + PCode *instr; + PCodeArg *op; + int i; + int val; + short mtctr_shifted_reg; // r16 + short reg25; // r25 + PCLink *link; + + if (loop->bodySize < 4) + return; + + factor = 128; + while (factor > copts.unroll_factor_limit) + factor >>= 1; + while (factor > 1 && (loop->bodySize - 2) * factor > copts.unroll_instr_limit) + factor >>= 1; + + if (factor < 2) + return; + + preheader = loop->preheader; + blockA = loop->body->nextBlock; + postheader = preheader->nextBlock; + + newblock1 = insertnewpcblock(preheader, loop->loopWeight); + newblock2 = insertnewpcblock(newblock1, loop->loopWeight); + newblock3 = insertnewpcblock(newblock2, loop->loopWeight); + newblock4 = insertnewpcblock(newblock3, loop->loopWeight); + newblock5 = insertnewpcblock(newblock4, loop->loopWeight); + addblocktoloop(loop, newblock1); + addblocktoloop(loop, newblock2); + addblocktoloop(loop, newblock3); + addblocktoloop(loop, newblock4); + addblocktoloop(loop, newblock5); + + for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) { + if (instr->op == PC_MTCTR) { + mtctr = instr; + mtctr_reg = instr->args[0].data.reg.reg; + } + } + + if (!mtctr) + return; + + instr28 = NULL; + for (block = postheader; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) { + appendpcode(newblock2, copypcode(instr)); + if (instr == loop->pc18) + instr28 = newblock2->lastPCode; + } + } + } + + if (!instr28) { + appendpcode(newblock2, copypcode(loop->pc18)); + instr28 = newblock2->lastPCode; + } + + instr6 = NULL; + for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) { + if (instr != instr28) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == loop->pc18->args[0].data.reg.reg && + (op->data.reg.effect & (EffectRead | EffectWrite)) + ) { + instr6 = instr; + break; + } + op++; + } + } + if (instr6) + break; + } + + if (!instr6) { + deletepcode(instr28); + deletepcode(loop->pc18); + if (loop->footer) + blockA = loop->footer; + else + blockA = insertnewpcblock(loop->body, loop->loopWeight); + } else { + instr28 = NULL; + } + + for (i = 1; i < factor; i++) { + for (block = postheader; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) + appendpcode(newblock2, copypcode(instr)); + } + } + } + + mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++; + appendpcode(newblock1, makepcode( + PC_RLWINM, + mtctr_shifted_reg, + mtctr_reg, + 32 - ispowerof2(factor), + ispowerof2(factor), + 31)); + + appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0)); + + if (instr28) { + reg25 = used_virtual_registers[RegClass_GPR]++; + if (loop->step == 1) { + instr = makepcode(PC_MR, reg25, mtctr_reg); + } else if (loop->step == -1) { + instr = makepcode(PC_NEG, reg25, mtctr_reg); + } else { + val = ispowerof2(abs(loop->step)); + if (val > 0) { + instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val); + if (loop->step < 0) { + appendpcode(newblock1, instr); + instr = makepcode(PC_NEG, reg25, reg25); + } + } else { + instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step); + } + } + appendpcode(newblock1, instr); + } + + appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg)); + appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels)); + pcbranch(newblock1, newblock5->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = newblock5->predecessors; + newblock5->predecessors = link; + + appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels)); + pcbranch(newblock3, newblock2->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock3; + link->nextLink = newblock2->predecessors; + newblock2->predecessors = link; + + appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1)); + appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels)); + pcbranch(newblock4, blockA->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock4; + link->nextLink = blockA->predecessors; + blockA->predecessors = link; + + deletepcode(mtctr); + appendpcode(newblock5, mtctr); + + if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) { + instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25); + if (blockA->firstPCode) + insertpcodebefore(blockA->firstPCode, instr); + else + appendpcode(blockA, instr); + } +} + +static void deleteloop(Loop *loop) { + PCodeBlock *body; + PCodeBlock *preheader; + PCodeBlock *nextblock; + PCodeLabel *label; + PCode *instr; + PCLink **ptr; + PCLink *link; + PCodeBlock *block; + + body = loop->body; + preheader = loop->preheader; + nextblock = body->nextBlock; + label = makepclabel(); + + while (1) { + instr = body->firstPCode; + CError_ASSERT(1294, instr != NULL); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + pclabel(nextblock, label); + preheader->lastPCode->args[0].data.label.label = label; + preheader->successors->block = nextblock; + + ptr = &nextblock->predecessors; + while ((link = *ptr)) { + if (link->block == body) { + link->block = preheader; + break; + } + ptr = &link->nextLink; + } + + block = pcbasicblocks; + while ((nextblock = block->nextBlock)) { + if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks)) + block->nextBlock = nextblock->nextBlock; + else + block = nextblock; + } +} + +static void deleteloopheader(Loop *loop) { + PCLink *link; + PCLink **ptr; + + ptr = &loop->body->successors; + while ((link = *ptr)) { + if (link->block == loop->body->lastPCode->args[2].data.label.label->block) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors; + while ((link = *ptr)) { + if (link->block == loop->body) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + optimizedloop_full_unroll = 1; +} + +static void rewriteloopwithBDNZ(Loop *loop) { + PCode *instr; + + if (!FITS_IN_SHORT(loop->iterationCount)) { + instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount)); + insertpcodebefore(loop->preheader->lastPCode, instr); + + if (loop->iterationCount != 0) + insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount))); + } else { + instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount); + insertpcodebefore(loop->preheader->lastPCode, instr); + } + + insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg)); + + instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + appendpcode(loop->body, instr); + + for (loop = loop->parent; loop; loop = loop->parent) + loop->x4E = 1; +} + +static void rewriteunknownloopwithBDNZ(Loop *loop) { + SInt32 value1; // r24 + SInt32 value2; // r30 + Opcode branchOpcode; // r19 + SInt32 absStep; // r28 + int branchCondition; // r20 + int counterReg; // r27 + int reg1; // r26 + int reg2; // r22 + unsigned char mode; // r23 + PCodeBlock *afterLoop; // r25 + PCode *addiInstr; + PCode *instr; + PCLink *link; + PCLink **ptr; + + absStep = abs(loop->step); + afterLoop = NULL; + + for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) { + deletepcode(addiInstr); + if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode) + insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr); + else + appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr); + + afterLoop = insertnewpcblock(loop->body, loop->loopWeight); + appendpcode(afterLoop, copypcode(addiInstr)); + loop->footer = afterLoop; + } + + if (loop->unknownCondition == ELESS) { + branchOpcode = PC_BF; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 + loop->upper; + mode = 1; + } else { + branchCondition = 0; // LT + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 2; + } + } else if (loop->unknownCondition == ELESSEQU) { + branchOpcode = PC_BT; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep + loop->upper; + mode = 1; + } else { + branchCondition = 1; // GT + value1 = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep; + mode = 2; + } + } else if (loop->unknownCondition == EGREATER) { + branchOpcode = PC_BF; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 - loop->upper; + mode = 0; + } else { + branchCondition = 1; // GT + value1 = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 3; + } + } else if (loop->unknownCondition == EGREATEREQU) { + branchOpcode = PC_BT; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - loop->upper; + mode = 0; + } else { + branchCondition = 0; // LT + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep; + mode = 3; + } + } else if (loop->unknownCondition == ENOTEQU) { + branchOpcode = PC_BT; + branchCondition = 2; // EQ + if (loop->step > 0) { + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 + loop->upper; + mode = 1; + } else { + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 2; + } + } else { + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 - loop->upper; + mode = 0; + } else { + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 3; + } + } + } + + while (1) { + instr = loop->body->firstPCode; + CError_ASSERT(1857, instr); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + // build a value for mtctr + counterReg = used_virtual_registers[RegClass_GPR]++; + + if (mode == 1) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_NEG, counterReg, reg1)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBFIC, counterReg, reg1, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } + } else if (mode == 0) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_MR, counterReg, reg1)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg1, 0, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + } + } else if (mode == 2) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, reg2)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg2, 0, value2)); + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } + } else { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg2, reg1)); + } else { + if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg1, 0, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg2, counterReg)); + } + } + + if (absStep > 1) { + unsigned char bits; + if ((bits = ispowerof2(absStep))) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31)); + } else { + int reg = used_virtual_registers[RegClass_GPR]++; + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_LI, reg, absStep)); + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_DIVWU, counterReg, counterReg, reg)); + } + } + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_MTCTR, counterReg)); + + if (mode < 2) { + if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { + if (FITS_IN_USHORT(value1)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPLI, 0, reg1, value1)); + } else { + PCode *tmp; + tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); + insertpcodebefore(loop->preheader->lastPCode, tmp); + + if (loop->iterationCount != 0) + insertpcodeafter(tmp, + makepcode(PC_ADDI, + tmp->args[0].data.reg.reg, + tmp->args[0].data.reg.reg, + 0, LOW_PART(value1))); + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg)); + } + } else { + if (FITS_IN_SHORT(value1)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPI, 0, reg1, value1)); + } else { + PCode *tmp; + tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); + insertpcodebefore(loop->preheader->lastPCode, tmp); + + if (loop->iterationCount != 0) + insertpcodeafter(tmp, + makepcode(PC_ADDI, + tmp->args[0].data.reg.reg, + tmp->args[0].data.reg.reg, + 0, LOW_PART(value1))); + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg)); + } + } + } else { + if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPL, 0, reg1, reg2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMP, 0, reg1, reg2)); + } + } + + if (!afterLoop) + afterLoop = loop->body->nextBlock; + + instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels); + deletepcode(loop->preheader->lastPCode); + appendpcode(loop->preheader, instr); + + instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + appendpcode(loop->body, instr); + + loop->preheader->successors = NULL; + ptr = &loop->body->predecessors; + while ((link = *ptr)) { + if (link->block == loop->preheader) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader->nextBlock; + link->nextLink = loop->preheader->successors; + loop->preheader->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader; + link->nextLink = loop->preheader->nextBlock->predecessors; + loop->preheader->nextBlock->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = afterLoop; + link->nextLink = loop->preheader->successors; + loop->preheader->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader; + link->nextLink = afterLoop->predecessors; + afterLoop->predecessors = link; + + for (loop = loop->parent; loop; loop = loop->parent) + loop->x4E = 1; +} + +static void optimizeloop(Loop *loop) { + computeliveonexit(loop); + + if (loop->x53 || loop->x54) + insertupdateinstructions(loop); + + if (loop->isKnownCountingLoop) { + if (loop->iterationCount > 0) { + skiplooptest(loop); + if (!copts.optimizesize && !loop->x4D && !loop->x57) { + if (loop->x4F) + unrollloop(loop); + else if (!loop->x4E) + unrollloopconditional(loop); + } + } + + if (loop->iterationCount != 0) { + if (loop->iterationCount == 1) + deleteloopheader(loop); + else if (!loop->x4E && !loop->x4D) + rewriteloopwithBDNZ(loop); + } + + optimizedloops = 1; + } else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) { + rewriteunknownloopwithBDNZ(loop); + if (copts.unroll_speculative && !copts.optimizesize) { + if (loop->x4F && !loop->x57 && !loop->x52) + unrollunknownBDNZ(loop); + } + optimizedloops = 1; + } + + eliminateinductionvariables(loop); +} + +typedef struct LocalArray { + struct LocalArray *next; + Object *object; + unsigned int invalid:1; + unsigned int isFloat:1; + unsigned int isSigned:1; + SInt32 elementSize; + SInt32 arraySize; + SInt32 elementCount; + int totalUses; + int elements[1]; +} LocalArray; + +static LocalArray *scanforlocalarrays(void) { + SInt32 elementCount; + LocalArray *head; + LocalArray *array; + ObjectList *list; + int i; + SInt32 arraySize; + SInt32 elementSize; + + head = NULL; + + for (list = locals; list; list = list->next) { + if ( + list->object && + !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && + list->object->type && + IS_TYPE_ARRAY(list->object->type) && + TPTR_TARGET(list->object->type) && + TPTR_TARGET(list->object->type)->type < TYPESTRUCT + ) { + arraySize = list->object->type->size; + elementSize = TPTR_TARGET(list->object->type)->size; + elementCount = arraySize / elementSize; + if (elementCount > 0 && elementCount <= 8) { + array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray)); + array->next = head; + head = array; + + array->object = list->object; + array->elementSize = elementSize; + array->arraySize = arraySize; + array->elementCount = elementCount; + array->totalUses = 0; + array->isSigned = 1; + array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type)); + array->invalid = 0; + array->isSigned = 1; + if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type))) + array->isSigned = 0; + + for (i = 0; i < elementCount; i++) { + array->elements[i] = 0; + } + } + } + } + + return head; +} + +static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) { + while (arrays) { + if (arrays->object == object) + return arrays; + arrays = arrays->next; + } + return NULL; +} + +void changearraytoregisters(void) { + LocalArray *arrays; + PCodeBlock *block; + PCode *instr; + int i; + PCodeArg *op; + LocalArray **ptr; + LocalArray *array; + int intCounter; + int floatCounter; + int reg; + int reg2; + PCode *newInstr; + + arrays = NULL; + if ((arrays = scanforlocalarrays())) { + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_MEMORY && + (PCOpMemoryArg) op->arg == PCOpMemory1 && + (array = lookup_array_object(arrays, op->data.mem.obj)) && + !array->invalid + ) { + if ( + (instr->flags & (fIsRead | fIsWrite)) && + (op->data.mem.offset % array->elementSize) == 0 && + op->data.mem.offset < array->arraySize + ) { + switch (instr->op) { + case PC_LBZ: + case PC_STB: + if (array->elementSize != 1) + array->invalid = 1; + break; + case PC_LHZ: + case PC_LHA: + case PC_STH: + if (array->elementSize != 2) + array->invalid = 1; + break; + case PC_LWZ: + case PC_STW: + if (array->elementSize != 4) + array->invalid = 1; + break; + case PC_LFD: + case PC_STFD: + if (array->elementSize != 8) + array->invalid = 1; + break; + default: + array->invalid = 1; + break; + } + + if (!array->invalid) + array->elements[op->data.mem.offset / array->elementSize]++; + } else { + array->invalid = 1; + } + } + op++; + } + } + } + } + + intCounter = 0; + floatCounter = 0; + ptr = &arrays; + array = *ptr; + while (array) { + if (array->invalid) { + *ptr = array = array->next; + continue; + } + + if (array->isFloat) + floatCounter += array->elementCount; + if (!array->isFloat) + intCounter += array->elementCount; + + for (i = 0; i < array->elementCount; i++) + array->totalUses += array->elements[i]; + + array = array->next; + } + + if (arrays) { + while (intCounter > 8) { + LocalArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (!array->isFloat) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + intCounter -= best->elementCount; + } + + while (floatCounter > 8) { + LocalArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (array->isFloat) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + floatCounter -= best->elementCount; + } + + CError_ASSERT(2394, intCounter <= 8 && floatCounter <= 8); + + if (!arrays) + return; + + optimizedloop_trans_regs = 1; + + for (array = arrays; array; array = array->next) { + for (i = 0; i < array->elementCount; i++) { + if (array->isFloat) + array->elements[i] = used_virtual_registers[RegClass_FPR]++; + else + array->elements[i] = used_virtual_registers[RegClass_GPR]++; + } + } + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if ( + !(instr->flags & fIsBranch) && + instr->argCount && + (instr->flags & (fIsRead | fIsWrite)) && + instr->args[2].kind == PCOp_MEMORY && + (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && + (array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) && + !(array->invalid) + ) + { + reg2 = instr->args[0].data.reg.reg; + reg = array->elements[instr->args[2].data.mem.offset / array->elementSize]; + newInstr = NULL; + switch (instr->op) { + case PC_LBZ: + if (array->isSigned) + newInstr = makepcode(PC_MR, reg2, reg); + else + newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31); + break; + case PC_STB: + if (array->isSigned) + newInstr = makepcode(PC_EXTSB, reg, reg2); + else + newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31); + break; + case PC_LHZ: + newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31); + break; + case PC_LHA: + newInstr = makepcode(PC_EXTSH, reg2, reg); + break; + case PC_STH: + if (array->isSigned) + newInstr = makepcode(PC_EXTSH, reg, reg2); + else + newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31); + break; + case PC_LWZ: + newInstr = makepcode(PC_MR, reg2, reg); + break; + case PC_STW: + newInstr = makepcode(PC_MR, reg, reg2); + break; + case PC_LFD: + newInstr = makepcode(PC_FMR, reg2, reg); + break; + case PC_STFD: + newInstr = makepcode(PC_FMR, reg, reg2); + break; + default: + CError_FATAL(2494); + break; + } + + if (newInstr) { + insertpcodebefore(instr, newInstr); + deletepcode(instr); + instr = newInstr; + } + } + } + } + } + } + + freeoheap(); +} + +static void optimizenestedloops(Loop *loop) { + while (loop) { + if (loop->children) + optimizenestedloops(loop->children); + optimizeloop(loop); + loop = loop->nextSibling; + } +} + +void optimizeloops(void) { + optimizedloops = 0; + optimizedloop_full_unroll = 0; + optimizedloop_trans_regs = 0; + if (loopsinflowgraph) { + computeusedefchains(0); + last_virtual_GPR = used_virtual_registers[RegClass_GPR]; + liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5)); + inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5)); + optimizenestedloops(loopsinflowgraph); + freeoheap(); + } +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c new file mode 100644 index 0000000..2b68dca --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c @@ -0,0 +1,751 @@ +#include "compiler/StrengthReduction.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/LoopDetection.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" + +int strengthreducedloops; + +static PCode *findinitializer(Loop *loop, short reg) { + UInt32 *vec; + PCode *best; + RegUseOrDef *list; + + vec = usedefinfo[loop->body->blockIndex].defvec8; + best = NULL; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + if ( + !(bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) && + bitvectorgetbit(list->id, vec) + ) + { + if (best) + return NULL; + best = Defs[list->id].pcode; + } + } + + if (best) { + if (best->op == PC_LI || best->op == PC_ADDI || best->op == PC_ADD) + return best; + } + + return NULL; +} + +static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) { + RegUseOrDef *list; + PCode *instr; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + instr = Defs[list->id].pcode; + if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { + if (instr->op != PC_ADDI) + return 0; + if (instr->args[1].data.reg.reg != reg) + return 0; + if (instr->args[2].data.imm.value != step) + return 0; + } + } + + return 1; +} + +static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) { + BasicInductionVar *biv; + RegUseOrDef *list; + PCode *instr; + InstrList *instrList; + + for (biv = loop->basicInductionVars; biv; biv = biv->next) { + if (biv->reg == reg) + return; + } + + biv = oalloc(sizeof(BasicInductionVar)); + biv->next = loop->basicInductionVars; + loop->basicInductionVars = biv; + + biv->loop = loop; + biv->inductionVars = NULL; + biv->instrsC = NULL; + biv->step = step; + biv->reg = reg; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + instr = Defs[list->id].pcode; + if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { + instrList = oalloc(sizeof(InstrList)); + instrList->next = biv->instrsC; + biv->instrsC = instrList; + instrList->instr = instr; + } + } + + biv->initializer = findinitializer(loop, reg); +} + +static void findbasicinductionvariables(Loop *loop) { + SInt16 step; + BlockList *block; + PCode *instr; + short reg; + + for (block = loop->blocks; block; block = block->next) { + for (instr = block->block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op == PC_ADDI) { + if ( + (reg = instr->args[0].data.reg.reg) >= 32 && + instr->args[1].data.reg.reg == reg && + isbasicinductionvariable(loop, reg, step = instr->args[2].data.imm.value) + ) + addbasicinductionvariable(loop, reg, step); + } + } + } +} + +static void findallbasicinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + findallbasicinductionvariables(loop->children); + findbasicinductionvariables(loop); + loop = loop->nextSibling; + } +} + +static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) { + RegUseOrDef *list; + int counter; + Loop *loop; + Loop *scanloop; + PCode *instr; + + instr = Uses[useID].pcode; + *result2 = 0; + *result3 = 0; + *result4 = NULL; + + switch (instr->op) { + case PC_MULLI: + *result1 = instr->args[2].data.imm.value; + break; + + case PC_RLWINM: + if (instr->args[3].data.imm.value) + return 0; + if (instr->args[2].data.imm.value > 15) + return 0; + if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value)) + return 0; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + return 0; + *result1 = 1 << instr->args[2].data.imm.value; + break; + + case PC_LBZX: + case PC_LHZX: + case PC_LHAX: + case PC_LWZX: + case PC_STBX: + case PC_STHX: + case PC_STWX: + case PC_LFSX: + case PC_LFDX: + case PC_STFSX: + case PC_STFDX: + *result2 = 0; + *result3 = 0; + if (instr->args[1].data.reg.reg == biv->reg) { + *result2 = 1; + *result3 = 2; + } else if (instr->args[2].data.reg.reg == biv->reg) { + *result2 = 2; + *result3 = 1; + } + + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) + counter++; + } + if (counter) + return 0; + + loop = biv->loop; + for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) { + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks)) + counter++; + } + if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks)) + counter++; + if (counter) + break; + loop = scanloop; + } + + *result4 = loop; + *result1 = 1; + return 1; + + default: + return 0; + } + + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) + counter++; + } + + return counter == 1; +} + +static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) { + InductionVar *iv; + + iv = oalloc(sizeof(InductionVar)); + iv->next = biv->inductionVars; + biv->inductionVars = iv; + + iv->basicVar = biv; + iv->instr = instr; + iv->instrC = NULL; + iv->step = val1; + iv->x18 = val2; + iv->x1A = val3; + iv->someloop = val4; + if (instr->flags & (fIsRead | fIsWrite)) + iv->x1C = -1; + else + iv->x1C = instr->args[0].data.reg.reg; + iv->x1E = -1; +} + +static void findnonbasicinductionvariables(Loop *loop) { + BasicInductionVar *biv; + RegUseOrDef *list; + SInt32 result1; + short result2; + short result3; + Loop *result4; + + for (biv = loop->basicInductionVars; biv; biv = biv->next) { + for (list = reg_Uses[RegClass_GPR][biv->reg]; list; list = list->next) { + if (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { + if (isinductionvariable(biv, list->id, &result1, &result2, &result3, &result4)) + addinductionvariable(biv, Uses[list->id].pcode, result1, result2, result3, result4); + } + } + } +} + +static void findallnonbasicinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + findallnonbasicinductionvariables(loop->children); + if (loop->basicInductionVars) + findnonbasicinductionvariables(loop); + loop = loop->nextSibling; + } +} + +static void initializeinductionvariable(InductionVar *iv) { + BasicInductionVar *biv; // r31 + PCode *instr; // r27 + PCodeBlock *preheader; // r30 + SInt32 value30; // r30 + short reg29; // r29 + short reg26; // r26 + + biv = iv->basicVar; + preheader = biv->loop->preheader; + + if (iv->x1A) { + reg29 = iv->instr->args[iv->x1A].data.reg.reg; + reg26 = iv->instr->args[iv->x18].data.reg.reg; + instr = NULL; + + if ( + biv->initializer && + biv->initializer->op == PC_LI && + biv->initializer->block == preheader + ) + { + if (biv->initializer->args[1].data.imm.value == 0) + instr = makepcode(PC_MR, iv->x1E, reg29); + else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value)) + instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value); + } + + if (!instr) + instr = makepcode(PC_ADD, iv->x1E, reg29, reg26); + + if (biv->initializer && instr->op != PC_ADD) + insertpcodeafter(biv->initializer, instr); + else if (iv->someloop && iv->someloop->preheader->lastPCode) + insertpcodebefore(iv->someloop->preheader->lastPCode, instr); + else + insertpcodebefore(preheader->lastPCode, instr); + + iv->instrC = instr; + iv->x1C = reg29; + return; + } + + if (!biv->initializer || biv->initializer->op != PC_LI) { + instr = copypcode(iv->instr); + instr->args[0].data.reg.reg = iv->x1E; + insertpcodebefore(preheader->lastPCode, instr); + } else { + value30 = biv->initializer->args[1].data.imm.value * iv->step; + if (!FITS_IN_SHORT(value30)) { + instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30)); + insertpcodeafter(biv->initializer, instr); + if (value30 != 0) + insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30))); + } else { + instr = makepcode(PC_LI, iv->x1E, value30); + insertpcodeafter(biv->initializer, instr); + } + } +} + +static void incrementinductionvariable(InductionVar *iv) { + SInt32 value; + BasicInductionVar *biv; + PCode *instr; + InstrList *list; + + biv = iv->basicVar; + value = iv->step * biv->step; + for (list = biv->instrsC; list; list = list->next) { + if (!FITS_IN_SHORT(value)) { + instr = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value)); + insertpcodeafter(list->instr, instr); + + if (value != 0) { + instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value)); + insertpcodeafter(list->instr->nextPCode, instr); + } + } else { + instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value); + insertpcodeafter(list->instr, instr); + } + } +} + +static void copyinductionvariable(InductionVar *iv) { + if (iv->instr->flags & (fIsRead | fIsWrite)) { + iv->instr->op -= 2; + iv->instr->args[1].data.reg.reg = iv->x1E; + iv->instr->args[2].kind = PCOp_IMMEDIATE; + iv->instr->args[2].data.imm.value = 0; + iv->instr->args[2].data.imm.obj = NULL; + } else { + insertpcodeafter(iv->instr, makepcode(PC_MR, iv->x1C, iv->x1E)); + deletepcode(iv->instr); + } +} + +static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) { + SInt32 addend; + BlockList *list; + PCode *instr; + PCodeArg *op; + int i; + + if (iv->instrC && iv->x1C == reg) { + if (iv->instrC->op == PC_MR) + addend = 0; + else if (iv->instrC->op == PC_ADDI) + addend = iv->instrC->args[2].data.imm.value; + else + return 0; + + if (step2 == (addend + (step1 * iv->step * loop2->iterationCount))) { + for (list = loop1->blocks; list && list->block != loop2->blocks->block; list = list->next) { + for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == reg + ) + return 0; + op++; + } + } + } + return 1; + } + } + + return 0; +} + +static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) { + Loop *scanloop; + BasicInductionVar *biv; + InductionVar *iv; + PCode *instr; + PCodeArg *op; + int i; + + for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) { + if ( + scanloop->isKnownCountingLoop && + scanloop->x4F && + bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C) + ) + { + for (biv = scanloop->basicInductionVars; biv; biv = biv->next) { + for (iv = biv->inductionVars; iv; iv = iv->next) { + if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) { + deletepcode(iv->instrC); + if (initializer) { + insertpcodeafter(initializer, iv->instrC); + } else if (loop->body->lastPCode) { + for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg + ) + break; + op++; + } + } + + if (instr) + insertpcodeafter(instr, iv->instrC); + else + insertpcodebefore(loop->body->firstPCode, iv->instrC); + } else { + appendpcode(loop->body, iv->instrC); + } + } + } + } + } + } +} + +static void strengthreducenestedbiv(BasicInductionVar *biv) { + Loop *loop; + InductionVar *iv; + + loop = biv->loop; + for (iv = biv->inductionVars; iv; iv = iv->next) + strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, loop); + + strengthreducenestediv(biv->reg, biv->step, biv->initializer, loop); +} + +static void strengthreduceinductionvariable(BasicInductionVar *biv) { + int counter; + InductionVar *iv; + InductionVar *otherIv; + short reg; + + counter = 0; + for (iv = biv->inductionVars; iv; iv = iv->next) { + if (iv->step == 1) + counter++; + } + + for (iv = biv->inductionVars; iv; iv = iv->next) { + if ( + (counter <= 4 || iv->step != 1) && + iv->instr->block && + (iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE) + ) + { + if (iv->x1E == -1) { + iv->x1E = used_virtual_registers[RegClass_GPR]++; + initializeinductionvariable(iv); + incrementinductionvariable(iv); + if (iv->step == 1) { + reg = iv->instr->args[iv->x1A].data.reg.reg; + for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { + if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg) + otherIv->x1E = iv->x1E; + } + } else { + for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { + if (otherIv->step == iv->step) + otherIv->x1E = iv->x1E; + } + } + } + + copyinductionvariable(iv); + strengthreducedloops = 1; + } + } +} + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct BivInit { + SInt32 x0; + short x4; + short x6; + short x8; + Object *xA; +} BivInit; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +static void calc_biv_init(BasicInductionVar *biv, BivInit *init) { + PCode *instr; + PCode *scan; + PCodeArg *op; + int i; + + instr = biv->initializer; + init->x0 = 0; + init->x4 = -1; + init->x6 = -1; + init->x8 = 0; + init->xA = NULL; + + if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD)) + return; + + if (instr->op == PC_ADDI) { + if (instr->args[1].data.reg.reg == biv->reg) { + init->x0 = instr->args[2].data.imm.value; + for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { + op = scan->args; + i = scan->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == biv->reg && + (op->data.reg.effect & EffectWrite) + ) + { + if (scan->op == PC_ADD) { + init->x4 = scan->args[1].data.reg.reg; + init->x6 = scan->args[2].data.reg.reg; + } else if (scan->op == PC_ADDI) { + if (scan->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.imm.value; + } else if (scan->args[2].kind == PCOp_MEMORY) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.mem.offset; + init->xA = scan->args[2].data.mem.obj; + } + } + return; + } + op++; + } + } + } else { + if (instr->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = instr->args[1].data.reg.reg; + init->x8 = instr->args[2].data.imm.value; + } else if (instr->args[2].kind == PCOp_MEMORY) { + init->x4 = instr->args[1].data.reg.reg; + init->x8 = instr->args[2].data.mem.offset; + init->xA = instr->args[2].data.mem.obj; + } + } + } else if (instr->op == PC_ADD) { + if (instr->args[1].data.reg.reg == biv->reg) { + init->x6 = instr->args[2].data.reg.reg; + for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { + op = scan->args; + i = scan->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == biv->reg && + (op->data.reg.effect & EffectWrite) && + scan->op == PC_ADDI + ) + { + if (scan->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.imm.value; + } else if (scan->args[2].kind == PCOp_MEMORY) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.mem.offset; + init->xA = scan->args[2].data.mem.obj; + } + return; + } + op++; + } + } + } else { + init->x4 = instr->args[1].data.reg.reg; + init->x6 = instr->args[2].data.reg.reg; + } + } +} + +static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) { + PCode *instr1; // r31 + int reg1; // r30 + int reg2; // r29 + PCode *instr2; // r24 + PCodeBlock *nextBlock; // r24 + BlockList *list; + PCodeArg *op; + int i; + PCode *instr; + + instr1 = NULL; + instr2 = NULL; + + reg1 = biv1->reg; + CError_ASSERT(930, reg1 >= 0); + + reg2 = biv2->reg; + CError_ASSERT(934, reg2 >= 0); + + if (!FITS_IN_SHORT(difference)) + return; + + for (list = loop->blocks; list; list = list->next) { + for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { + if (instr1) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == reg1 + ) + return; + op++; + } + } + + if (instr->op == PC_ADDI) { + if (instr->args[0].data.reg.reg == reg1) { + if (instr1) + return; + instr1 = instr; + } else if (instr->args[0].data.reg.reg == reg2) { + if (instr2) + return; + instr2 = instr; + } + } + } + } + + if (loop->body->lastPCode->flags & fIsBranch) { + nextBlock = NULL; + + for (i = 0; i < loop->body->lastPCode->argCount; i++) { + if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) { + nextBlock = loop->body->lastPCode->args[i].data.label.label->block; + break; + } + } + + if (!nextBlock) + return; + } else { + nextBlock = loop->body->nextBlock; + } + + deletepcode(instr1); + instr1->args[1].data.reg.reg = reg2; + instr1->args[2].data.imm.value = difference; + + if (nextBlock->firstPCode) + insertpcodebefore(nextBlock->firstPCode, instr1); + else + appendpcode(nextBlock, instr1); + + biv1->reg = -1; + strengthreducedloops = 1; +} + +static void strengthreduceinductionvariables(Loop *loop) { + BasicInductionVar *biv1; + BasicInductionVar *biv2; + BivInit init1; + BivInit init2; + + for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { + if (biv1->inductionVars) + strengthreduceinductionvariable(biv1); + strengthreducenestedbiv(biv1); + } + + for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { + if (biv1->reg != -1) { + calc_biv_init(biv1, &init1); + if (init1.x4 != -1) { + for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) { + if (biv2->reg != -1 && biv2 != biv1) { + calc_biv_init(biv2, &init2); + if ( + init2.x4 != -1 && + init1.x4 == init2.x4 && + init1.x6 == init2.x6 && + init1.x8 == init2.x8 && + init1.xA == init2.xA && + biv1->step == biv2->step + ) + { + if (init1.x0 < init2.x0) { + combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0); + } else { + combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0); + break; + } + } + } + } + } + } + } +} + +static void strengthreduceallinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + strengthreduceallinductionvariables(loop->children); + if (loop->basicInductionVars) + strengthreduceinductionvariables(loop); + loop = loop->nextSibling; + } +} + +void strengthreduceloops(void) { + strengthreducedloops = 0; + if (loopsinflowgraph) { + computeusedefchains(0); + findallbasicinductionvariables(loopsinflowgraph); + findallnonbasicinductionvariables(loopsinflowgraph); + strengthreduceallinductionvariables(loopsinflowgraph); + freeoheap(); + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c new file mode 100644 index 0000000..0907fa1 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c @@ -0,0 +1,661 @@ +#include "compiler/ValueNumbering.h" +#include "compiler/Alias.h" +#include "compiler/PCode.h" +#include "compiler/Registers.h" +#include "compiler/RegisterInfo.h" +#include "compiler/CompilerTools.h" +#include "compiler/CError.h" + +typedef struct ValueLabel { + struct ValueLabel *next; + PCodeArg op; +} ValueLabel; + +typedef struct AvailableValue { + struct AvailableValue *next; + ValueLabel *labelled; + PCode *pcode; + int killedregister; + int aliasnumber; + int opnumbers[0]; +} AvailableValue; + +typedef struct RegValue { + int number; + int x4; + AvailableValue *available; +} RegValue; + +typedef struct State { + void *stackedvalues; + int valueceiling; +} State; + +typedef struct StackedValue { + struct StackedValue *next; + PCodeArg op; + RegValue value; + Alias *alias; + PCode *valuepcode; +} StackedValue; + +int removedcommonsubexpressions; +int nextvaluenumber; +static AvailableValue *opvalue[428]; +static RegValue *regvalue[RegClassMax]; +static StackedValue *stackedvalues; +static int valueceiling; +static int moreaggressiveoptimization; + +static void allocatecsedatastructures(void) { + char rclass; + + for (rclass = 0; rclass < RegClassMax; rclass++) + regvalue[(char) rclass] = oalloc(sizeof(RegValue) * used_virtual_registers[(char) rclass]); +} + +static void initializecsedatastructures(void) { + RegValue *rv; + char rclass; + int i; + + nextvaluenumber = 0; + + for (i = 0; i < 428; i++) + opvalue[i] = NULL; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + rv = regvalue[(char) rclass]; + for (i = 0; i < used_virtual_registers[(char) rclass]; i++, rv++) { + rv->number = nextvaluenumber++; + rv->x4 = 0; + rv->available = NULL; + } + } + + initialize_alias_values(); + stackedvalues = NULL; + valueceiling = 0x7FFFFFFF; +} + +static void labelvalue(AvailableValue *av, PCodeArg *op) { + ValueLabel *label = oalloc(sizeof(ValueLabel)); + label->op = *op; + label->next = av->labelled; + av->labelled = label; +} + +static void unlabelvalue(AvailableValue *av, PCodeArg *op) { + ValueLabel *labelled; + ValueLabel **ptr; + + ptr = &av->labelled; + while ((labelled = *ptr)) { + if (labelled->op.data.reg.reg == op->data.reg.reg) + *ptr = labelled->next; + else + ptr = &labelled->next; + } +} + +static void stackregistervalue(PCodeArg *op, RegValue *value) { + StackedValue *stacked = oalloc(sizeof(StackedValue)); + stacked->next = stackedvalues; + stackedvalues = stacked; + + stacked->op = *op; + stacked->value = *value; +} + +static void stackmemoryvalue(Alias *alias) { + StackedValue *stacked = oalloc(sizeof(StackedValue)); + stacked->next = stackedvalues; + stackedvalues = stacked; + + stacked->op.kind = PCOp_MEMORY; + stacked->alias = alias; + stacked->value.number = alias->valuenumber; + stacked->valuepcode = alias->valuepcode; +} + +static void unstackvalue(StackedValue *stacked) { + PCodeArg *op = &stacked->op; + RegValue *value; + + if (stacked->op.kind == PCOp_MEMORY) { + stacked->alias->valuenumber = stacked->value.number; + stacked->alias->valuepcode = stacked->valuepcode; + } else { + value = ®value[op->arg][op->data.reg.reg]; + if (value->available) + unlabelvalue(value->available, op); + value->number = stacked->value.number; + value->x4 = stacked->value.x4; + value->available = stacked->value.available; + if (value->available) + labelvalue(value->available, op); + } +} + +static int samevalue(PCodeArg *op1, PCodeArg *op2) { + return regvalue[op1->arg][op1->data.reg.reg].number == regvalue[op2->arg][op2->data.reg.reg].number; +} + +static int killregister(PCodeArg *op) { + RegValue *value; + + value = ®value[op->arg][op->data.reg.reg]; + if (value->number < valueceiling && nextvaluenumber >= valueceiling) + stackregistervalue(op, value); + + if (value->available) + unlabelvalue(value->available, op); + value->available = NULL; + value->x4 = 0; + return value->number = nextvaluenumber++; +} + +void killmemory(Alias *alias, PCode *newValue) { + if (alias->valuenumber < valueceiling && nextvaluenumber >= valueceiling) + stackmemoryvalue(alias); + + if (newValue) { + alias->valuenumber = regvalue[newValue->args[0].arg][newValue->args[0].data.reg.reg].number; + alias->valuepcode = newValue; + } else { + alias->valuenumber = nextvaluenumber++; + alias->valuepcode = NULL; + } +} + +static void killspecificCSEs(short op) { + AvailableValue *av; + ValueLabel *labelled; + + for (av = opvalue[op]; av; av = av->next) { + for (labelled = av->labelled; labelled; labelled = labelled->next) + killregister(&labelled->op); + } +} + +static void killallCSEs(void) { + AvailableValue *av; + ValueLabel *labelled; + int i; + + for (i = 0; i < 428; i++) { + for (av = opvalue[i]; av; av = av->next) { + for (labelled = av->labelled; labelled; labelled = labelled->next) + killregister(&labelled->op); + } + } +} + +static void killregisters(PCode *pcode) { + PCodeArg *op; + int i; + + for (i = 0, op = pcode->args; i < pcode->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite)) + killregister(op); + } +} + +static void copyregister(PCodeArg *src, PCodeArg *dest) { + RegValue *srcvalue; + RegValue *destvalue; + + srcvalue = ®value[src->arg][src->data.reg.reg]; + destvalue = ®value[dest->arg][dest->data.reg.reg]; + + if (destvalue->number < valueceiling && nextvaluenumber >= valueceiling) + stackregistervalue(dest, destvalue); + + if (destvalue->available) + unlabelvalue(destvalue->available, dest); + destvalue->available = srcvalue->available; + if (destvalue->available) + labelvalue(destvalue->available, dest); + + destvalue->number = srcvalue->number; + + if (srcvalue->x4 && srcvalue->number == regvalue[src->arg][srcvalue->x4].number) + destvalue->x4 = srcvalue->x4; + else + destvalue->x4 = src->data.reg.reg; +} + +static int matchvalues(AvailableValue *av, PCode *match) { + PCodeArg *avOp; + PCodeArg *matchOp; + int i; + + for (avOp = &av->pcode->args[0], matchOp = &match->args[0], i = 0; i < match->argCount; i++, avOp++, matchOp++) { + if (i != 0) { + switch (avOp->kind) { + case PCOp_REGISTER: + if (av->opnumbers[i] != regvalue[matchOp->arg][matchOp->data.reg.reg].number) + return 0; + break; + case PCOp_MEMORY: + if (matchOp->kind != PCOp_MEMORY) + return 0; + if (matchOp->data.mem.obj != avOp->data.mem.obj) + return 0; + if (matchOp->data.mem.offset != avOp->data.mem.offset) + return 0; + if ((unsigned char) matchOp->arg != (unsigned char) avOp->arg) + return 0; + break; + case PCOp_IMMEDIATE: + if (matchOp->kind != PCOp_IMMEDIATE) + return 0; + if (matchOp->data.imm.value != avOp->data.imm.value) + return 0; + break; + case PCOp_LABEL: + if (matchOp->kind != PCOp_LABEL) + return 0; + if (matchOp->data.label.label != avOp->data.label.label) + return 0; + break; + case PCOp_SYSREG: + CError_FATAL(572); + } + } + } + + if ((match->flags & (fIsRead | fPCodeFlag20000)) && match->alias->valuenumber != av->aliasnumber) + return 0; + + return 1; +} + +static void chooselocation(AvailableValue *av, PCodeArg *op) { + ValueLabel *labelled; + PCodeArg *baseop; + + baseop = &av->pcode->args[0]; + labelled = av->labelled; + while (labelled) { + if (labelled->op.data.reg.reg == baseop->data.reg.reg) { + *op = labelled->op; + return; + } + labelled = labelled->next; + } + + *op = av->labelled[0].op; +} + +static int findavailablevalue(PCode *pcode, PCodeArg *op) { + AvailableValue *av; + PCodeArg tmp1; + PCodeArg tmp2; + + for (av = opvalue[pcode->op]; av; av = av->next) { + if (av->labelled && av->pcode->flags == pcode->flags && av->pcode->argCount == pcode->argCount) { + if (!matchvalues(av, pcode)) { + if (!(pcode->flags & fCommutative)) + continue; + + tmp1 = pcode->args[1]; + pcode->args[1] = pcode->args[2]; + pcode->args[2] = tmp1; + + if (!matchvalues(av, pcode)) { + tmp2 = pcode->args[1]; + pcode->args[1] = pcode->args[2]; + pcode->args[2] = tmp2; + continue; + } + } + chooselocation(av, op); + return 1; + } + } + + return 0; +} + +static void addavailablevalue(PCode *pcode) { + AvailableValue *av; + PCodeArg *op; + int i; + + av = oalloc(sizeof(AvailableValue) + sizeof(int) * pcode->argCount); + av->labelled = NULL; + av->pcode = pcode; + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER) + av->opnumbers[i] = regvalue[op->arg][op->data.reg.reg].number; + } + + if (pcode->flags & (fIsRead | fPCodeFlag20000)) + av->aliasnumber = pcode->alias->valuenumber; + + op = &pcode->args[0]; + av->killedregister = killregister(op); + labelvalue(av, op); + regvalue[op->arg][op->data.reg.reg].available = av; + av->next = opvalue[pcode->op]; + opvalue[pcode->op] = av; +} + +static int isCSEop(PCode *pcode) { + PCodeArg *baseOp; + PCodeArg *op; + int i; + + baseOp = &pcode->args[0]; + + switch (pcode->op) { + case PC_CMPI: + case PC_CMP: + case PC_CMPLI: + case PC_CMPL: + case PC_FCMPU: + case PC_FCMPO: + if (!moreaggressiveoptimization) + return 0; + break; + case PC_LI: + case PC_LIS: + if (!moreaggressiveoptimization) + return 0; + if (pcode->args[0].data.reg.reg < first_fe_temporary_register[RegClass_GPR] || pcode->args[0].data.reg.reg > last_temporary_register[RegClass_GPR]) + return 0; + break; + } + + if (PCODE_FLAG_SET_F(pcode) & (fIsVolatile | fSideEffects | fOverflow | fSetsCarry | fRecordBit)) + return 0; + + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if (op != baseOp && + op->kind == baseOp->kind && + op->arg == baseOp->arg && + op->data.reg.reg == baseOp->data.reg.reg) + return 0; + } + + return 1; +} + +static int isCSEload(PCode *pcode) { + PCodeArg *op; + int i; + int count; + + count = 0; + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite)) + count++; + } + + return count == 1; +} + +static void registercopy(PCode *pcode) { + PCodeArg *op1; + PCodeArg *op2; + + op1 = &pcode->args[0]; + op2 = &pcode->args[1]; + if (samevalue(op2, op1)) + deletepcode(pcode); + else + copyregister(op2, op1); +} + +static PCode *recentlystored(Alias *alias, PCodeArg *op) { + PCode *pc; + if ((pc = alias->valuepcode) && alias->valuenumber == regvalue[pc->args[0].arg][pc->args[0].data.reg.reg].number) { + *op = pc->args[0]; + return pc; + } else { + return NULL; + } +} + +static void simpleload(PCode *pcode) { + PCodeArg *origOp; + PCodeArg op; + PCode *rs; + + origOp = &pcode->args[0]; + if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) { + killregisters(pcode); + return; + } + + if (findavailablevalue(pcode, &op)) { + if (!samevalue(origOp, &op)) { + insertpcodebefore(pcode, makecopyinstruction(&op, origOp)); + copyregister(&op, origOp); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else if ((rs = recentlystored(pcode->alias, &op)) && can_reuse_stored_value(rs, pcode)) { + if (!samevalue(origOp, &op)) { + insertpcodebefore(pcode, makecopyinstruction(&op, origOp)); + copyregister(&op, origOp); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else { + addavailablevalue(pcode); + } +} + +static void simplestore(PCode *pcode) { + update_alias_value(pcode->alias, pcode); + killregisters(pcode); +} + +static void pointerload(PCode *pcode) { + PCodeArg *op; + PCodeArg buf; + + op = &pcode->args[0]; + + if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) { + killregisters(pcode); + return; + } + + if (findavailablevalue(pcode, &buf)) { + if (!samevalue(op, &buf)) { + insertpcodebefore(pcode, makecopyinstruction(&buf, op)); + copyregister(&buf, op); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else { + addavailablevalue(pcode); + } +} + +static void pointerstore(PCode *pcode) { + update_alias_value(pcode->alias, NULL); + killregisters(pcode); +} + +static void arithmeticop(PCode *pcode) { + PCodeArg *op; + PCodeArg buf; + + op = &pcode->args[0]; + + if (findavailablevalue(pcode, &buf)) { + if (!samevalue(op, &buf)) { + insertpcodebefore(pcode, makecopyinstruction(&buf, op)); + copyregister(&buf, op); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else { + addavailablevalue(pcode); + } +} + +static void functioncall(PCode *pcode) { + killregisters(pcode); + if (coloring) { + update_all_alias_values(); + killallCSEs(); + } else { + update_alias_value(pcode->alias, NULL); + } +} + +static void operatefrommemory(PCode *pcode) { + CError_FATAL(980); +} + +static void operatetomemory(PCode *pcode) { + CError_FATAL(1011); +} + +static void propagatecopiesto(PCode *pcode) { + PCodeArg *op; + int i; + + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if ( + op->kind == PCOp_REGISTER && + (op->data.reg.effect & (EffectRead | EffectWrite | Effect8)) == EffectRead && + op->data.reg.reg >= n_real_registers[op->arg] && + regvalue[op->arg][op->data.reg.reg].x4 && + regvalue[op->arg][op->data.reg.reg].x4 >= n_real_registers[op->arg] && + regvalue[op->arg][op->data.reg.reg].number == regvalue[op->arg][regvalue[op->arg][op->data.reg.reg].x4].number + ) { + op->data.reg.reg = regvalue[op->arg][op->data.reg.reg].x4; + } + } +} + +static void removecsesfrombasicblock(PCodeBlock *block) { + PCode *pcode; + PCode *next; + + for (pcode = block->firstPCode; pcode; pcode = next) { + next = pcode->nextPCode; + propagatecopiesto(pcode); + if (pcode->flags & fIsMove) { + registercopy(pcode); + } else if ((pcode->flags & fIsCall) && (pcode->flags & (fLink | fSideEffects))) { + functioncall(pcode); + } else if (pcode->flags & fIsRead) { + if (pcode->flags & fIsPtrOp) + pointerload(pcode); + else + simpleload(pcode); + } else if (pcode->flags & fIsWrite) { + if (pcode->flags & fIsPtrOp) + pointerstore(pcode); + else + simplestore(pcode); + } else if (pcode->flags & fPCodeFlag20000) { + operatefrommemory(pcode); + } else if (pcode->flags & fPCodeFlag40000) { + operatetomemory(pcode); + } else if ((pcode->flags & fIsCSE) && isCSEop(pcode)) { + arithmeticop(pcode); + } else { + killregisters(pcode); + } + } + + block->flags |= fVisited; +} + +static void getvaluestate(State *state) { + state->stackedvalues = stackedvalues; + state->valueceiling = valueceiling; +} + +static void setvaluestate(State *state) { + stackedvalues = state->stackedvalues; + valueceiling = state->valueceiling; +} + +static void forkvaluestate(int number) { + stackedvalues = NULL; + valueceiling = number; +} + +static void regressvaluestate(void) { + AvailableValue *av; + AvailableValue **ptr; + int i; + StackedValue *stacked; + + for (i = 0; i < 428; i++) { + ptr = &opvalue[i]; + while ((av = *ptr)) { + if (av->killedregister >= valueceiling) + *ptr = av->next; + else + ptr = &av->next; + } + } + + for (stacked = stackedvalues; stacked; stacked = stacked->next) + unstackvalue(stacked); +} + +static void removecsesfromextendedbasicblock(PCodeBlock *block) { + PCLink *succ; + int counter; + State state; + + removecsesfrombasicblock(block); + while (block->successors && + !block->successors->nextLink && + block->successors->block->predecessors && + !block->successors->block->predecessors->nextLink) { + block = block->successors->block; + removecsesfrombasicblock(block); + } + + counter = 0; + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) + counter++; + } + + if (counter) { + getvaluestate(&state); + forkvaluestate(nextvaluenumber); + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) { + removecsesfromextendedbasicblock(succ->block); + regressvaluestate(); + } + } + setvaluestate(&state); + } +} + +void removecommonsubexpressions(Object *proc, int flag) { + PCodeBlock *block; + + moreaggressiveoptimization = flag; + removedcommonsubexpressions = 0; + gather_alias_info(); + allocatecsedatastructures(); + + for (block = pcbasicblocks; block; block = block->nextBlock) + block->flags &= ~fVisited; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (!(block->flags & fVisited)) { + initializecsedatastructures(); + removecsesfromextendedbasicblock(block); + } + } + + freeoheap(); +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c new file mode 100644 index 0000000..fde27f1 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c @@ -0,0 +1,548 @@ +#include "compiler/VectorArraysToRegs.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +typedef struct LocalVectorArray { + struct LocalVectorArray *next; + Object *object; + unsigned int invalid:1; + SInt32 arraySize; + SInt32 elementCount; + int totalUses; + int elements[1]; +} LocalVectorArray; + +typedef struct VectorPropInfo { + UInt32 *use; + UInt32 *def; + UInt32 *in; + UInt32 *out; +} VectorPropInfo; + +typedef struct ADDI { + PCode *instr; + RegUseOrDef *list; +} ADDI; + +static int number_of_ADDIs; +static ADDI *ADDIs; +static VectorPropInfo *vectorpropinfo; +static int *naddsinblock; +static int *firstaddinblock; +static Boolean converted_arrays; + +static LocalVectorArray *scanforlocalvectorarrays(void) { + SInt32 elementCount; + LocalVectorArray *head; + LocalVectorArray *array; + ObjectList *list; + int i; + SInt32 arraySize; + + head = NULL; + + for (list = locals; list; list = list->next) { + if ( + list->object && + !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && + list->object->type && + IS_TYPE_ARRAY(list->object->type) && + IS_TYPE_VECTOR(TPTR_TARGET(list->object->type)) + ) { + arraySize = list->object->type->size; + elementCount = arraySize / 16; + if (elementCount > 0 && elementCount <= 8) { + array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray)); + array->next = head; + head = array; + + array->object = list->object; + array->arraySize = arraySize; + array->elementCount = elementCount; + array->totalUses = 0; + array->invalid = 0; + + for (i = 0; i < elementCount; i++) { + array->elements[i] = 0; + } + } + } + } + + return head; +} + +static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) { + while (arrays) { + if (arrays->object == object) + return arrays; + arrays = arrays->next; + } + return NULL; +} + +static void scaninstructions(LocalVectorArray *arrays) { + PCodeBlock *block; + PCode *instr; + int counter; + int i; + PCodeArg *op; + LocalVectorArray *array; + int element; + + naddsinblock = oalloc(sizeof(int) * pcblockcount); + memclrw(naddsinblock, sizeof(int) * pcblockcount); + + firstaddinblock = oalloc(sizeof(int) * pcblockcount); + memclrw(firstaddinblock, sizeof(int) * pcblockcount); + + number_of_ADDIs = 0; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + firstaddinblock[block->blockIndex] = number_of_ADDIs; + counter = 0; + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_MEMORY && + (PCOpMemoryArg) op->arg == PCOpMemory1 && + (array = lookup_vector_array_object(arrays, op->data.mem.obj)) && + !array->invalid + ) + { + if (instr->op != PC_ADDI) { + array->invalid = 1; + } else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) { + array->invalid = 1; + } else { + number_of_ADDIs++; + counter++; + } + + if (!array->invalid) { + element = op->data.mem.offset / 16; + if (element < array->elementCount) + array->elements[element]++; + else + array->invalid = 1; + } + } + op++; + } + } + } + naddsinblock[block->blockIndex] = counter; + } +} + +static void computeaddilist(LocalVectorArray *arrays) { + PCodeBlock *block; + PCode *instr; + RegUseOrDef *list; + ADDI *addi; + UInt32 *vec; + LocalVectorArray *array; + UseOrDef *def; + int defID; + UseOrDef *use; + int useID; + + ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs); + memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs); + + vec = oalloc(4 * ((number_of_Uses + 31) >> 5)); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (naddsinblock[block->blockIndex]) { + bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses); + addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1]; + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + int reg; // r18 + if ( + instr->op == PC_ADDI && + (reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] && + instr->args[2].kind == PCOp_MEMORY && + (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && + (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) && + !array->invalid + ) + { + addi->instr = instr; + addi->list = NULL; + for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) { + if (bitvectorgetbit(list->id, vec)) { + RegUseOrDef *node = oalloc(sizeof(RegUseOrDef)); + node->id = list->id; + node->next = addi->list; + addi->list = node; + } + } + addi--; + } + + for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) { + if (def->v.kind == PCOp_REGISTER) { + RegUseOrDef *l; + for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next) + bitvectorclearbit(l->id, vec); + } + } + + for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) { + if (use->v.kind == PCOp_REGISTER) + bitvectorsetbit(useID, vec); + } + } + } + } + } +} + +static void allocatevectorpropinfo(void) { + VectorPropInfo *info; + int i; + + vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount); + for (i = 0, info = vectorpropinfo; i < pcblockcount; i++, info++) { + info->use = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + info->def = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + info->in = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + info->out = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + } +} + +static void computelocalvectorpropinfo(LocalVectorArray *arrays) { + VectorPropInfo *info; + PCodeBlock *block; + PCode *instr; + UInt32 *vec0; + UInt32 *vec4; + int index; + PCodeArg *op; + int i; + int addi_i; + ADDI *addi; + LocalVectorArray *array; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + info = &vectorpropinfo[block->blockIndex]; + vec0 = info->use; + vec4 = info->def; + bitvectorinitialize(vec0, number_of_ADDIs, 0); + bitvectorinitialize(vec4, number_of_ADDIs, 0); + index = firstaddinblock[block->blockIndex]; + + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + i = instr->argCount; + op = instr->args; + while (i--) { + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) { + for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) { + if ( + addi->instr && + addi->instr->args[0].arg == op->arg && + addi->instr->args[0].data.reg.reg == op->data.reg.reg + ) + { + if (addi->instr->block == block) + bitvectorclearbit(addi_i, vec0); + else + bitvectorsetbit(addi_i, vec4); + } + } + } + op++; + } + + if ( + instr->op == PC_ADDI && + instr->args[2].kind == PCOp_MEMORY && + (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && + (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) && + !array->invalid + ) + { + bitvectorsetbit(index, vec0); + index++; + } + } + } + } +} + +static void computeglobalvectorpropinfo(void) { + VectorPropInfo *info; + PCodeBlock *block; + UInt32 *vec0; + UInt32 *vec4; + UInt32 *vec8; + UInt32 *vecC; + int bitvecsize; + int blockIndex; + int i; + int j; + int flag; + PCLink *preds; + UInt32 val; + + bitvecsize = (number_of_ADDIs + 31) >> 5; + flag = 1; + info = &vectorpropinfo[pcbasicblocks->blockIndex]; + bitvectorinitialize(info->in, number_of_ADDIs, 0); + bitvectorcopy(info->out, info->use, number_of_ADDIs); + + for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) { + info = &vectorpropinfo[block->blockIndex]; + vecC = info->out; + vec4 = info->def; + for (i = 0; i < bitvecsize; vecC++, vec4++, i++) + *vecC = ~*vec4; + } + + while (flag) { + flag = 0; + for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) { + if (depthfirstordering[blockIndex]) { + info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex]; + if ((preds = depthfirstordering[blockIndex]->predecessors)) { + vec8 = info->in; + bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs); + for (preds = preds->nextLink; preds; preds = preds->nextLink) + bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs); + } + + vecC = info->out; + vec8 = info->in; + vec0 = info->use; + vec4 = info->def; + for (j = 0; j < bitvecsize; j++) { + val = *vec0 | (*vec8 & ~*vec4); + if (val != *vecC) { + *vecC = val; + flag = 1; + } + vec8++; + vecC++; + vec4++; + vec0++; + } + } + } + } +} + +static int precedes(PCode *a, PCode *b) { + PCode *scan; + + for (scan = a->nextPCode; scan; scan = scan->nextPCode) { + if (scan == b) + return 1; + } + + return 0; +} + +static int checkvectorstoreorload(int addiID, int useID) { + PCode *addiInstr; + UseOrDef *use; + + addiInstr = ADDIs[addiID].instr; + use = Uses + useID; + if (!addiInstr) + return 0; + + if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) + return 0; + + if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX) + return 0; + + if ( + use->pcode->args[1].kind != PCOp_REGISTER || + use->pcode->args[1].arg != RegClass_GPR || + use->pcode->args[1].data.reg.reg != 0 + ) + return 0; + + return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg; +} + +static int checkalluses(LocalVectorArray *arrays, int addiID) { + RegUseOrDef *list; + PCode *instr; + LocalVectorArray *array; + + instr = ADDIs[addiID].instr; + for (list = ADDIs[addiID].list; list; list = list->next) { + if (list && !checkvectorstoreorload(addiID, list->id)) { + array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj); + array->invalid = 1; + return 0; + } + } + + return 1; +} + +static void convert_array_to_register(LocalVectorArray *arrays, int addiID) { + ADDI *addi; + int newReg; + RegUseOrDef *list; + PCode *instr; + PCode *useInstr; + LocalVectorArray *array; + int element; + + addi = ADDIs + addiID; + + if (!(instr = addi->instr)) + return; + + if ( + !(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) || + array->invalid + ) + return; + + element = instr->args[2].data.mem.offset / 16; + if (element > array->elementCount) + return; + + newReg = array->elements[element]; + for (list = addi->list; list; list = list->next) { + useInstr = Uses[list->id].pcode; + if (useInstr->op == PC_LVX) { + converted_arrays = 1; + change_opcode(useInstr, PC_VMR); + change_num_operands(useInstr, 2); + useInstr->args[1].kind = PCOp_REGISTER; + useInstr->args[1].arg = RegClass_VR; + useInstr->args[1].data.reg.reg = newReg; + useInstr->args[1].data.reg.effect = EffectRead; + } else if (useInstr->op == PC_STVX) { + converted_arrays = 1; + change_opcode(useInstr, PC_VMR); + change_num_operands(useInstr, 2); + useInstr->args[1] = useInstr->args[0]; + useInstr->args[0].kind = PCOp_REGISTER; + useInstr->args[0].arg = RegClass_VR; + useInstr->args[0].data.reg.reg = newReg; + useInstr->args[0].data.reg.effect = EffectWrite; + } else { + CError_FATAL(661); + } + } + deletepcode(addi->instr); +} + +static void convert_arrays_to_registers(LocalVectorArray *arrays) { + int i; + int counter; + LocalVectorArray **ptr; + LocalVectorArray *array; + + for (i = 0; i < number_of_ADDIs; i++) + checkalluses(arrays, i); + + counter = 0; + ptr = &arrays; + array = *ptr; + while (array) { + if (array->invalid) { + *ptr = array->next; + array = *ptr; + continue; + } + + counter += array->elementCount; + + for (i = 0; i < array->elementCount; i++) + array->totalUses += array->elements[i]; + + array = array->next; + } + + if (arrays) { + while (counter > 32) { + LocalVectorArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + counter -= best->elementCount; + } + + if (!(array = arrays)) + return; + + while (array) { + for (i = 0; i < array->elementCount; i++) + array->elements[i] = used_virtual_registers[RegClass_VR]++; + array = array->next; + } + + if (arrays) { + for (i = 0; i < number_of_ADDIs; i++) + convert_array_to_register(arrays, i); + } + } +} + +int vectorarraystoregs(void) { + LocalVectorArray *arrays; + + converted_arrays = 0; + if ((arrays = scanforlocalvectorarrays())) { + scaninstructions(arrays); + if (number_of_ADDIs > 0) { + computeusedefchains(0); + computeaddilist(arrays); + allocatevectorpropinfo(); + computelocalvectorpropinfo(arrays); + computedepthfirstordering(); + computeglobalvectorpropinfo(); + convert_arrays_to_registers(arrays); + } + } + + freeoheap(); + return converted_arrays; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/FuncLevelAsmPPC.c b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/FuncLevelAsmPPC.c new file mode 100644 index 0000000..340a54b --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/FuncLevelAsmPPC.c @@ -0,0 +1,393 @@ +#include "compiler/FuncLevelAsmPPC.h" +#include "compiler/CCompiler.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/CMangler.h" +#include "compiler/CParser.h" +#include "compiler/CPrepTokenizer.h" +#include "compiler/CodeGen.h" +#include "compiler/Coloring.h" +#include "compiler/CompilerTools.h" +#include "compiler/DumpIR.h" +#include "compiler/InlineAsmPPC.h" +#include "compiler/InlineAsmRegisters.h" +#include "compiler/ObjGenMachO.h" +#include "compiler/PCode.h" +#include "compiler/PCodeAssembly.h" +#include "compiler/PCodeListing.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/PPCError.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/TOC.h" +#include "compiler/objects.h" + +static EntryPoint *entrypoints_head; +static EntryPoint **entrypoints_tail; + +void setup_assembly_argument(Object *obj, short reg) { + VarInfo *vi; + Type *type; + + vi = Registers_GetVarInfo(obj); + type = obj->type; + vi->used = 1; + + if (!requires_frame) { + if (is_register_object(obj)) { + if (!reg) + CError_Error(CErrorStr263, obj->name->name); + + if (TYPE_IS_8BYTES(type)) { + short regLo; + short regHi; + if (reg < 10) { + if (copts.littleendian) { + regLo = reg; + regHi = reg + 1; + } else { + regLo = reg + 1; + regHi = reg; + } + retain_GPR_pair(obj, regLo, regHi); + InlineAsm_InsertRegister(obj->name->name, RegClass_GPR, regLo, obj); + } + } else if (IS_TYPE_FLOAT(type)) { + retain_register(obj, RegClass_FPR, reg); + InlineAsm_InsertRegister(obj->name->name, RegClass_FPR, reg, obj); + } else if (IS_TYPE_VECTOR(type)) { + retain_register(obj, RegClass_VR, reg); + InlineAsm_InsertRegister(obj->name->name, RegClass_VR, reg, obj); + } else { + retain_register(obj, RegClass_GPR, reg); + InlineAsm_InsertRegister(obj->name->name, RegClass_GPR, reg, obj); + } + } + } else { + if (is_register_object(obj)) { + vi = Registers_GetVarInfo(obj); + if (!vi->reg) { + assign_register_by_type(obj); + if (!(vi->flags & VarInfoFlag2)) + CError_Error(CErrorStr263, obj->name->name); + else + InlineAsm_InsertRegister(obj->name->name, vi->rclass, vi->reg, obj); + } + } + } +} + +void assign_local_addresses(void) { + VarInfo *vi; + ObjectList *list; + Object *object; + + for (list = locals; list; list = list->next) { + vi = CodeGen_GetNewVarInfo(); + list->object->u.var.info = vi; + list->object->flags |= OBJECT_USED; + vi->used = 1; + } + + for (list = locals; list; list = list->next) { + object = list->object; + if (is_register_object(object)) { + vi = Registers_GetVarInfo(object); + if (!vi->reg) { + assign_register_by_type(object); + if (!(vi->flags & VarInfoFlag2)) + CError_Error(CErrorStr263, object->name->name); + else + InlineAsm_InsertRegister(object->name->name, vi->rclass, vi->reg, object); + } + } + } + + for (list = locals; list; list = list->next) { + object = list->object; + if (OBJECT_REG(object) == 0) + assign_local_memory(object); + } +} + +static void FuncAsm_PreScanDirectives(void) { + SInt32 directive; + Boolean save_eoltokens; + + in_assembler = 1; + save_eoltokens = cprep_eoltokens; + cprep_eoltokens = 1; + + if (setjmp(InlineAsm_assemblererror) == 0) { + while (tk == TK_IDENTIFIER && (directive = InlineAsm_IsDirective(AssemblerType_1))) { + InlineAsm_ProcessDirective(directive); + + if (tk == ';' || tk == TK_EOL) { + CPrep_TokenStreamFlush(); + tk = lex(); + } else { + InlineAsm_SyntaxError(CErrorStr113); + } + + if (directive == IADirective_FrAlloc) { + requires_frame = 1; + break; + } else if (directive == IADirective_NoFrAlloc) { + user_responsible_for_frame = 1; + break; + } + } + } + + in_assembler = 0; + cprep_eoltokens = save_eoltokens; +} + +static void FuncAsm_AddEntryPoint(Statement *stmt, PCodeBlock *block) { + EntryPoint *ep; + IAEntryPoint *ia_ep; + + ia_ep = (IAEntryPoint *) stmt->expr; + ep = lalloc(sizeof(EntryPoint)); + memclrw(ep, sizeof(EntryPoint)); + + ep->object = ia_ep->x8; + ep->block = block; + + *entrypoints_tail = ep; + entrypoints_tail = &ep->next; + + block->flags |= fPCBlockFlag8000; +} + +void Assembler(Object *func) { + PCodeBlock *block; + Statement *stmt; + Boolean flag17; + Boolean flag16; + char *name; + InlineAsm *ia; + Boolean save_unusedvar; + Boolean save_unusedarg; + + flag17 = 0; + flag16 = 0; + + init_endian(); + init_stack_globals(func); + memclrw(asm_alloc_flags, sizeof(asm_alloc_flags)); + fralloc_parameter_area_size = 0; + user_responsible_for_frame = 0; + assembledinstructions = 0; + + entrypoints_head = NULL; + entrypoints_tail = &entrypoints_head; + + stmt = curstmt; + + if (func && func->name) + PrintProgressFunction(func->name->name); + + CodeGen_InitialSanityCheck(); + + if (func->qual & Q_INLINE) + PPCError_Warning(PPCErrorStr173); + + CheckCLabels(); + + if (fatalerrors) + return; + + if (copts.filesyminfo) + CPrep_SetSourceFile(&cparser_fileoffset); + + sm_section = SECT_TEXT; + + initpcode(); + + pclabel(prologue = makepcblock(), makepclabel()); + pclabel(block = makepcblock(), makepclabel()); + pcbranch(prologue, block->labels); + + resetTOCvarinfo(); + InlineAsm_InitializePPC(); + FuncAsm_PreScanDirectives(); + + disable_optimizer = 1; + + init_registers(); + assign_arguments_to_memory(func, 0, 0); + init_frame_sizes(0); + + if (copts.debuglisting) + DumpIR(stmt, func); + + cprep_eoltokens = 1; + in_assembler = 1; + + save_unusedvar = copts.warn_unusedvar; + save_unusedarg = copts.warn_unusedarg; + copts.warn_unusedvar = 0; + copts.warn_unusedarg = 0; + + InlineAsm_ScanFunction('}'); + + expandTOCreferences(&stmt->next); + + if (!anyerrors && copts.debuglisting) + DumpIR(stmt, func); + + in_assembler = 0; + cprep_eoltokens = 0; + + name = CMangler_GetLinkName(func)->name; + func->flags |= OBJECT_DEFINED; + + if (fralloc_parameter_area_size) + update_out_param_size(fralloc_parameter_area_size); + if (!user_responsible_for_frame) + process_arguments(move_assigned_argument, 0); + + branch_label(makepclabel()); + assign_labels(stmt->next); + + copts.warn_unusedvar = save_unusedvar; + copts.warn_unusedarg = save_unusedarg; + + for (stmt = stmt->next; stmt; stmt = stmt->next) { + current_statement = stmt; + switch (stmt->type) { + case ST_ASM: + if ((ia = (InlineAsm *) stmt->expr)) { + if (ia->flags & IAFlag1) { + if (ia->opcode == IADirective_Entry) { + branch_label(makepclabel()); + FuncAsm_AddEntryPoint(stmt, pclastblock); + } else if (ia->opcode == IADirective_FrFree) { + if (flag16) + PPCError_Error(PPCErrorStr188); + else + flag16 = 1; + + asm_alloc_flags[3] = 1; + asm_alloc_flags[4] = 1; + branch_label(makepclabel()); + + epilogue = pclastblock; + pclastblock->flags |= fIsEpilogue; + + CheckCLabels(); + if (fatalerrors) + return; + + pccomputepredecessors(); + if (copts.debuglisting) + pclistblocks(name, "[FUNCTION-LEVEL ASM] INITIAL CODE"); + colorinstructions(func); + if (copts.debuglisting) + pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER REGISTER COLORING"); + compute_frame_sizes(); + generate_prologue(prologue, 0); + epilogue = pclastblock; + generate_epilogue(epilogue, 0); + if (copts.debuglisting) + pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER PROLOGUE/EPILOGUE CREATION"); + + flag17 = 1; + } + } else { + branch_label(makepclabel()); + asm_alloc_flags[6] = 0; + asm_alloc_flags[7] = 0; + InlineAsm_TranslateIRtoPCode(stmt); + asm_alloc_flags[4] = 0; + } + } + break; + case ST_LABEL: + if (!stmt->label->pclabel->resolved) + branch_label(stmt->label->pclabel); + break; + default: + CError_FATAL(525); + } + } + + current_statement = NULL; + + if (fatalerrors) + return; + CheckCLabels(); + if (fatalerrors) + return; + + if (!flag17) { + branch_label(makepclabel()); + + epilogue = pclastblock; + pclastblock->flags |= fIsEpilogue; + + pccomputepredecessors(); + if (copts.debuglisting) + pclistblocks(name, "[FUNCTION-LEVEL ASM] INITIAL CODE"); + + if (!asm_alloc_flags[1]) { + colorinstructions(func); + if (fatalerrors) + return; + + if (copts.debuglisting) + pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER REGISTER COLORING"); + } + + compute_frame_sizes(); + if (asm_alloc_flags[1]) + no_frame_for_asm(); + + if (fatalerrors) + return; + + if (!asm_alloc_flags[1]) { + generate_prologue(prologue, 0); + generate_epilogue(epilogue, !asm_alloc_flags[6] && !asm_alloc_flags[7]); + } + + if (copts.debuglisting) + pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER PROLOGUE/EPILOGUE CREATION"); + } + + if (fatalerrors) + return; + + if (!asm_alloc_flags[1] && needs_frame()) { + if (asm_alloc_flags[3]) { + if (!asm_alloc_flags[5] || !asm_alloc_flags[6]) + PPCError_Warning(PPCErrorStr187, "blr"); + if (asm_alloc_flags[8]) + PPCError_Warning(PPCErrorStr186); + } else { + PPCError_Warning(PPCErrorStr185, "blr"); + } + } + + func->section = sm_section; + + if (copts.filesyminfo) + symdeclend = CPrep_GetFileOffsetInfo(&cparser_fileoffset); + + copts.peephole = 0; + if (pic_base_label) + pic_base_pcodelabel = pic_base_label->pclabel; + assemblefunction(func, entrypoints_head); + + if (copts.debuglisting) + pclistblocks(CMangler_GetLinkName(func)->name, "[FUNCTION-LEVEL ASM] FINAL CODE"); + + CFunc_WarnUnused(); +} + +void SetupAssembler(void) { +} + +void CleanupAssembler(void) { +} diff --git a/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/GCCInlineAsm.c b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/GCCInlineAsm.c new file mode 100644 index 0000000..897df9b --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/GCCInlineAsm.c @@ -0,0 +1,230 @@ +#include "compiler/GCCInlineAsm.h" +#include "compiler/CError.h" +#include "compiler/CExpr.h" +#include "compiler/CFunc.h" +#include "compiler/CInt64.h" +#include "compiler/CParser.h" +#include "compiler/CPrep.h" +#include "compiler/CPrepTokenizer.h" +#include "compiler/InlineAsm.h" +#include "compiler/objects.h" + +Statement *first_ST_ASM; +IALookupResult gcc_name_list[20]; +int gcc_name_list_index; + +void InlineAsm_SkipComment(void) { + while (1) { + if (tk != '/') + break; + if (lookahead() != '*') + break; + + tk = lex(); + while (!((tk = lex()) == '*' && (tk = lex()) == '/')) { + // nothing + } + tk = lex(); + } +} + +static char gcc_parse_attribute(void) { + char ch; + + while (tk == TK_EOL) + tk = lex(); + + if (tk != '"') + CError_Error(CErrorStr105); + + while ((tk = lex()) != TK_IDENTIFIER) { + // nothing + } + + ch = tkidentifier->name[0]; + + if ((tk = lex()) != '"') + CError_Error(CErrorStr105); + tk = lex(); + + return ch; +} + +static void gcc_parse_name(Boolean flag, char attribute) { + IALookupResult *nameentry; + ENode *expr; + Object *tempobj; + ENode *tempexpr; + Statement *stmt; + + while (tk == TK_EOL) + tk = lex(); + + if (tk != '(') + CError_Error(CErrorStr114); + + tk = lex(); + + if (flag) { + if (tk != TK_IDENTIFIER) + CError_Error(CErrorStr105); + + InlineAsm_LookupSymbol(tkidentifier, &gcc_name_list[++gcc_name_list_index]); + if (gcc_name_list[gcc_name_list_index].object && gcc_name_list[gcc_name_list_index].object->u.var.info) + gcc_name_list[gcc_name_list_index].object->u.var.info->used = 1; + + tk = lex(); + } else { + in_assembler = 0; + cprep_nostring = 0; + nameentry = &gcc_name_list[++gcc_name_list_index]; + + expr = expression(); + if (attribute == 'i' || attribute == 'I') { + if (!ENODE_IS(expr, EINTCONST)) + CError_Error(CErrorStr144); + nameentry->value = CInt64_GetULong(&expr->data.intval); + nameentry->has_value = 1; + } else { + tempobj = create_temp_object(expr->rtype); + tempexpr = create_objectnode(tempobj); + if (tempobj->u.var.info) + tempobj->u.var.info->used = 1; + expr = makediadicnode(tempexpr, expr, EASS); + + stmt = CFunc_InsertBeforeStatement(ST_EXPRESSION, first_ST_ASM); + first_ST_ASM = stmt->next; + if (!first_ST_ASM->next) + curstmt = first_ST_ASM; + stmt->expr = expr; + + nameentry->name = tempobj->name; + nameentry->object = tempobj; + nameentry->label = NULL; + nameentry->type = NULL; + nameentry->has_value = 0; + } + } + + cprep_nostring = 1; + in_assembler = 1; + + if (tk != ')') + CError_Error(CErrorStr115); + tk = lex(); +} + +static void gcc_parse_expression(Boolean flag) { + while (1) { + gcc_parse_name(flag, gcc_parse_attribute()); + if (tk != ',') + break; + tk = lex(); + } +} + +static void gcc_parse_input(void) { + if (tk == ':') { + if ((tk = lex()) == ':' || tk == ')' || tk == '}') + return; + gcc_parse_expression(0); + } +} + +static void gcc_parse_output(void) { + if (tk == ':') { + if ((tk = lex()) == ':' || tk == ')' || tk == '}') + return; + gcc_parse_expression(1); + } +} + +static void gcc_parse_killed(void) { + if (tk == ':') { + while (1) { + if ((tk = lex()) != '"') + return; + + tk = lex(); + while (1) { + if (tk == '"') { + if (lookahead() == ',') { + tk = lex(); + break; + } + tk = lex(); + return; + } + tk = lex(); + } + } + } +} + +static void gcc_replace_arg_st_asm(Statement *stmt) { + InlineAsm *ia; + int i; + IAOperand *op; + short effect; + short rclass; + SInt32 num; + + if ((ia = (InlineAsm *) stmt->expr)) { + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + switch (op->type) { + case IAOpnd_Imm: + case IAOpnd_Reg: + case IAOpnd_3: + case IAOpnd_4: + case IAOpnd_Lab: + break; + + case IAOpnd_6: + if (op->u.unk6.unk4 == 2) { + effect = op->u.unk6.effect; + rclass = op->u.unk6.rclass; + num = op->u.unk6.num; + op->type = IAOpnd_Reg; + op->u.reg.effect = effect; + op->u.reg.rclass = rclass; + op->u.reg.object = NULL; + if (num <= gcc_name_list_index) + op->u.reg.object = gcc_name_list[num].object; + else + CError_Error(CErrorStr144); + op->u.reg.num = 0; + } else { + CError_FATAL(365); + } + break; + + case IAOpnd_7: + op->type = IAOpnd_Imm; + op->u.imm.value = gcc_name_list[op->u.unk7.value].value; + break; + } + } + } +} + +static void gcc_replace_arg(void) { + Statement *stmt; + + for (stmt = first_ST_ASM; stmt; stmt = stmt->next) { + if (stmt->type == ST_ASM) + gcc_replace_arg_st_asm(stmt); + } +} + +void InlineAsm_gcc_parse(void) { + gcc_name_list_index = -1; + cprep_eoltokens = 0; + + if (tk == TK_EOL) + tk = lex(); + + gcc_parse_output(); + gcc_parse_input(); + gcc_parse_killed(); + gcc_replace_arg(); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/InlineAsm.c b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/InlineAsm.c new file mode 100644 index 0000000..46a95d2 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/InlineAsm.c @@ -0,0 +1,680 @@ +#include "compiler/InlineAsm.h" +#include "compiler/InlineAsmPPC.h" +#include "compiler/GCCInlineAsm.h" +#include "compiler/CompilerTools.h" +#include "compiler/CError.h" +#include "compiler/CExpr.h" +#include "compiler/CFunc.h" +#include "compiler/CInit.h" +#include "compiler/CInline.h" +#include "compiler/CInt64.h" +#include "compiler/CMachine.h" +#include "compiler/COptimizer.h" +#include "compiler/CParser.h" +#include "compiler/CPrep.h" +#include "compiler/CPrepTokenizer.h" +#include "compiler/CScope.h" +#include "compiler/PCode.h" +#include "compiler/Registers.h" +#include "compiler/objects.h" +#include "compiler/scopes.h" +#include "compiler/types.h" + +int allow_array_expressions = 1; + +int backtracking; +jmp_buf backtrack; +jmp_buf InlineAsm_assemblererror; +static int ASMstmtnb; + +void AssemblerError(void) { + longjmp(InlineAsm_assemblererror, 1); +} + +void InlineAsm_SyntaxError(short code) { + if (backtracking) + longjmp(backtrack, 1); + + if (tk == TK_EOL || tk == ';') + code = CErrorStr112; + CError_Error(code); +} + +CLabel *InlineAsm_LookupLabel(HashNameNode *name) { + CLabel *label; + + for (label = Labels; label; label = label->next) { + if (name == label->name) + break; + } + + return label; +} + +CLabel *InlineAsm_DeclareLabel(HashNameNode *name) { + CLabel *label = newlabel(); + label->name = name; + label->next = Labels; + Labels = label; + return label; +} + +static void InlineAsm_DefineLabel(HashNameNode *name) { + CLabel *label; + Statement *stmt; + + label = InlineAsm_LookupLabel(name); + if (!label) { + label = InlineAsm_DeclareLabel(name); + } else { + if (label->stmt) + CError_Error(CErrorStr171, name->name); + } + + stmt = CFunc_AppendStatement(ST_LABEL); + stmt->label = label; + label->stmt = stmt; +} + +Boolean InlineAsm_LookupSymbolOrTag(HashNameNode *name, IALookupResult *result, Boolean allow_tag) { + ObjBase *obj; + NameSpace *nspace; + NameSpaceObjectList *list; + + result->name = name; + result->object = NULL; + result->label = NULL; + result->type = NULL; + result->has_value = 0; + + if ((result->label = InlineAsm_LookupLabel(name))) + return 1; + + for (nspace = cscope_current; nspace; nspace = nspace->parent) { + if ((list = CScope_FindName(nspace, name))) { + obj = list->object; + switch (obj->otype) { + case OT_ENUMCONST: + result->has_value = 1; + result->value = OBJ_ENUM_CONST(list->object)->val.lo; + return 1; + case OT_OBJECT: + if (OBJECT(obj)->datatype == DABSOLUTE) { + result->has_value = 1; + result->value = OBJECT(obj)->u.address; + } else { + if (OBJECT(obj)->datatype == DDATA && (OBJECT(obj)->qual & Q_INLINE_DATA)) + CInit_ExportConst(OBJECT(obj)); + result->object = OBJECT(obj); + } + return 1; + case OT_TYPE: + result->type = OBJ_TYPE(obj)->type; + return 1; + case OT_TYPETAG: + if (allow_tag) { + result->type = OBJ_TYPE_TAG(obj)->type; + return 1; + } + case OT_NAMESPACE: + case OT_MEMBERVAR: + return 0; + default: + CError_FATAL(245); + } + } + } + + return 0; +} + +Boolean InlineAsm_LookupSymbol(HashNameNode *name, IALookupResult *result) { + return InlineAsm_LookupSymbolOrTag(name, result, 0); +} + +static ObjMemberVar *isclassmember(TypeClass *tclass, HashNameNode *name) { + NameSpaceObjectList *list; + + list = CScope_FindName(tclass->nspace, name); + return (list && list->object->otype == OT_MEMBERVAR) ? OBJ_MEMBER_VAR(list->object) : NULL; +} + +SInt32 InlineAsm_StructMemberOffset(Type *type) { + StructMember *member; + ObjMemberVar *ivar; + SInt32 offset = 0; + + do { + if (IS_TYPE_STRUCT(type)) { + tk = lex(); + if (tk != TK_IDENTIFIER) + InlineAsm_SyntaxError(CErrorStr107); + member = ismember(TYPE_STRUCT(type), tkidentifier); + if (!member) + CError_Error(CErrorStr150, tkidentifier->name); + offset += member->offset; + type = member->type; + tk = lex(); + } else if (IS_TYPE_CLASS(type)) { + tk = lex(); + if (tk != TK_IDENTIFIER) + InlineAsm_SyntaxError(CErrorStr107); + ivar = isclassmember(TYPE_CLASS(type), tkidentifier); + if (!ivar) + CError_Error(CErrorStr150, tkidentifier->name); + offset += ivar->offset; + type = ivar->type; + tk = lex(); + } else { + CError_Error(CErrorStr149); + } + } while (tk == '.'); + + return offset; +} + +SInt32 InlineAsm_StructArrayMemberOffset(Type *type) { + StructMember *member; + ObjMemberVar *ivar; + SInt32 offset = 0; + + do { + if (tk == '.') { + if (IS_TYPE_STRUCT(type)) { + tk = lex(); + if (tk != TK_IDENTIFIER) + InlineAsm_SyntaxError(CErrorStr107); + member = ismember(TYPE_STRUCT(type), tkidentifier); + if (!member) + CError_Error(CErrorStr150, tkidentifier->name); + offset += member->offset; + type = member->type; + tk = lex(); + } else if (IS_TYPE_CLASS(type)) { + tk = lex(); + if (tk != TK_IDENTIFIER) + InlineAsm_SyntaxError(CErrorStr107); + ivar = isclassmember(TYPE_CLASS(type), tkidentifier); + if (!ivar) + CError_Error(CErrorStr150, tkidentifier->name); + offset += ivar->offset; + type = ivar->type; + tk = lex(); + } else { + CError_Error(CErrorStr149); + } + } else { + if (IS_TYPE_ARRAY(type)) { + type = TPTR_TARGET(type); + tk = lex(); + offset += type->size * InlineAsm_ConstantExpression(); + if (tk != ']') + InlineAsm_SyntaxError(125); + tk = lex(); + } else { + CError_Error(CErrorStr148); + } + } + } while (tk == '.' || tk == '['); + + return offset; +} + +SInt32 InlineAsm_StructPointerMemberOffset(Type *type) { + StructMember *member; + ObjMemberVar *ivar; + SInt32 offset; + + tk = lex(); + if (tk != TK_IDENTIFIER) + InlineAsm_SyntaxError(107); + + if (IS_TYPE_STRUCT(type)) { + member = ismember(TYPE_STRUCT(type), tkidentifier); + if (!member) + CError_Error(CErrorStr150, tkidentifier->name); + offset = member->offset; + type = member->type; + } else { + ivar = isclassmember(TYPE_CLASS(type), tkidentifier); + if (!ivar) + CError_Error(CErrorStr150, tkidentifier->name); + offset = ivar->offset; + type = ivar->type; + } + + tk = lex(); + if (tk == '.' || tk == '[') + offset += InlineAsm_StructArrayMemberOffset(type); + + return offset; +} + +static SInt32 DiadicOperator(SInt32 left, short op, SInt32 right) { + CInt64 left64; + CInt64 right64; + CInt64_SetLong(&left64, left); + CInt64_SetLong(&right64, right); + right64 = CMach_CalcIntDiadic(TYPE(&stsignedint), left64, op, right64); + return CInt64_GetULong(&right64); +} + +static SInt32 PrimaryExpression(void) { + IALookupResult result; + SInt32 value; + + switch (tk) { + case TK_IDENTIFIER: + if (InlineAsm_LookupSymbol(tkidentifier, &result)) { + if (result.has_value) { + tk = lex(); + return result.value; + } + + if (result.type && (IS_TYPE_STRUCT(result.type) || IS_TYPE_CLASS(result.type))) { + tk = lex(); + if (tk != '.') + InlineAsm_SyntaxError(120); + if (allow_array_expressions) + return InlineAsm_StructArrayMemberOffset(result.type); + else + return InlineAsm_StructMemberOffset(result.type); + } else { + InlineAsm_SyntaxError(124); + } + } else { + InlineAsm_SyntaxError(124); + } + break; + case TK_INTCONST: + value = tkintconst.lo; + tk = lex(); + return value; + case TK_SIZEOF: + return scansizeof(); + case '+': + tk = lex(); + return PrimaryExpression(); + case '-': + tk = lex(); + return -PrimaryExpression(); + case '!': + tk = lex(); + return PrimaryExpression() == 0; + case '~': + tk = lex(); + return ~PrimaryExpression(); + case '(': + tk = lex(); + value = InlineAsm_ConstantExpression(); + if (tk != ')') + InlineAsm_SyntaxError(115); + tk = lex(); + return value; + default: + InlineAsm_SyntaxError(120); + } + + return 0; +} + +static SInt32 ConstantExpressionTail(SInt32 value) { + SInt32 right; + short left_token; + short right_prec; + + while (1) { + left_token = tk; + tk = lex(); + right = PrimaryExpression(); + + right_prec = GetPrec(tk); + if (right_prec == 0) + return DiadicOperator(value, left_token, right); + + if (GetPrec(left_token) >= right_prec) { + value = DiadicOperator(value, left_token, right); + } else { + value = DiadicOperator(value, left_token, ConstantExpressionTail(right)); + if (GetPrec(tk) == 0) + return value; + } + } +} + +SInt32 InlineAsm_ConstantExpression(void) { + SInt32 value = PrimaryExpression(); + + if (GetPrec(tk) == 0) + return value; + else + return ConstantExpressionTail(value); +} + +HashNameNode *MakeLocalLabel(CInt64 num) { + char buf[80]; + sprintf(buf, "@%i_%i", ASMstmtnb, CInt64_GetULong(&num)); + return GetHashNameNodeExport(buf); +} + +static void ScanOptionalLabel(void) { + if (tk == TK_INTCONST) { + if (lookahead() == ':') { + InlineAsm_DefineLabel(MakeLocalLabel(tkintconst)); + tk = lex(); + tk = lex(); + } + } else { + if (tkidentifier->name[0] == '@') { + InlineAsm_DefineLabel(tkidentifier); + tk = lex(); + if (tk == ':') + tk = lex(); + } else { + HashNameNode *name = tkidentifier; + short t = lookahead(); + tkidentifier = name; + if (t == ':') { + InlineAsm_DefineLabel(name); + tk = lex(); + tk = lex(); + } + } + } +} + +static void ScanStatements(volatile short endToken, AssemblerType mode) { + if (setjmp(InlineAsm_assemblererror)) { + while (tk != TK_EOL && tk != endToken && tk != '}' && tk) + tk = lex(); + if (tk == ';' || tk == TK_EOL) + tk = lex(); + } else { + InlineAsm_Initialize(mode); + InlineAsm_gccmode = 0; + if (setjmp(InlineAsm_assemblererror)) { + while (tk != ';' && tk != TK_EOL && tk != endToken && tk != '}' && tk) + tk = lex(); + if (tk == ';' || tk == TK_EOL) + tk = lex(); + } + + while (tk && tk != endToken) { + backtracking = 0; + sourceoffset = CPrep_GetFileOffsetInfo(&cparser_fileoffset); + if (tk == '"') { + if (InlineAsm_gccmode) { + tk = lex(); + InlineAsm_gcc_parse(); + } else { + InlineAsm_gccmode = 1; + copts.cplusplus = 0; + copts.asmpoundcomment = 1; + tk = lex(); + } + } + + if (tk == '.') { + InlineAsm_ScanAssemblyDirective(); + } else if (tk == TK_IDENTIFIER) { + ScanOptionalLabel(); + if (tk == TK_IDENTIFIER) + InlineAsm_ScanAssemblyInstruction(); + } else if (tk == TK_INTCONST) { + ScanOptionalLabel(); + if (tk == TK_IDENTIFIER) + InlineAsm_ScanAssemblyInstruction(); + } + + if (InlineAsm_gccmode && tk == '"') { + tk = lex(); + InlineAsm_gcc_parse(); + } + + if (tk == ';' || tk == TK_EOL) { + CPrep_TokenStreamFlush(); + tk = lex(); + } else if (tk != endToken) { + if (endToken == ')') + CError_Error(CErrorStr115); + else + CError_Error(CErrorStr113); + } + } + } +} + +void InlineAsm_ScanStatements(volatile short endToken) { + ScanStatements(endToken, AssemblerType_0); +} + +void InlineAsm_ScanFunction(volatile short endToken) { + ScanStatements(endToken, AssemblerType_1); +} + +void InlineAsm_Assemble(void) { + short token = (tk == '(') ? ')' : '}'; + char save_pc = copts.asmpoundcomment; + char save_cpp = copts.cplusplus; + + cprep_nostring = 1; + CFunc_AppendStatement(ST_NOP); + first_ST_ASM = curstmt; + ASMstmtnb++; + + cprep_eoltokens = 1; + in_assembler = 1; + tk = lex(); + InlineAsm_ScanStatements(token); + in_assembler = 0; + cprep_eoltokens = 0; + cprep_nostring = 0; + + copts.asmpoundcomment = save_pc; + copts.cplusplus = save_cpp; +} + +void InlineAsm_PackAsmStatement(Statement *stmt, Statement *first, void **output, SInt32 *outsize) { + InlineAsm *src; + InlineAsm *dest; + IAOperand *op; + SInt32 i; + SInt32 size; + + src = (InlineAsm *) stmt->expr; + size = sizeof(InlineAsm) + sizeof(IAOperand) * src->argcount; + dest = galloc(size); + memcpy(dest, src, size); + + for (i = 0, op = dest->args; i < dest->argcount; i++, op++) { + switch (op->type) { + case IAOpnd_0: + break; + case IAOpnd_Reg: + case IAOpnd_4: + op->u.reg.object = (Object *) CInline_GetLocalID(op->u.reg.object); + break; + case IAOpnd_Lab: + op->u.lab.label = (CLabel *) CInline_GetStatementNumber(first, op->u.lab.label->stmt); + break; + case IAOpnd_LabDiff: + op->u.labdiff.label1 = (CLabel *) CInline_GetStatementNumber(first, op->u.labdiff.label1->stmt); + op->u.labdiff.label2 = (CLabel *) CInline_GetStatementNumber(first, op->u.labdiff.label2->stmt); + break; + } + } + + *output = dest; + *outsize = size; +} + +void InlineAsm_UnpackAsmStatement(Statement *stmt, CLabel **labelArray, Boolean flag, void *data, SInt32 size) { + InlineAsm *ia; + IAOperand *op; + SInt32 i; + + ia = galloc(size); + memcpy(ia, data, size); + + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + switch (op->type) { + case IAOpnd_0: + break; + case IAOpnd_Reg: + case IAOpnd_4: + op->u.reg.object = CInline_GetLocalObj((SInt32) op->u.reg.object, flag); + break; + case IAOpnd_Lab: + op->u.lab.label = labelArray[(SInt16) op->u.lab.label]; + break; + case IAOpnd_LabDiff: + op->u.labdiff.label1 = labelArray[(SInt16) op->u.labdiff.label1]; + op->u.labdiff.label2 = labelArray[(SInt16) op->u.labdiff.label2]; + break; + } + } + + stmt->expr = (ENode *) ia; +} + +void InlineAsm_CheckLocalUsage(Statement *stmt) { + InlineAsm *ia = (InlineAsm *) stmt->expr; + IAOperand *op; + SInt32 i; + + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + switch (op->type) { + case IAOpnd_Reg: + if (op->u.reg.object) + SetVarUsage(op->u.reg.object, 0); + break; + case IAOpnd_4: + SetVarUsage(op->u.obj.obj, 1); + break; + } + } +} + +CLabel *InlineAsm_GetReferencedLabel(Statement *stmt) { + InlineAsm *ia = (InlineAsm *) stmt->expr; + IAOperand *op; + SInt32 i; + + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + if (op->type == IAOpnd_Lab) + return op->u.lab.label; + if (op->type == IAOpnd_LabDiff) + return op->u.labdiff.label1; + } + + return NULL; +} + +CLabel *InlineAsm_GetReferencedLabel2(Statement *stmt) { + InlineAsm *ia = (InlineAsm *) stmt->expr; + IAOperand *op; + SInt32 i; + + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + if (op->type == IAOpnd_LabDiff) + return op->u.labdiff.label2; + } + + return NULL; +} + +Object *InlineAsm_GetObjectOffset(InlineAsm *ia, SInt32 index, SInt32 *offset) { + IAOperand *op; + SInt32 i; + SInt32 counter; + + for (i = 0, counter = 0, op = ia->args; i < ia->argcount; i++, op++) { + if (op->type == IAOpnd_3) { + if (counter++ == index) { + *offset = ((intptr_t) &op->u.obj.obj) - ((intptr_t) ia); + return op->u.obj.obj; + } + } + } + + return NULL; +} + +char *InlineAsm_DumpStatement(Statement *stmt) { + static char buffer[1024]; + InlineAsm *ia; + IAOperand *arg; + int i; + char ch; + SInt32 offset; + + ia = (InlineAsm *) stmt->expr; + + strcpy(buffer, "\""); + strcat(buffer, InlineAsm_GetMnemonic(ia)); + strcat(buffer, "\""); + + for (i = 0, arg = ia->args; i < ia->argcount; i++, arg++) { + char argbuf[1024]; + + switch (arg->type) { + case IAOpnd_Imm: + sprintf(argbuf, " imm(%ld)", arg->u.imm.value); + break; + case IAOpnd_Reg: + ch = ' '; + if (arg->u.reg.effect & EffectWrite) { + if (arg->u.reg.effect & EffectRead) + ch = '+'; + else + ch = '='; + } else { + if (!(arg->u.reg.effect & EffectRead)) + ch = '0'; + } + + if (arg->u.reg.object) { + sprintf(argbuf, + "%creg(%s)", + ch, + arg->u.reg.object->name->name); + } else { + sprintf(argbuf, + "%creg(%s%d)", + ch, + register_class_name[arg->u.reg.rclass], + arg->u.reg.num); + } + break; + + case IAOpnd_3: + case IAOpnd_4: + if (arg->u.obj.offset > 0) + sprintf(argbuf, " obj(%s+%ld)", arg->u.obj.obj->name->name, arg->u.obj.offset); + else if (arg->u.obj.offset < 0) + sprintf(argbuf, " obj(%s-%ld)", arg->u.obj.obj->name->name, -arg->u.obj.offset); + else + sprintf(argbuf, " obj(%s)", arg->u.obj.obj->name->name); + break; + + case IAOpnd_Lab: + sprintf(argbuf, " lab(%s)", arg->u.lab.label->uniquename->name); + break; + + case IAOpnd_LabDiff: + offset = !arg->negated ? 0 : arg->u.labdiff.offset; + sprintf(argbuf, + " labdiff(%s-%s%c%d)", + arg->u.labdiff.label1->uniquename->name, + arg->u.labdiff.label2->uniquename->name, + (arg->negated == 1) ? '-' : '+', + offset + ); + break; + } + + strcat(buffer, argbuf); + } + + return buffer; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/InlineAsmPPC.c b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/InlineAsmPPC.c new file mode 100644 index 0000000..464f9f9 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/InlineAssembler/InlineAsmPPC.c @@ -0,0 +1,2586 @@ +#include "compiler/InlineAsmPPC.h" +#include "compiler/CError.h" +#include "compiler/CExpr.h" +#include "compiler/CInt64.h" +#include "compiler/CFunc.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CPrep.h" +#include "compiler/CPrepTokenizer.h" +#include "compiler/Alias.h" +#include "compiler/CodeGen.h" +#include "compiler/CodeGenOptPPC.h" +#include "compiler/CompilerTools.h" +#include "compiler/Exceptions.h" +#include "compiler/FuncLevelAsmPPC.h" +#include "compiler/InlineAsm.h" +#include "compiler/InlineAsmMnemonicsPPC.h" +#include "compiler/InlineAsmRegisters.h" +#include "compiler/InlineAsmRegistersPPC.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/PPCError.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/TOC.h" +#include "compiler/objects.h" + +char asm_alloc_flags[10]; +Section sm_section; +UInt32 cpu; +SInt32 fralloc_parameter_area_size; +Boolean user_responsible_for_frame; +Boolean supports_hardware_fpu; +UInt32 assembledinstructions; +AssemblerType assembler_type; +char volatileasm; +Boolean InlineAsm_gccmode; +Boolean InlineAsm_labelref; +CLabel *pic_base_label; + +// forward decls +static SInt32 InlineAsm_ConstantExpressionPPC(SInt32 value); +static Object *isvariableoperand(void); +static Object *isregisterstructpointeroperand(void); +static void registeroperand(IAOperand *op, char rclass, short effect); +static void DiadicOperatorPPC(IAExpr *left, short token, IAExpr *right); +static void InlineAsm_ExpressionPPC(IAExpr *expr, SInt32 value); +static void savepicbase(short reg, HashNameNode *name); + +CW_INLINE SInt32 ExtractValue(CInt64 value) { + return (SInt32) CInt64_GetULong(&value); +} + +static void IllegalObjectOperator(HashNameNode *name1, HashNameNode *name2, short token) { + char *opstr; + switch (token) { + case '*': opstr = "*"; break; + case '/': opstr = "/"; break; + case '%': opstr = "%"; break; + case '+': opstr = "+"; break; + case '-': opstr = "-"; break; + case TK_SHL: opstr = "<<"; break; + case TK_SHR: opstr = ">>"; break; + case '<': opstr = "<"; break; + case '>': opstr = ">"; break; + // bug? these two seem swapped + case TK_LESS_EQUAL: opstr = ">="; break; + case TK_GREATER_EQUAL: opstr = "<="; break; + case TK_LOGICAL_EQ: opstr = "=="; break; + case TK_LOGICAL_NE: opstr = "!="; break; + case '&': opstr = "&"; break; + case '^': opstr = "^"; break; + case '|': opstr = "|"; break; + case TK_LOGICAL_AND: opstr = "&&"; break; + case TK_LOGICAL_OR: opstr = "||"; break; + default: opstr = "???"; + } + + if (!name2) { + PPCError_Error(PPCErrorStr119, opstr, name1->name); + } else if (!name1) { + PPCError_Error(PPCErrorStr120, opstr, name2->name); + } else { + PPCError_Error(PPCErrorStr118, name1->name, opstr, name2->name); + } +} + +static void IllegalObjectInConst(IAExpr *expr) { + if (expr->xC) { + PPCError_Error(PPCErrorStr122, expr->xC->name->name); + } else if (expr->object) { + PPCError_Error(PPCErrorStr122, expr->object->name->name); + } else if (expr->label) { + PPCError_Error(PPCErrorStr166, expr->label->name->name); + } +} + +static void NotInRegisterError(char *name, char rclass) { + PPCError_Error(PPCErrorStr167, name, register_class_name[rclass]); +} + +static int isregisteroperand(char rclass) { + IARegister *reg; + + return (tk == TK_IDENTIFIER && (reg = InlineAsm_LookupRegisterPPCName(tkidentifier)) && reg->rclass == rclass); +} + +static SInt32 getcroperand(char rclass) { + SInt32 value; + IARegister *reg; + + value = 0; + if (tk == TK_IDENTIFIER && (reg = InlineAsm_LookupRegisterPPCName(tkidentifier)) && reg->rclass == rclass) { + value = reg->num; + } else { + PPCError_Error(PPCErrorStr167, tkidentifier->name); + } + + tk = lex(); + return value; +} + +static SInt32 getimmediateoperand(SInt32 minimum, SInt32 maximum) { + SInt32 value = InlineAsm_ConstantExpressionPPC(0); + if (value < minimum || value > maximum) + CError_Error(CErrorStr154); + return value; +} + +static void PrimaryExpressionPPC(IAExpr *expr, SInt32 value) { + Object *obj; + IALookupResult result; + IAExpr subexpr; + IAExprType type = IAExpr_0; + SInt32 v; + + switch (tk) { + case TK_IDENTIFIER: + if ((obj = isregisterstructpointeroperand())) { + tk = lex(); + InlineAsm_InitExpr5(expr, InlineAsm_StructPointerMemberOffset(TPTR_TARGET(obj->type))); + expr->object = obj; + return; + } + + if ((obj = isvariableoperand())) { + short reg; + InlineAsm_InitExpr5(expr, 0); + expr->xC = obj; + + if (obj->datatype == DLOCAL) + reg = 1; + else + reg = pic_base_reg; + + tk = lex(); + if (tk == '.' || tk == '[') { + InlineAsm_InitExpr5(&subexpr, InlineAsm_StructArrayMemberOffset(obj->type)); + DiadicOperatorPPC(expr, '+', &subexpr); + } + + if (tk == '+') { + tk = lex(); + InlineAsm_ExpressionPPC(&subexpr, value); + DiadicOperatorPPC(expr, '+', &subexpr); + } else if (tk == '-') { + tk = lex(); + InlineAsm_ExpressionPPC(&subexpr, value); + DiadicOperatorPPC(expr, '-', &subexpr); + } else if (!strcmp(tkidentifier->name, "@loword")) { + tk = lex(); + if (low_offset) { + InlineAsm_InitExpr5(&subexpr, low_offset); + DiadicOperatorPPC(expr, '+', &subexpr); + } + } else if (!strcmp(tkidentifier->name, "@hiword")) { + tk = lex(); + if (high_offset) { + InlineAsm_InitExpr5(&subexpr, high_offset); + DiadicOperatorPPC(expr, '+', &subexpr); + } + } + expr->reg = reg; + return; + } + + if (InlineAsm_LookupSymbolOrTag(tkidentifier, &result, 1)) { + if (result.has_value) { + tk = lex(); + InlineAsm_InitExpr5(expr, result.value); + return; + } + + if (result.type && (IS_TYPE_STRUCT(result.type) || IS_TYPE_CLASS(result.type))) { + tk = lex(); + if (tk != '.') + CError_Error(CErrorStr120); + + if (allow_array_expressions) { + InlineAsm_InitExpr5(expr, InlineAsm_StructArrayMemberOffset(result.type)); + return; + } + + InlineAsm_InitExpr5(expr, InlineAsm_StructMemberOffset(result.type)); + return; + } + + if (result.object && result.object->datatype == DABSOLUTE) { + tk = lex(); + InlineAsm_InitExpr5(expr, result.object->u.address); + return; + } + } else if (value) { + if (isregisteroperand(RegClass_CRFIELD)) { + InlineAsm_InitExpr5(expr, getcroperand(RegClass_CRFIELD)); + return; + } + if (isregisteroperand(RegClass_6)) { + InlineAsm_InitExpr5(expr, getcroperand(RegClass_6)); + return; + } + if (strlen(tkidentifier->name) == 6 && !strncmp(tkidentifier->name, "cr", 2) && tkidentifier->name[3] == '_') { + IARegister *reg; + char regname[4]; + regname[0] = tkidentifier->name[0]; + regname[1] = tkidentifier->name[1]; + regname[2] = tkidentifier->name[2]; + regname[3] = 0; + if ((reg = InlineAsm_LookupRegisterPPC(regname)) && reg->rclass == RegClass_CRFIELD) { + SInt32 v = reg->num * 4; + regname[0] = tkidentifier->name[4]; + regname[1] = tkidentifier->name[5]; + regname[2] = 0; + if ((reg = InlineAsm_LookupRegisterPPC(regname)) && reg->rclass == RegClass_6) { + tk = lex(); + InlineAsm_InitExpr5(expr, v + reg->num); + return; + } + } + } + } + + if (!strcmp("ha16", tkidentifier->name)) + type = IAExpr_8; + else if (!strcmp("hi16", tkidentifier->name)) + type = IAExpr_7; + else if (!strcmp("lo16", tkidentifier->name)) + type = IAExpr_6; + + if (type != IAExpr_0) { + tk = lex(); + if (tk == '(') { + SInt32 v; + tk = lex(); + PrimaryExpressionPPC(expr, value); + expr->type = type; + if (tk != ')') + CError_Error(CErrorStr115); + + tk = lex(); + if (InlineAsm_CheckExpr(expr)) { + expr->value = InlineAsm_GetExprValue(expr); + expr->type = IAExpr_5; + } + return; + } + + CError_Error(CErrorStr114); + } else { + if (!result.label) + result.label = InlineAsm_DeclareLabel(tkidentifier); + + InlineAsm_InitExpr5(expr, 0); + expr->flags |= IAFlag1; + expr->label = result.label; + tk = lex(); + return; + } + break; + + case TK_INTCONST: + v = CInt64_GetULong(&tkintconst); + tk = lex(); + InlineAsm_InitExpr5(expr, v); + return; + + case TK_SIZEOF: + InlineAsm_InitExpr5(expr, scansizeof()); + return; + + case '+': + tk = lex(); + PrimaryExpressionPPC(expr, value); + return; + + case '-': + tk = lex(); + PrimaryExpressionPPC(expr, value); + if (InlineAsm_CheckExpr(expr)) + expr->value = -expr->value; + else + CError_Error(CErrorStr124); + return; + + case '!': + tk = lex(); + PrimaryExpressionPPC(expr, value); + if (InlineAsm_CheckExpr(expr)) + expr->value = !expr->value; + else + CError_Error(CErrorStr124); + return; + + case '~': + tk = lex(); + PrimaryExpressionPPC(expr, value); + if (InlineAsm_CheckExpr(expr)) + expr->value = ~expr->value; + else + CError_Error(CErrorStr124); + return; + + case '(': + tk = lex(); + InlineAsm_ExpressionPPC(expr, value); + if (tk != ')') + CError_Error(CErrorStr115); + tk = lex(); + return; + + default: + CError_Error(CErrorStr120); + } + + InlineAsm_InitExpr5(expr, 0); +} + +static void DiadicOperatorPPC(IAExpr *left, short token, IAExpr *right) { + CInt64 leftval; + CInt64 rightval; + + CInt64_SetLong(&leftval, left->value); + CInt64_SetLong(&rightval, right->value); + rightval = CMach_CalcIntDiadic(TYPE(&stsignedint), leftval, token, rightval); + + if (left->xC) { + if (right->label) + PPCError_Error(PPCErrorStr124, left->xC->name->name, right->label->name->name); + + if (right->xC) { + if (left->x10) { + PPCError_Error(PPCErrorStr121, left->xC->name->name, left->x10->name->name, right->xC->name->name); + } else if (right->x10) { + PPCError_Error(PPCErrorStr121, left->xC->name->name, right->xC->name->name, right->x10->name->name); + } else if (token == '-') { + left->value = CInt64_GetULong(&rightval); + left->x10 = right->xC; + } else { + IllegalObjectOperator(left->xC->name, right->xC->name, token); + } + } else if (token == '-' || token == '+') { + left->value = CInt64_GetULong(&rightval); + } else { + IllegalObjectOperator(left->xC->name, NULL, token); + } + } else if (right->xC) { + if (right->label) + PPCError_Error(PPCErrorStr124, right->xC->name->name, right->label->name->name); + + if (token == '+') { + left->xC = right->xC; + left->x10 = right->x10; + left->value = CInt64_GetULong(&rightval); + } else { + IllegalObjectOperator(NULL, right->xC->name, token); + } + } else if (left->label) { + if (left->xC) + PPCError_Error(PPCErrorStr124, left->label->name->name, left->xC->name->name); + + if (right->label) { + if (left->x18) { + PPCError_Error(PPCErrorStr121, left->label->name->name, left->x18->name->name, right->label->name->name); + } else if (right->x18) { + PPCError_Error(PPCErrorStr121, left->label->name->name, right->label->name->name, right->x18->name->name); + } else if (token == '-') { + left->value = CInt64_GetULong(&rightval); + left->x18 = right->label; + } else { + IllegalObjectOperator(left->label->name, right->label->name, token); + } + } else if (token == '+' || token == '-') { + left->value = CInt64_GetULong(&rightval); + } else { + IllegalObjectOperator(NULL, left->label->name, token); + } + } else if (right->label) { + if (token == '+') { + left->label = right->label; + left->x18 = right->x18; + left->value = CInt64_GetULong(&rightval); + } else { + IllegalObjectOperator(NULL, right->label->name, token); + } + } else { + left->value = CInt64_GetULong(&rightval); + } + + left->flags |= right->flags; + if (left->type == IAOpnd_Lab) { + if (right->type != IAOpnd_Lab) + left->type = right->type; + } else { + if (right->type != IAOpnd_Lab && right->type != left->type) + PPCError_Error(PPCErrorStr126); + } +} + +static void ExpressionTailPPC(IAExpr *left, SInt32 value) { + IAExpr right; + short left_tk; + short right_prec; + + while (1) { + left_tk = tk; + tk = lex(); + + PrimaryExpressionPPC(&right, value); + right_prec = GetPrec(tk); + if (right_prec == 0) { + DiadicOperatorPPC(left, left_tk, &right); + break; + } + + if (GetPrec(left_tk) >= right_prec) { + DiadicOperatorPPC(left, left_tk, &right); + } else { + ExpressionTailPPC(&right, value); + DiadicOperatorPPC(left, left_tk, &right); + if (GetPrec(tk) == 0) + break; + } + } +} + +static void immediateoperand(IAOperand *op, SInt32 minval, SInt32 maxval) { + SInt32 value; + if (InlineAsm_gccmode && tk == '%') { + tk = lex(); + if (tk != TK_INTCONST) { + CError_Error(CErrorStr144); + } else { + CInt64 c = tkintconst; + value = CInt64_GetULong(&c); + if (value < 0) + CError_Error(CErrorStr144); + } + op->type = IAOpnd_7; + op->u.unk7.value = value; + op->u.unk7.unk1 = 1; + op->u.unk7.unk2 = 0; + op->u.unk7.unk3 = 0; + tk = lex(); + } else { + value = InlineAsm_ConstantExpressionPPC(0); + if (value < minval || value > maxval) + CError_Error(CErrorStr154); + op->type = IAOpnd_Imm; + op->u.imm.value = value; + } +} + +static SInt32 immediatevalue(IAOperand *op, SInt32 minval, SInt32 maxval) { + SInt32 value; + if (InlineAsm_gccmode && tk == '%') { + CError_Error(CErrorStr144); + return 0; + } else { + value = InlineAsm_ConstantExpressionPPC(0); + if (value < minval || value > maxval) + CError_Error(CErrorStr154); + op->type = IAOpnd_Imm; + op->u.imm.value = value; + return value; + } +} + +static void InlineAsm_ExpressionPPC(IAExpr *expr, SInt32 value) { + PrimaryExpressionPPC(expr, value); + if (GetPrec(tk)) + ExpressionTailPPC(expr, value); + if (GetPrec(tk)) + ExpressionTailPPC(expr, value); + + if (expr->type == IAExpr_5 && tk == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "@l")) { + expr->type = IAExpr_6; + tk = lex(); + } else if (!strcmp(tkidentifier->name, "@ha")) { + expr->type = IAExpr_8; + tk = lex(); + } else if (!strcmp(tkidentifier->name, "@h")) { + expr->type = IAExpr_7; + tk = lex(); + } + } +} + +static SInt32 InlineAsm_ConstantExpressionPPC(SInt32 value) { + IAExpr expr; + + InlineAsm_ExpressionPPC(&expr, value); + if (!InlineAsm_CheckExpr(&expr)) { + IllegalObjectInConst(&expr); + return 0; + } else { + return InlineAsm_GetExprValue(&expr); + } +} + +static SInt32 crbitoperand(void) { + SInt32 value = InlineAsm_ConstantExpressionPPC(1); + if (value < 0 || value > 31) + CError_Error(CErrorStr154); + return value; +} + +static void floatoperand(IAOperand *op, InlineAsm *ia, Type *type) { + Object *obj; + + obj = createfloatconstant(type, &tkfloatconst); + op[0].type = IAOpnd_Reg; + op[0].u.reg.rclass = RegClass_GPR; + op[0].u.reg.object = NULL; + op[0].u.reg.effect = EffectRead; + op[0].u.reg.num = pic_base_reg; + + ia->argcount++; + op[1].type = IAOpnd_4; + op[1].u.obj.obj = obj; + PPCError_Error(PPCErrorStr179); + op[1].u.obj.unk = IAExpr_2; + op[1].u.obj.offset = 0; + tk = lex(); +} + +static Object *isvariableoperand(void) { + IALookupResult result; + Object *obj; + + if (tk == TK_IDENTIFIER) { + InlineAsm_LookupSymbol(tkidentifier, &result); + if ((obj = result.object)) { + if (obj->datatype == DLOCAL) { + if (OBJECT_REG(obj)) + return NULL; + return obj; + } + + if (obj->datatype == DFUNC || obj->datatype == DVFUNC) + return obj; + + if (obj->datatype == DDATA) { + createIndirect(obj, 0, 0); + return obj; + } + } + } + + return NULL; +} + +static Object *isregisterstructpointeroperand(void) { + IALookupResult result; + Object *obj; + + if (tk != TK_IDENTIFIER) + return NULL; + + InlineAsm_LookupSymbol(tkidentifier, &result); + if ((obj = result.object)) { + if (obj->datatype != DLOCAL) + return NULL; + if (!OBJECT_REG(obj) && !is_register_object(obj)) + return NULL; + if (!IS_TYPE_POINTER(obj->type)) + return NULL; + if (!IS_TYPE_STRUCT(TPTR_TARGET(obj->type)) && !IS_TYPE_CLASS(TPTR_TARGET(obj->type))) + return NULL; + return obj; + } + + return NULL; +} + +static void memoryoperand(IAOperand *op, InlineAsm *ia, Boolean flag, short effect) { + IAExpr expr; + + InlineAsm_ExpressionPPC(&expr, 0); + if (expr.object) { + if (!flag) + CError_Error(CErrorStr155); + if (expr.xC) + PPCError_Error(PPCErrorStr122, expr.xC); + + op[0].type = IAOpnd_Reg; + op[0].u.reg.rclass = RegClass_GPR; + op[0].u.reg.object = expr.object; + op[0].u.reg.num = 0; + op[0].u.reg.effect = effect; + if (expr.object || expr.reg) + op[0].u.reg.effect = effect; + else + op[0].u.reg.effect = 0; + + ia->argcount++; + op[1].type = IAOpnd_Imm; + switch (expr.type) { + case IAExpr_5: + expr.type = IAExpr_6; + if (expr.object->datatype != DLOCAL || !OBJECT_REG(expr.object)) + PPCError_Error(PPCErrorStr180); + case IAExpr_2: + case IAExpr_6: + case IAExpr_7: + case IAExpr_8: + op[1].u.imm.value = InlineAsm_GetExprValue(&expr); + break; + default: + CError_Error(CErrorStr155); + } + + expr.object->flags |= OBJECT_USED; + return; + } + + if (expr.xC) { + if (expr.x10) + PPCError_Error(PPCErrorStr123, expr.xC->name->name, expr.x10->name->name); + + if (flag) { + if (tk == '(') { + if (flag) { + tk = lex(); + registeroperand(op, RegClass_GPR, effect); + if ((expr.type == IAExpr_8 || expr.type == IAExpr_7) && copts.codegen_pic && pic_base_reg) + CError_Error(CErrorStr155); + if (tk != ')') + CError_Error(CErrorStr115); + tk = lex(); + } + } else { + op[0].type = IAOpnd_Reg; + op[0].u.reg.rclass = RegClass_GPR; + op[0].u.reg.object = expr.object; + op[0].u.reg.num = expr.reg; + } + + if (expr.object || expr.reg) + op[0].u.reg.effect = effect; + else + op[0].u.reg.effect = 0; + + ia->argcount++; + op++; + } + + if (expr.xC->datatype == DLOCAL) { + op[0].type = IAOpnd_4; + op[0].u.obj.unk = IAExpr_1; + } else { + op[0].type = IAOpnd_3; + if (expr.type == IAExpr_5) { + expr.type = IAExpr_6; + PPCError_Error(PPCErrorStr180); + } + op[0].u.obj.unk = expr.type; + } + + op[0].u.obj.obj = expr.xC; + op[0].u.obj.offset = expr.value; + expr.xC->flags |= OBJECT_USED; + return; + } + + if (flag) { + if (tk == '(') { + tk = lex(); + registeroperand(op, RegClass_GPR, effect); + ia->argcount++; + op++; + if (tk != ')') + CError_Error(CErrorStr115); + tk = lex(); + } else { + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_GPR; + op->u.reg.object = expr.object; + op->u.reg.num = expr.reg; + ia->argcount++; + op++; + } + } + + if (expr.label) { + if (expr.x18) { + op->type = IAOpnd_LabDiff; + op->negated = 0; + op->u.labdiff.label1 = expr.label; + op->u.labdiff.label2 = expr.x18; + op->u.labdiff.offset = expr.value; + } else { + PPCError_Error(PPCErrorStr125, expr.label->name->name); + } + } else { + op->type = IAOpnd_Imm; + op->u.imm.value = InlineAsm_GetExprValue(&expr); + } +} + +static void registeroperand(IAOperand *op, char rclass, short effect) { + IARegister *reg; + Object *obj; + + if (tk == TK_IDENTIFIER && (reg = InlineAsm_LookupRegisterPPCName(tkidentifier)) && reg->rclass == rclass) { + SInt32 num; + obj = reg->object; + num = reg->num; + op->type = IAOpnd_Reg; + op->u.reg.rclass = rclass; + op->u.reg.object = obj; + op->u.reg.num = num; + op->u.reg.effect = effect; + if (obj) { + reg->object->flags |= OBJECT_USED; + Registers_GetVarInfo(obj)->flags |= VarInfoFlag40; + } + } else if (rclass == RegClass_CRFIELD) { + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_CRFIELD; + op->u.reg.object = NULL; + op->u.reg.num = getimmediateoperand(0, 7); + op->u.reg.effect = effect; + return; + } else if (rclass == RegClass_SPR) { + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.object = NULL; + op->u.reg.num = getimmediateoperand(0, 1024); + op->u.reg.effect = effect; + return; + } else if (tk == '%' && InlineAsm_gccmode) { + SInt32 num; + tk = lex(); + if (tk != TK_INTCONST) { + CError_Error(CErrorStr144); + } else { + num = ExtractValue(tkintconst); + if (num < 0) + CError_Error(CErrorStr144); + } + + op->type = IAOpnd_6; + op->u.unk6.num = num; + op->u.unk6.unk4 = 2; + op->u.unk6.effect = effect; + op->u.unk6.rclass = rclass; + tk = lex(); + return; + } else if (tk == TK_IDENTIFIER) { + NotInRegisterError(tkidentifier->name, rclass); + } else { + PPCError_Error(PPCErrorStr171); + } + + tk = lex(); + if (!strcmp(tkidentifier->name, "@loword")) { + tk = lex(); + } else if (!strcmp(tkidentifier->name, "@hiword")) { + if (reg->object) + op->u.reg.num = 1; + else + PPCError_Error(PPCErrorStr168); + tk = lex(); + } else if (rclass == RegClass_GPR) { + if (reg->object && reg->object->type->size == 8) { + HashNameNode *name = reg->object->name; + PPCError_Error(PPCErrorStr127, name->name, name->name, name->name); + } + } +} + +static void cr0_operand(IAOperand *op, short effect) { + IARegister *reg; + + if ((reg = InlineAsm_LookupRegisterPPC("cr0"))) { + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_CRFIELD; + op->u.reg.object = reg->object; + op->u.reg.num = reg->num; + op->u.reg.effect = effect; + } else { + NotInRegisterError("cr0", RegClass_CRFIELD); + } +} + +static void r0_operand(IAOperand *op) { + IARegister *reg; + + if ((reg = InlineAsm_LookupRegisterPPC("r0"))) { + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_GPR; + op->u.reg.object = reg->object; + op->u.reg.num = reg->num; + op->u.reg.effect = 0; + tk = lex(); + } else { + NotInRegisterError("r0", RegClass_GPR); + } +} + +static void labeloperand(InlineAsm *ia, IAOperand *op, Boolean flag1, Boolean flag2, Boolean flag3) { + IALookupResult result; + + if (tk == TK_IDENTIFIER) { + if (!InlineAsm_LookupSymbol(tkidentifier, &result)) + result.label = InlineAsm_DeclareLabel(tkidentifier); + + if (result.label) { + op->type = IAOpnd_Lab; + op->u.lab.label = result.label; + } else if (result.object && result.object->datatype == DFUNC) { + if (flag3) + ia->flags |= IAFlag2; + op->type = IAOpnd_3; + op->u.obj.obj = result.object; + op->u.obj.offset = 0; + + if (flag1) { + op->u.obj.unk = IAExpr_4; + if (flag2) + CError_Error(CErrorStr261); + } else { + op->u.obj.unk = IAExpr_2; + if (flag2) + op->u.obj.unk = IAExpr_10; + op->u.obj.unk = IAExpr_6; + PPCError_Error(PPCErrorStr180); + } + } else { + CError_Error(CErrorStr144); + } + + tk = lex(); + } else if (tk == '*') { + if (!flag2) { + tk = lex(); + if (tk == '+') { + tk = lex(); + if (flag1) + immediateoperand(op, -0x2000000, 0x1FFFFFF); + else + immediateoperand(op, -0x8000, 0xFFFF); + } else if (tk == '-') { + tk = lex(); + op->u.imm.value = flag1 ? -immediatevalue(op, -0x1FFFFFF, 0x2000000) : -immediatevalue(op, -0x7FFF, 0x10000); + } else { + CError_Error(CErrorStr144); + } + } else { + CError_Error(CErrorStr144); + } + } else { + if (flag2) { + if (flag1) + immediateoperand(op, -0x2000000, 0x1FFFFFF); + else + immediateoperand(op, -0x8000, 0xFFFF); + } else { + CError_Error(CErrorStr144); + } + } +} + +static void imm_or_labeldiff_operand(InlineAsm *ia, IAOperand *op, SInt32 minimum, SInt32 maximum, Boolean negate) { + IAExpr expr; + Object *obj; + SInt32 value; + + InlineAsm_ExpressionPPC(&expr, 0); + + if ((obj = expr.xC) && !expr.x10 && expr.type == IAExpr_5 && obj->datatype == DDATA && IS_TYPE_INT_OR_ENUM(obj->type) && (obj->qual & Q_INLINE_DATA)) { + switch (ia->opcode) { + case PC_ADDI: + case PC_ADDIC: + case PC_ADDICR: + case PC_ADDIS: + case PC_ORI: + case PC_ORIS: + if (ia->args[1].u.imm.value) { + SInt32 cmp; + if (obj->datatype != DLOCAL) { + cmp = copts.codegen_pic ? INVALID_PIC_REG : NO_REG; + } else { + cmp = local_base_register(obj); + } + if (ia->args[1].u.imm.value == cmp) + break; + } + case PC_LI: + case PC_LIS: + case PC_TWI: + case PC_OPWORD: + InlineAsm_InitExpr5(&expr, expr.value + CInt64_GetULong(&obj->u.data.u.intconst)); + break; + } + } + + if (expr.label) { + if (expr.x18) { + op->type = IAOpnd_LabDiff; + if (negate) + op->negated = 1; + else + op->negated = 0; + op->u.labdiff.label1 = expr.label; + op->u.labdiff.label2 = expr.x18; + op->u.labdiff.offset = expr.value; + } else { + PPCError_Error(PPCErrorStr125, expr.label->name->name); + } + return; + } + + if (expr.xC) { + if (!expr.x10) { + if (ia->opcode != PC_OPWORD && expr.type == IAExpr_5) { + IllegalObjectInConst(&expr); + return; + } + + if (expr.xC->datatype == DLOCAL) { + op->type = IAOpnd_4; + op->u.obj.unk = IAExpr_1; + } else { + op->type = IAOpnd_3; + if (expr.type == IAExpr_5) { + expr.type = IAExpr_6; + PPCError_Error(PPCErrorStr180); + } + op->u.obj.unk = expr.type; + } + op->u.obj.obj = expr.xC; + op->u.obj.offset = expr.value; + } else { + PPCError_Error(PPCErrorStr123, expr.xC->name->name, expr.x10->name->name); + } + return; + } + + value = InlineAsm_GetExprValue(&expr); + if (value < minimum || value > maximum) + CError_Error(CErrorStr154); + + op->type = IAOpnd_Imm; + if (negate) + op->u.imm.value = -value; + else + op->u.imm.value = value; +} + +static void eatcommatoken(void) { + if (tk == ',') + tk = lex(); + else + CError_Error(CErrorStr116); +} + +static InlineAsm *InlineAsm_ScanAssemblyOperands(IAMnemonic *mnemonic) { + OpcodeInfo *info; + InlineAsm *ia; + SInt32 buffersize; + IAOperand *op; + int argcount; + char *format; + IARegister *reg; + + char code; + short effect; + Boolean has_excl; + Boolean has_question; + Boolean negate; + UInt32 t; + + info = &opcodeinfo[mnemonic->x4]; + argcount = info->x8; + if (PCODE_FLAG_SET_F(info) & fCanSetRecordBit) + argcount++; + if (!(PCODE_FLAG_SET_F(info) & fSetsCarry) && (PCODE_FLAG_SET_F(info) & fCanSetCarry)) + argcount++; + if (PCODE_FLAG_SET_T(info) & fCanLink) + argcount++; + + buffersize = sizeof(InlineAsm) + sizeof(IAOperand) * argcount; + ia = galloc(buffersize); + memset(ia, 0, buffersize); + + ia->opcode = mnemonic->x4; + ia->flags = 0; + ia->argcount = 0; + + op = ia->args; + for (format = mnemonic->format; *format; format++) { + CError_ASSERT(1664, ia->argcount < argcount); + + if (*format == ',') { + eatcommatoken(); + format++; + } else if (*format == ';') { + format++; + } + + if (*format == '[' || *format == '(') + format++; + + effect = EffectRead; + has_excl = 0; + has_question = 0; + negate = 0; + if (*format == '=') { + effect = EffectWrite; + format++; + } else if (*format == '+') { + effect = EffectRead | EffectWrite; + format++; + } + + if (*format == '-') { + negate = 1; + format++; + } + + if (*format == '?') { + has_question = 1; + format++; + } + + if (*format == '!') { + has_excl = 1; + format++; + } + + switch (*format) { + case 'p': + continue; + case '&': + if (format[1] == '2') { + op[0] = op[-2]; + format++; + if (op->type == IAOpnd_Reg) + op->u.reg.effect = effect; + ia->argcount++; + + op[1] = op[-1]; + op++; + if (op->type == IAOpnd_Reg) + op->u.reg.effect = effect; + } else { + op[0] = op[-1]; + if (op->type == IAOpnd_Reg) + op->u.reg.effect = effect; + } + break; + + case '%': { + SInt32 value; + format += pcode_const_from_format(format + 1, &value); + op->type = IAOpnd_Imm; + op->u.imm.value = value; + break; + } + + case 'b': + if (!isregisteroperand(RegClass_GPR)) { + if (tk == TK_INTCONST && tkintconst.lo == 0) + r0_operand(op); + } else { + registeroperand(op, RegClass_GPR, effect); + if (op->u.reg.object == NULL && op->u.reg.num == 0) + op->u.reg.effect = 0; + } + break; + + case 'r': + registeroperand(op, RegClass_GPR, effect); + break; + + case 'B': + case 'a': + case 'i': + case 't': + case 'u': + case 'x': + { + SInt32 value, hi, lo; + code = *format; + value = 16; + if (code == 'a') { + if (isdigit(format[1])) + code = *(++format); + else + CError_FATAL(1804); + } + + if (isdigit(format[1])) { + format += pcode_const_from_format(format + 1, &value); + } else if (code == 't' || code == 'B') { + code = 'u'; + value = 5; + } + + pcode_get_hi_lo(value, code, &hi, &lo); + if (!has_question || tk == ',') { + if (has_question) + tk = lex(); + if (has_excl) + getimmediateoperand(lo, hi); + else + immediateoperand(op, lo, hi); + } else { + op->type = IAOpnd_Imm; + op->u.imm.value = 0; + } + + if (code == 'i' && value == 16) + op->u.imm.value = (short) op->u.imm.value; + if (negate) + op->u.imm.value = -op->u.imm.value; + + if (pcode_check_imm_bits(op->u.imm.value, value, code)) + CError_FATAL(1838); + + break; + } + + case 'O': + if (tk == TK_INTCONST && tkintconst.lo == 0) { + getimmediateoperand(0, 0); + eatcommatoken(); + } + continue; + + case 'f': + registeroperand(op, RegClass_FPR, effect); + break; + + case 'v': + registeroperand(op, RegClass_VR, effect); + break; + + case 'T': + if (!has_question || tk == ',') { + if (has_question) + tk = lex(); + + immediateoperand(op, 268, 269); + if (op->u.reg.num == 268) + op->u.reg.num = 284; + else + op->u.reg.num = 285; + } else { + op->u.reg.num = 284; + } + + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.effect = effect; + break; + + case 'S': + { + SInt32 value; + format += pcode_const_from_format(format + 1, &value); + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.num = value; + op->u.reg.effect = effect; + break; + } + + case 'c': + if (has_question) { + if (isregisteroperand(RegClass_CRFIELD) || tk == TK_INTCONST) { + registeroperand(op, RegClass_CRFIELD, effect); + } else { + cr0_operand(op, effect); + if (format[1] == ',') + format++; + } + } else { + registeroperand(op, RegClass_CRFIELD, effect); + } + break; + + case 'l': + switch (ia->opcode) { + case PC_B: + case PC_BL: + labeloperand(ia, op, 1, mnemonic->x10 & 2, mnemonic->x10 & 1); + break; + default: + labeloperand(ia, op, 0, mnemonic->x10 & 2, mnemonic->x10 & 1); + break; + } + break; + + case 'w': + imm_or_labeldiff_operand(ia, op, -0x80000000, 0x7FFFFFFF, negate); + break; + + case 'M': + case 'm': + case 'n': + imm_or_labeldiff_operand(ia, op, -0x8000, 0xFFFF, negate); + break; + + case 'Q': + { + SInt32 cr = crbitoperand(); + op->type = IAOpnd_Reg; + op->u.reg.num = cr >> 2; + op->u.reg.rclass = RegClass_CRFIELD; + op->u.reg.effect = effect; + + ia->argcount++; + op[1].type = IAOpnd_Imm; + op[1].u.imm.value = cr % 4; + op++; + break; + } + + case 'd': { + short effect2; + CError_ASSERT(1971, format[1] == '('); + format++; + effect2 = EffectRead; + if (format[1] == '=') { + effect2 = EffectWrite; + format++; + } else if (format[1] == '+') { + effect2 = EffectRead | EffectWrite; + format++; + } + + CError_ASSERT(1983, format[1] == 'b'); + CError_ASSERT(1985, format[2] == ')'); + format += 2; + + switch (ia->opcode) { + case PC_LFS: + if (tk == TK_FLOATCONST) + floatoperand(op, ia, TYPE(&stfloat)); + else + memoryoperand(op, ia, 1, effect2); + break; + case PC_LFD: + if (tk == TK_FLOATCONST) + floatoperand(op, ia, TYPE(&stdouble)); + else + memoryoperand(op, ia, 1, effect2); + break; + default: + memoryoperand(op, ia, 1, effect2); + } + + break; + } + + case 'N': + immediateoperand(op, 1, 32); + break; + + case 's': + registeroperand(op, RegClass_SPR, effect); + break; + + case 'D': + if (tk == TK_IDENTIFIER && (reg = InlineAsm_LookupDCRRegister(tkidentifier->name))) { + op->type = IAOpnd_Imm; + op->u.imm.value = reg->num; + } else { + immediateoperand(op, 0, 1023); + } + break; + + case 'Y': + { + SInt32 mask = 255; + SInt32 i; + if (ia->opcode == PC_MTCRF) { + if (ia->args[0].type != IAOpnd_Imm) + CError_Error(CErrorStr144); + mask = ia->args[0].u.imm.value; + } + for (i = 0; i < 8; i++) { + if ((0x80 >> i) & mask) { + op->type = IAOpnd_Reg; + op->u.reg.num = i; + op->u.reg.rclass = RegClass_CRFIELD; + op->u.reg.effect = effect; + ia->argcount++; + op++; + } + } + ia->argcount--; + op--; + break; + } + + case 'P': + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.object = NULL; + if (format[1] == '3') { + format++; + op->u.reg.num = getimmediateoperand(0, 7); + } else { + op->u.reg.num = getimmediateoperand(0, 3); + } + op->u.reg.effect = effect; + break; + + case 'C': + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.object = NULL; + op->u.reg.num = 9; + op->u.reg.effect = effect; + break; + + case 'L': + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.object = NULL; + op->u.reg.num = 8; + op->u.reg.effect = effect; + break; + + case 'X': + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_SPR; + op->u.reg.object = NULL; + op->u.reg.num = 1; + op->u.reg.effect = effect; + break; + + case 'Z': + op->type = IAOpnd_Reg; + op->u.reg.rclass = RegClass_CRFIELD; + op->u.reg.object = NULL; + t = info->flags & 0xC0000000; + if (t == 0x40000000) { + op->u.reg.num = 1; + } else if (t == 0xC0000000) { + op->u.reg.num = 6; + } else { + op->u.reg.num = 0; + } + op->u.reg.effect = effect; + break; + + default: + CError_FATAL(2266); + } + + while (format[1] && strchr("/<>|*", format[1])) { + int index; + SInt32 value; + SInt32 value2; + IAOperand tmp; + switch ((code = *(++format))) { + case '/': + index = -1; + if (format[1] == '2') { + index = -2; + format++; + } + + tmp = op[index]; + op[index] = op[0]; + op[0] = tmp; + break; + case '*': + case '<': + case '>': + case '|': + if (op->type == IAOpnd_Imm) + value = op->u.imm.value; + else if (op->type == IAOpnd_Reg) + value = op->u.reg.num; + else + CError_FATAL(2312); + + if (format[1] == 'p') { + format++; + if (op[-1].type == IAOpnd_Imm) + value2 = op[-1].u.imm.value; + else if (op[-1].type == IAOpnd_Reg) + value2 = op[-1].u.reg.num; + else + CError_FATAL(2322); + } else if (isdigit(format[1])) { + format += pcode_const_from_format(format + 1, &value2); + } else { + CError_FATAL(2327); + } + + switch (code) { + case '<': + value = value2 - value; + break; + case '>': + value = value - value2; + break; + case '|': + value = value + value2; + break; + case '*': + value = value * value2; + break; + default: + CError_FATAL(2348); + } + + if (op->type == IAOpnd_Imm) + op->u.imm.value = value; + else if (op->type == IAOpnd_Reg) + op->u.reg.num = value; + else + CError_FATAL(2355); + break; + } + } + + ia->argcount++; + op++; + if (format[1] == ']' || format[1] == ')') + format++; + } + + return ia; +} + +static int mnemonic_has_overflow(char *buf) { + int result = 0; + + if (buf[strlen(buf) - 1] == 'o') { + if (!strcmp(buf, "bso")) + return 0; + if (!strcmp(buf, "fcmpo")) + return 0; + if (!strcmp(buf, "eieio")) + return 0; + if (!strcmp(buf, "vslo")) + return 0; + if (!strcmp(buf, "vsro")) + return 0; + result = 1; + } + + return result; +} + +static int mnemonic_has_absolute(const char *buf) { + int result = 0; + char last = buf[strlen(buf) - 1]; + + if (buf[0] == 'b' && last == 'a') + result = 1; + + return result; +} + +static int mnemonic_has_linkregister(const char *buf) { + int result = 0; + char last = buf[strlen(buf) - 1]; + + if (buf[0] == 'b' && last == 'l') + result = 1; + + return result; +} + +static int mnemonic_has_record(char *buf) { + int result = 0; + + if (lookahead() == '.') { + tk = lex(); + result = 1; + strcat(buf, "."); + } + + return result; +} + +static void installregistervariables(void) { +} + +void InlineAsm_InitializePPC(void) { + Test_Version_Numbers(); + allow_array_expressions = 1; + supports_hardware_fpu = 1; + assembledinstructions = 0; + + switch (copts.processor) { + case CPU_PPC401: cpu = CPUMask_401; break; + case CPU_PPC403: cpu = CPUMask_403; break; + case CPU_PPC505: cpu = CPUMask_50x; break; + case CPU_PPC509: cpu = CPUMask_50x; break; + case CPU_PPC555: cpu = CPUMask_55x_56x; break; + case CPU_PPC556: cpu = CPUMask_55x_56x; break; + case CPU_PPC565: cpu = CPUMask_55x_56x; break; + case CPU_PPC601: cpu = CPUMask_601; break; + case CPU_PPC602: cpu = CPUMask_602; break; + case CPU_PPC8240: cpu = CPUMask_8240; break; + case CPU_PPC8260: cpu = CPUMask_8260; break; + case CPU_PPC603: cpu = CPUMask_603; break; + case CPU_PPC603e: cpu = CPUMask_603; break; + case CPU_PPC604: cpu = CPUMask_604; break; + case CPU_PPC604e: cpu = CPUMask_604; break; + case CPU_PPC740: cpu = CPUMask_740_750; break; + case CPU_PPC750: cpu = CPUMask_740_750; break; + case CPU_PPC801: cpu = CPUMask_801_821_860; break; + case CPU_PPC821: cpu = CPUMask_801_821_860; break; + case CPU_PPC823: cpu = CPUMask_823_850; break; + case CPU_PPC850: cpu = CPUMask_823_850; break; + case CPU_PPC860: cpu = CPUMask_801_821_860; break; + case CPU_PPC7400: case CPU_PPC7450: cpu = CPUMask_74xx; break; + case CPU_Generic: cpu = CPUMask_Generic; break; + default: + CError_FATAL(2613); + } + + if (copts.altivec_model) + cpu |= 0x40000000; +} + +void InlineAsm_Initialize(AssemblerType assemblertype) { + assembler_type = assemblertype; + + if (assembler_type == 0) { + InlineAsm_InitializePPC(); + pic_base_reg = INVALID_PIC_REG; + } else { + pic_base_reg = 0; + pic_base_label = NULL; + } + + InlineAsm_InitializeMnemonicsPPC(); + InlineAsm_InitializeRegisters(); + InlineAsm_InitializeRegistersPPC(); + + if (assembler_type == 0) { + installregistervariables(); + } else { + process_arguments(setup_assembly_argument, 0); + assign_local_addresses(); + } +} + +static IAEntryPoint *InlineAsm_CreateEntryPoint(HashNameNode *name, Boolean flag) { + Object *obj; + IAEntryPoint *ep; + IALookupResult result; + + if (InlineAsm_LookupSymbol(name, &result) && (obj = result.object)) { + if (!(obj->datatype == DFUNC && !(obj->flags & OBJECT_DEFINED))) + CError_Error(CErrorStr122, name->name); + + obj->flags |= OBJECT_DEFINED; + obj->sclass = flag ? TK_STATIC : TK_EXTERN; + + ep = lalloc(sizeof(IAEntryPoint)); + memclrw(ep, sizeof(IAEntryPoint)); + ep->x0 = IADirective_Entry; + ep->x2 = 1; + ep->x8 = obj; + ep->size = assembledinstructions * 4; + } else { + CError_Error(CErrorStr140, name->name); + } + + return ep; +} + +static InlineAsm *InlineAsm_CreateFrFree(void) { + InlineAsm *ia = lalloc(sizeof(InlineAsm)); + memclrw(ia, sizeof(InlineAsm)); + ia->opcode = IADirective_FrFree; + ia->flags = IAFlag1; + return ia; +} + +SInt32 InlineAsm_IsDirective(AssemblerType assemblertype) { + char *name; + SInt32 directive = IADirective_Null; + + if (tk == '.') + tk = lex(); + + if (tk == TK_IDENTIFIER) { + name = tkidentifier->name; + if (!strcmp(name, "machine")) { + directive = IADirective_Machine; + } else if (assemblertype == AssemblerType_1) { + if (!strcmp(name, "entry")) { + directive = IADirective_Entry; + } else if (!strcmp(name, "fralloc")) { + directive = IADirective_FrAlloc; + } else if (!strcmp(name, "nofralloc")) { + directive = IADirective_NoFrAlloc; + } else if (!strcmp(name, "frfree")) { + directive = IADirective_FrFree; + } else if (!strcmp(name, "smclass")) { + directive = IADirective_SmClass; + } else if (!strcmp(name, "picbase")) { + directive = IADirective_PicBase; + } else { + directive = IADirective_Null; + } + } + } + + return directive; +} + +void InlineAsm_ProcessDirective(SInt32 directive) { + Boolean flag; + Statement *stmt; + IAOperand op; + + switch (directive) { + case IADirective_Entry: + flag = 0; + tk = lex(); + if (tk == TK_STATIC) { + flag = 1; + tk = lex(); + } else if (tk == TK_EXTERN) { + tk = lex(); + } + + if (tk != TK_IDENTIFIER) + CError_Error(CErrorStr107); + + stmt = CFunc_AppendStatement(ST_ASM); + stmt->expr = NULL; + stmt->expr = (ENode *) InlineAsm_CreateEntryPoint(tkidentifier, flag); + stmt->sourceoffset = -1; + tk = lex(); + break; + + case IADirective_FrAlloc: + if (assembledinstructions) + CError_Error(CErrorStr166); + if (asm_alloc_flags[0]) + CError_Error(CErrorStr165); + if (asm_alloc_flags[1] || asm_alloc_flags[2]) + CError_Error(CErrorStr166); + + tk = lex(); + if (tk != TK_EOL && tk != ';') + fralloc_parameter_area_size = getimmediateoperand(0x20, 0x7FFE); + + requires_frame = 1; + asm_alloc_flags[0] = 1; + break; + + case IADirective_NoFrAlloc: + if (asm_alloc_flags[0] || asm_alloc_flags[1]) + CError_Error(CErrorStr165); + if (asm_alloc_flags[2]) + CError_Error(CErrorStr166); + if (assembledinstructions) + CError_Error(CErrorStr166); + tk = lex(); + + asm_alloc_flags[1] = 1; + user_responsible_for_frame = 1; + break; + + case IADirective_FrFree: + if (asm_alloc_flags[1] || asm_alloc_flags[2]) + CError_Error(CErrorStr166); + + asm_alloc_flags[2] = 1; + stmt = CFunc_AppendStatement(ST_ASM); + stmt->expr = NULL; + stmt->expr = (ENode *) InlineAsm_CreateFrFree(); + if (copts.filesyminfo) + stmt->sourceoffset = CPrep_GetFileOffsetInfo(&cparser_fileoffset); + else + stmt->sourceoffset = -1; + tk = lex(); + break; + + case IADirective_Machine: + tk = lex(); + if (tk == TK_INTCONST) { + switch (tkintconst.lo) { + case 401: cpu = CPUMask_401; break; + case 403: cpu = CPUMask_403; break; + case 505: cpu = CPUMask_50x; break; + case 509: cpu = CPUMask_50x; break; + case 555: cpu = CPUMask_55x_56x; break; + case 556: cpu = CPUMask_55x_56x; break; + case 565: cpu = CPUMask_55x_56x; break; + case 601: cpu = CPUMask_601; break; + case 602: cpu = CPUMask_602; break; + case 8240: cpu = CPUMask_8240; break; + case 8260: cpu = CPUMask_8260; break; + case 603: cpu = CPUMask_603; break; + case 604: cpu = CPUMask_604; break; + case 740: cpu = CPUMask_740_750; break; + case 750: cpu = CPUMask_740_750; break; + case 801: cpu = CPUMask_801_821_860; break; + case 821: cpu = CPUMask_801_821_860; break; + case 823: cpu = CPUMask_823_850; break; + case 850: cpu = CPUMask_823_850; break; + case 860: cpu = CPUMask_801_821_860; break; + case 7400: cpu = CPUMask_74xx; break; + default: + CError_Error(CErrorStr144); + } + } else if (tk == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "all")) { + cpu = CPUMask_All; + } else if (!strcmp(tkidentifier->name, "generic")) { + cpu = CPUMask_Generic; + } else if (!strcmp(tkidentifier->name, "603e")) { + cpu = CPUMask_603; + } else if (!strcmp(tkidentifier->name, "604e")) { + cpu = CPUMask_604; + } else if (!strcmp(tkidentifier->name, "PPC603e")) { + cpu = CPUMask_603; + } else if (!strcmp(tkidentifier->name, "PPC604e")) { + cpu = CPUMask_604; + } else if (!strcmp(tkidentifier->name, "altivec")) { + cpu = CPUMask_74xx; + } else { + CError_Error(CErrorStr144); + } + } else { + CError_Error(CErrorStr144); + } + tk = lex(); + break; + + case IADirective_SmClass: + tk = lex(); + if (tk == TK_IDENTIFIER) { + if (!strcmp(tkidentifier->name, "PR")) + sm_section = SECT_TEXT; + else + CError_Error(CErrorStr144); + } else { + CError_Error(CErrorStr144); + } + tk = lex(); + break; + + case IADirective_PicBase: + tk = lex(); + registeroperand(&op, RegClass_GPR, EffectRead); + + if (!(op.type == IAOpnd_Reg && op.u.reg.object == NULL)) + CError_Error(CErrorStr144); + + tk = lex(); + if (tk == TK_IDENTIFIER) { + savepicbase(op.u.reg.num, tkidentifier); + tk = lex(); + } else { + CError_Error(CErrorStr144); + } + break; + + default: + CError_Error(CErrorStr261); + } +} + +void InlineAsm_ScanAssemblyDirective(void) { + SInt32 directive; + + if ((directive = InlineAsm_IsDirective(assembler_type)) != IADirective_Null) + InlineAsm_ProcessDirective(directive); +} + +void InlineAsm_ScanAssemblyInstruction(void) { + Statement *stmt; + InlineAsm *ia; + IAMnemonic *mnemonic; + Boolean record_flag; + Boolean flag3; + Boolean flag4; + Boolean flag5; + Boolean flag1; + Boolean flag2; + SInt32 directive; + OpcodeInfo *info; + char buf[20]; + + flag1 = 0; + flag2 = 0; + if ((directive = InlineAsm_IsDirective(assembler_type)) != IADirective_Null) { + InlineAsm_ProcessDirective(directive); + return; + } + + stmt = CFunc_AppendStatement(ST_ASM); + stmt->expr = NULL; + if (copts.filesyminfo) + stmt->sourceoffset = CPrep_GetFileOffsetInfo(&cparser_fileoffset); + else + stmt->sourceoffset = -1; + + strncpy(buf, tkidentifier->name, sizeof(buf)); + record_flag = mnemonic_has_record(buf); + mnemonic = InlineAsm_LookupMnemonicPPC(buf); + if (!mnemonic) + CError_Error(CErrorStr261); + + info = &opcodeinfo[mnemonic->x4]; + flag3 = (FLAG_SET_F(info->flags) & fCanSetCarry) && (mnemonic->x10 & 0x400); + flag4 = (FLAG_SET_T(info->flags) & fCanBeAbsolute) && (mnemonic->x10 & 2); + flag5 = (FLAG_SET_T(info->flags) & fCanLink) && (mnemonic->x10 & 1); + + if ((cpu == CPUMask_Generic) && (cpu & CPUFLAG_LOW_MASK) != ((cpu & mnemonic->cpu) & CPUFLAG_LOW_MASK)) { + CError_Error(CErrorStr152); + } else if (((cpu & mnemonic->cpu) & CPUFLAG_LOW_MASK) == 0) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_10000000) && !(cpu & CPUFLAG_10000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_8000000) && !(cpu & CPUFLAG_8000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_2000000) && !(cpu & CPUFLAG_2000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_4000000) && !(cpu & CPUFLAG_4000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_1000000) && !(cpu & CPUFLAG_1000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_40000000) && !(cpu & CPUFLAG_40000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_20000000) && !(cpu & CPUFLAG_20000000)) { + CError_Error(CErrorStr152); + } else if ((mnemonic->cpu & CPUFLAG_800000) && !(cpu & CPUFLAG_800000)) { + CError_Error(CErrorStr152); + } + + tk = lex(); + if (tk == '+' || tk == '-') { + if ( + ((OPCODE_PART_1(mnemonic->x10) == 16) && (OPCODE_PART_2(mnemonic->x10) != 20)) || + ((OPCODE_PART_1(mnemonic->x10) == 19) && (OPCODE_PART_3(mnemonic->x10) == 528) && (OPCODE_PART_2(mnemonic->x10) != 20)) || + ((OPCODE_PART_1(mnemonic->x10) == 19) && (OPCODE_PART_3(mnemonic->x10) == 16) && (OPCODE_PART_2(mnemonic->x10) != 20)) + ) { + if (tk == '+') + flag1 = 1; + else if (tk == '-') + flag2 = 1; + tk = lex(); + } else { + CError_Error(CErrorStr120); + } + } + + stmt->expr = (ENode *) InlineAsm_ScanAssemblyOperands(mnemonic); + + ia = (InlineAsm *) stmt->expr; + if (record_flag) + ia->flags2 |= IAFlagsB_1; + if (flag3) + ia->flags2 |= IAFlagsB_2; + if (flag4) + ia->flags2 |= IAFlagsB_4; + if (flag5) + ia->flags2 |= IAFlagsB_8; + if (flag1) + ia->flags2 |= IAFlagsB_10; + if (flag2) + ia->flags2 |= IAFlagsB_20; + if (volatileasm) + ia->flags2 |= IAFlagsB_40; + + ++assembledinstructions; +} + +static PCode *InlineAsm_TranslateIRtoPCodePPC(InlineAsm *ia, int argcount, AssemblerType assemblertype) { + PCode *pc; + int index; + int extra_args; + int reg; + int newargcount; + SInt32 buffersize; + IAOperand *src; + PCodeArg *dest; + OpcodeInfo *info; + + info = &opcodeinfo[ia->opcode]; + index = 0; + extra_args = 0; + reg = 0; + + if ((PCODE_FLAG_SET_F(info) & fCanSetRecordBit) && !(PCODE_FLAG_SET_F(info) & fRecordBit)) + extra_args++; + + if (!(PCODE_FLAG_SET_F(info) & fSetsCarry) && (PCODE_FLAG_SET_F(info) & fCanSetCarry)) + extra_args++; + + if (argcount < ia->argcount) { + if ((argcount + extra_args) >= ia->argcount) { + extra_args -= (ia->argcount - argcount); + argcount = ia->argcount; + } else { + CError_FATAL(3317); + } + } + + if (ia->opcode == PC_B && ia->args[0].type == IAOpnd_Imm) + ia->flags2 |= IAFlagsB_40; + + if (ia->opcode == PC_TWI || ia->opcode == PC_TW || ia->opcode == PC_TRAP || ia->opcode == PC_SC) + ia->flags |= IAFlag2; + + if (ia->flags & IAFlag2) { + newargcount = argcount + branch_count_volatiles(); + if (copts.exceptions && current_statement && assembler_type == 0) + newargcount += countexceptionactionregisters(current_statement->dobjstack); + + buffersize = sizeof(PCode) + sizeof(PCodeArg) * (newargcount + extra_args); + pc = lalloc(buffersize); + memset(pc, 0, buffersize); + pc->argCount = newargcount; + } else if (ia->opcode == PC_STMW || ia->opcode == PC_LMW) { + reg = ia->args[0].u.reg.object ? OBJECT_REG(ia->args[0].u.reg.object) : ia->args[0].u.reg.num; + newargcount = argcount + (32 - reg); + + buffersize = sizeof(PCode) + sizeof(PCodeArg) * newargcount; + pc = lalloc(buffersize); + memset(pc, 0, buffersize); + pc->argCount = newargcount; + } else { + buffersize = sizeof(PCode) + sizeof(PCodeArg) * (argcount + extra_args); + pc = lalloc(buffersize); + memset(pc, 0, buffersize); + pc->argCount = (ia->argcount > argcount) ? ia->argcount : argcount; + } + + pc->op = ia->opcode; + pc->flags = info->flags; + pc->sourceoffset = current_statement ? current_statement->sourceoffset : -1; + + dest = pc->args; + src = ia->args; + argcount += extra_args; + while (index < argcount) { + if (index >= ia->argcount) { + dest->kind = PCOp_PLACEHOLDEROPERAND; + } else { + switch (src->type) { + case IAOpnd_0: + dest->kind = PCOp_PLACEHOLDEROPERAND; + break; + + case IAOpnd_Imm: + dest->kind = PCOp_IMMEDIATE; + dest->data.imm.value = src->u.imm.value; + if (pc->flags & (fIsRead | fIsWrite)) + pc->flags |= fIsPtrOp; + dest->data.imm.obj = NULL; + break; + + case IAOpnd_Reg: { + int r20; + r20 = 0; + if (src->u.reg.object) { + if (Registers_GetVarInfo(src->u.reg.object)->flags & VarInfoFlag2) { + if (src->u.reg.num == 1) + r20 = OBJECT_REG_HI(src->u.reg.object); + else + r20 = OBJECT_REG(src->u.reg.object); + } else { + if (Registers_GetVarInfo(src->u.reg.object)->flags & VarInfoFlag40) + PPCError_Error(PPCErrorStr172, src->u.reg.object->name->name); + else + PPCError_Error(PPCErrorStr167, src->u.reg.object->name->name); + } + } else if (src->u.reg.num == INVALID_PIC_REG) { + r20 = pic_base_reg; + } else { + r20 = src->u.reg.num; + } + + dest->kind = PCOp_REGISTER; + dest->arg = src->u.reg.rclass; + dest->data.reg.reg = r20; + dest->data.reg.effect = src->u.reg.effect; + if (pc->op == PC_RLWIMI && (dest->data.reg.effect & EffectWrite) && dest->arg == RegClass_GPR && !(dest->data.reg.effect & EffectRead)) + CError_FATAL(3442); + + if (dest->arg == RegClass_SPR) { + int i; + dest->kind = PCOp_SYSREG; + for (i = 0; i < 4; i++) { + if (dest->data.reg.reg == spr_to_sysreg[i]) { + dest->kind = PCOp_REGISTER; + dest->arg = RegClass_SPR; + dest->data.reg.reg = i; + break; + } + } + pcsetsideeffects(pc); + } else if (dest->arg == RegClass_6 || dest->arg == RegClass_DCR) { + short save = dest->data.reg.reg; + dest->kind = PCOp_IMMEDIATE; + dest->data.imm.value = save; + dest->data.imm.obj = NULL; + break; + } + + if ((src->u.reg.effect & EffectWrite) && dest->data.reg.reg < n_real_registers[dest->arg]) { + if (src->u.reg.object) { + if (Registers_GetVarInfo(src->u.reg.object)->flags & VarInfoFlag4) { + int reg, regHi; + CError_ASSERT(3474, dest->arg == RegClass_GPR); + + regHi = OBJECT_REG_HI(src->u.reg.object); + reg = OBJECT_REG(src->u.reg.object); + retain_GPR_pair(src->u.reg.object, reg, regHi); + } else { + retain_register(src->u.reg.object, dest->arg, dest->data.reg.reg); + } + } else { + retain_register(NULL, dest->arg, dest->data.reg.reg); + } + } + + break; + } + + case IAOpnd_Lab: + if (!src->u.lab.label->pclabel) + src->u.lab.label->pclabel = makepclabel(); + dest->kind = PCOp_LABEL; + dest->data.label.label = src->u.lab.label->pclabel; + break; + + case IAOpnd_3: + case IAOpnd_4: + dest->kind = PCOp_MEMORY; + dest->arg = src->u.obj.unk; + dest->data.mem.obj = src->u.obj.obj; + dest->data.mem.offset = src->u.obj.offset; + if (pc->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + pc->alias = make_alias(dest->data.mem.obj, dest->data.mem.offset, nbytes_loaded_or_stored_by(pc)); + if (is_volatile_object(dest->data.mem.obj)) + pc->flags |= fIsVolatile; + if (OBJ_GET_TARGET_CONST(dest->data.mem.obj)) + pc->flags |= fIsConst; + } + break; + + case IAOpnd_LabDiff: + if (src->u.labdiff.label1->pclabel == NULL) + src->u.labdiff.label1->pclabel = makepclabel(); + if (src->u.labdiff.label2->pclabel == NULL) + src->u.labdiff.label2->pclabel = makepclabel(); + if (pc->flags & (fIsRead | fIsWrite)) + pc->flags |= fIsPtrOp; + dest->kind = PCOp_LABELDIFF; + dest->data.labeldiff.labelA = src->u.labdiff.label1->pclabel; + dest->data.labeldiff.labelB = src->u.labdiff.label2->pclabel; + dest->arg = src->negated; + dest->data.labeldiff.offset = src->u.labdiff.offset; + break; + + default: + CError_FATAL(3528); + } + } + + index++; + dest++; + src++; + } + + if (ia->opcode == PC_STMW || ia->opcode == PC_LMW) { + int i; + for (i = reg; i < 32; i++, dest++) { + dest->kind = PCOp_REGISTER; + dest->arg = RegClass_GPR; + dest->data.reg.reg = i; + dest->data.reg.effect = ((ia->opcode == PC_LMW) ? EffectWrite : EffectRead); + } + } + + if (ia->flags & IAFlag2) { + UInt32 masks[5] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + branch_record_volatiles(dest, masks); + if (copts.exceptions && current_statement && assembler_type == 0) + noteexceptionactionregisters(current_statement->dobjstack, dest); + } + + return pc; +} + +void InlineAsm_TranslateIRtoPCode(Statement *stmt) { + InlineAsm *ia; + PCode *pc; + + ia = (InlineAsm *) stmt->expr; + pc = InlineAsm_TranslateIRtoPCodePPC(ia, opcodeinfo[ia->opcode].x8, assembler_type); + appendpcode(pclastblock, pc); + + if ((ia->flags2 & IAFlagsB_40) || !copts.optimizewithasm) + setpcodeflags(fSideEffects); + + if (ia->flags2 & IAFlagsB_1) { + if (PCODE_FLAG_SET_F(pc) & fCanSetRecordBit) + pcsetrecordbit(pclastblock->lastPCode); + else + CError_Error(CErrorStr261); + } + + if (ia->flags2 & IAFlagsB_2) { + if (PCODE_FLAG_SET_F(pc) & fCanSetCarry) + setpcodeflags(fOverflow); // idk? + else + CError_Error(CErrorStr261); + } + + if (ia->flags2 & IAFlagsB_4) { + if (PCODE_FLAG_SET_T(pc) & fCanBeAbsolute) { + int i; + for (i = 0; i < pc->argCount; i++) { + if (pc->args[i].kind == PCOp_LABEL || pc->args[i].kind == PCOp_MEMORY) { + PPCError_Error(PPCErrorStr177); + break; + } + } + setpcodeflags(fAbsolute); + } else { + CError_Error(CErrorStr261); + } + } + + if (ia->flags2 & IAFlagsB_8) { + if (PCODE_FLAG_SET_T(pc) & fCanLink) { + pcsetlinkbit(pclastblock->lastPCode); + if (!(ia->flags & IAFlag2)) { + pclastblock->lastPCode->flags &= ~fIsCall; + pclastblock->lastPCode->flags |= fIsBranch; + } + makes_call = 1; + } else { + CError_Error(CErrorStr261); + } + } + + if (ia->flags2 & IAFlagsB_10) + setpcodeflags(fCanSetRecordBit); + if (ia->flags2 & IAFlagsB_20) + setpcodeflags(fCanSetCarry); + + if (OPCODE_PART_1(opcodeinfo[pc->op].insn) == 16) { + PCodeLabel *dest = NULL; + PCodeLabel *src = makepclabel(); + switch (pc->op) { + case PC_BC: + if (pc->args[3].kind == PCOp_LABEL) + dest = pc->args[3].data.label.label; + break; + case PC_BT: + case PC_BF: + case PC_BDNZT: + case PC_BDNZF: + case PC_BDZT: + case PC_BDZF: + if (pc->args[2].kind == PCOp_LABEL) + dest = pc->args[2].data.label.label; + break; + case PC_BDNZ: + case PC_BDZ: + if (pc->args[0].kind == PCOp_LABEL) + dest = pc->args[0].data.label.label; + break; + default: + CError_FATAL(3715); + } + + if (dest) { + pcbranch(pclastblock, dest); + pcbranch(pclastblock, src); + makepcblock(); + pclabel(pclastblock, src); + } + return; + } + + if (OPCODE_PART_1(opcodeinfo[pc->op].insn) == 18) { + PCodeLabel *label; + if (ia->flags2 & IAFlagsB_8) { + label = makepclabel(); + pcbranch(pclastblock, label); + } + if (pc->args[0].kind == PCOp_LABEL) + pcbranch(pclastblock, pc->args[0].data.label.label); + makepcblock(); + if (ia->flags2 & IAFlagsB_8) { + pclabel(pclastblock, label); + } + return; + } + + if (OPCODE_PART_1(opcodeinfo[pc->op].insn) == 19) { + switch (pc->op) { + case PC_BLR: + case PC_RFI: + if (asm_alloc_flags[3]) + asm_alloc_flags[9] = 1; + else + asm_alloc_flags[8] = 1; + + if (asm_alloc_flags[4]) + asm_alloc_flags[5] = 1; + + if (pc->op == PC_BLR) + asm_alloc_flags[6] = 1; + else + asm_alloc_flags[7] = 1; + break; + } + } +} + +const char *InlineAsm_GetMnemonic(InlineAsm *ia) { + return opcodeinfo[ia->opcode].name; +} + +static void savepicbase(short reg, HashNameNode *name) { + IALookupResult result; + + if (!InlineAsm_LookupSymbol(name, &result)) + result.label = InlineAsm_DeclareLabel(name); + + pic_base_label = result.label; + pic_base_reg = reg; + uses_globals = 1; +} + +static SInt32 InlineAsm_OpcodeSize(InlineAsm *ia) { + if (opcodeinfo[ia->opcode].flags & (fIsRead | fIsWrite)) { + switch (ia->opcode) { + case PC_LBZ: + case PC_LBZU: + case PC_LBZX: + case PC_LBZUX: + case PC_STB: + case PC_STBU: + case PC_STBX: + case PC_STBUX: + return 1; + case PC_LHZ: + case PC_LHZU: + case PC_LHZX: + case PC_LHZUX: + case PC_LHA: + case PC_LHAU: + case PC_LHAX: + case PC_LHAUX: + case PC_LHBRX: + case PC_STH: + case PC_STHU: + case PC_STHX: + case PC_STHUX: + case PC_STHBRX: + return 2; + case PC_LWZ: + case PC_LWZU: + case PC_LWZX: + case PC_LWZUX: + case PC_LWBRX: + case PC_STW: + case PC_STWU: + case PC_STWX: + case PC_STWUX: + case PC_STWBRX: + case PC_LFS: + case PC_LFSU: + case PC_LFSX: + case PC_LFSUX: + case PC_STFS: + case PC_STFSU: + case PC_STFSX: + case PC_STFSUX: + case PC_LWARX: + case PC_STFIWX: + case PC_STWCX: + case PC_ECIWX: + case PC_ECOWX: + case PC_MFROM: + case PC_LSCBX: + return 4; + case PC_LMW: + case PC_STMW: + if (ia->args[0].type == IAOpnd_Reg && ia->args[0].u.reg.object == NULL) + return (32 - ia->args[0].u.reg.num) * 4; + else + return 128; + case PC_LFD: + case PC_LFDU: + case PC_LFDX: + case PC_LFDUX: + case PC_STFD: + case PC_STFDU: + case PC_STFDX: + case PC_STFDUX: + return 8; + case PC_LSWI: + case PC_STSWI: + return ia->args[2].u.imm.value; + case PC_LSWX: + case PC_STSWX: + return 128; + case PC_LVEBX: + case PC_STVEBX: + return 1; + case PC_LVEHX: + case PC_STVEHX: + return 2; + case PC_LVEWX: + case PC_STVEWX: + return 4; + case PC_LVSL: + case PC_LVSR: + case PC_LVX: + case PC_LVXL: + case PC_STVX: + case PC_STVXL: + return 16; + default: + CError_FATAL(3924); + } + } else { + if (opcodeinfo[ia->opcode].flags & fOpTypeGPR) + return 4; + if (opcodeinfo[ia->opcode].flags & fOpTypeFPR) + return 8; + if (opcodeinfo[ia->opcode].flags & fOpTypeVR) + return 16; + + if (opcodeinfo[ia->opcode].flags & fSideEffects) { + switch (ia->opcode) { + case PC_TLBIE: + case PC_TLBLD: + case PC_TLBLI: + return 4; + default: + CError_FATAL(3941); + } + } + } + + CError_FATAL(3944); + return 0; +} + +void CodeGen_GetAsmEffects(Statement *stmt, IAEffects *effects) { + InlineAsm *ia; + OpcodeInfo *info; + int i; + IAOperand *op; + VarInfo *vi; + + ia = (InlineAsm *) stmt->expr; + info = &opcodeinfo[ia->opcode]; + + effects->numoperands = 0; + effects->numlabels = 0; + effects->x1 = 0; + effects->x2 = 0; + effects->x3 = 0; + effects->x4 = 0; + effects->x0 = 0; + effects->x5 = 0; + + if (info->flags & fIsPtrOp) { + if (info->flags & fIsRead) + effects->x1 = 1; + if (info->flags & fIsWrite) + effects->x2 = 1; + } + + if (PCODE_FLAG_SET_T(info) & fCanLink) { + if (ia->flags2 & IAFlagsB_8) + effects->x4 = 1; + else if ((info->flags & fIsCall) || (info->flags & fIsWrite)) + effects->x3 = 1; + + if (ia->opcode == PC_B) { + if (ia->args[0].type == IAOpnd_Imm) + effects->x0 = 1; + effects->x5 = 1; + } + } + + if (info->flags & fSideEffects) + effects->x0 = 1; + + if (ia->opcode == PC_BC && (ia->flags2 & IAFlagsB_8)) + effects->x4 = 1; + + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + switch (op->type) { + case IAOpnd_0: + case IAOpnd_Imm: + break; + case IAOpnd_Reg: + if (op->u.reg.object) { + if ((vi = Registers_GetVarInfo(op->u.reg.object))) + vi->flags |= VarInfoFlag40; + if (op->u.reg.effect & EffectRead) { + if ( + TYPE_FITS_IN_REGISTER(op->u.reg.object->type) || + IS_TYPE_FLOAT(op->u.reg.object->type) || + IS_TYPE_VECTOR(op->u.reg.object->type) + ) { + effects->operands[effects->numoperands].type = IAEffect_0; + effects->operands[effects->numoperands].object = op->u.reg.object; + effects->operands[effects->numoperands].offset = 0; + effects->operands[effects->numoperands].size = op->u.reg.object->type->size; + effects->numoperands++; + } else { + CError_FATAL(4051); + } + } + } + break; + case IAOpnd_3: + case IAOpnd_4: + if (op->u.obj.obj) { + if (info->flags & fIsRead) { + effects->operands[effects->numoperands].type = IAEffect_0; + effects->operands[effects->numoperands].object = op->u.obj.obj; + effects->operands[effects->numoperands].offset = op->u.obj.offset; + effects->operands[effects->numoperands].size = InlineAsm_OpcodeSize(ia); + effects->numoperands++; + } else if (!(info->flags & (fIsBranch | fIsCall))) { + effects->operands[effects->numoperands].type = IAEffect_3; + effects->operands[effects->numoperands].object = op->u.obj.obj; + effects->operands[effects->numoperands].offset = op->u.obj.offset; + effects->operands[effects->numoperands].size = InlineAsm_OpcodeSize(ia); + effects->numoperands++; + } + } + break; + case IAOpnd_Lab: + effects->labels[effects->numlabels] = op->u.lab.label; + effects->numlabels++; + break; + case IAOpnd_LabDiff: + effects->labels[effects->numlabels] = op->u.labdiff.label1; + effects->numlabels++; + effects->labels[effects->numlabels] = op->u.labdiff.label2; + effects->numlabels++; + effects->x3 = 1; + break; + default: + CError_FATAL(4087); + } + + CError_ASSERT(4090, (UInt32) effects->numoperands <= IAMaxOperands); + CError_ASSERT(4093, (UInt32) effects->numlabels <= IAMaxLabels); + } + + for (i = 0, op = ia->args; i < ia->argcount; i++, op++) { + switch (op->type) { + case IAOpnd_Reg: + if (op->u.reg.object) { + if ((vi = Registers_GetVarInfo(op->u.reg.object))) + vi->flags |= VarInfoFlag40; + if (op->u.reg.effect & EffectWrite) { + if ( + TYPE_FITS_IN_REGISTER(op->u.reg.object->type) || + IS_TYPE_FLOAT(op->u.reg.object->type) || + IS_TYPE_VECTOR(op->u.reg.object->type) + ) { + effects->operands[effects->numoperands].type = IAEffect_1; + effects->operands[effects->numoperands].object = op->u.reg.object; + effects->operands[effects->numoperands].offset = 0; + effects->operands[effects->numoperands].size = op->u.reg.object->type->size; + effects->numoperands++; + } else { + CError_FATAL(4132); + } + } + } + break; + case IAOpnd_3: + case IAOpnd_4: + if (op->u.obj.obj) { + if (info->flags & fIsWrite) { + effects->operands[effects->numoperands].type = IAEffect_1; + effects->operands[effects->numoperands].object = op->u.obj.obj; + effects->operands[effects->numoperands].offset = op->u.obj.offset; + effects->operands[effects->numoperands].size = InlineAsm_OpcodeSize(ia); + effects->numoperands++; + } + } + break; + } + + CError_ASSERT(4151, (UInt32) effects->numoperands <= IAMaxOperands); + } + + if ((info->flags & (fIsBranch | fIsCall)) && (SInt32)effects->numlabels == 0) + effects->x3 = 1; +} + +void CodeGen_PropagateIntoAsm(Statement *stmt, Object *obj, ENode *expr) { + InlineAsm *ia; + Object *newobj; + + ia = (InlineAsm *) stmt->expr; + if (ENODE_IS(expr, EOBJREF)) { + newobj = expr->data.objref; + if (obj->otype == newobj->otype && obj->datatype == newobj->datatype) { + int i; + for (i = 0; i < ia->argcount; i++) { + switch (ia->args[i].type) { + case IAOpnd_Reg: + if (ia->args[i].u.reg.object == obj && + (ia->args[i].u.reg.effect & (EffectRead | EffectWrite)) == EffectRead) { + if (TYPE_FITS_IN_REGISTER(newobj->type) && + ia->args[i].u.reg.rclass == RegClass_GPR) { + ia->args[i].u.reg.object = newobj; + } else if (IS_TYPE_FLOAT(newobj->type) && + ia->args[i].u.reg.rclass == RegClass_FPR) { + ia->args[i].u.reg.object = newobj; + } else if (IS_TYPE_VECTOR(newobj->type) && + ia->args[i].u.reg.rclass == RegClass_VR) { + ia->args[i].u.reg.object = newobj; + } + } + break; + case IAOpnd_3: + case IAOpnd_4: + if (!(opcodeinfo[ia->opcode].flags & (fIsWrite | fPCodeFlag40000)) && + ia->args[i].u.obj.obj == obj) + ia->args[i].u.obj.obj = newobj; + break; + } + } + } + } +} + +Statement *CodeGen_CopyAsmStat(Statement *stmt) { + Statement *copy; + SInt32 size; + InlineAsm *ia; + InlineAsm *iacopy; + + copy = galloc(sizeof(Statement)); + *copy = *stmt; + + ia = (InlineAsm *) stmt->expr; + size = sizeof(InlineAsm) + sizeof(IAOperand) * ia->argcount; + iacopy = galloc(size); + memcpy(iacopy, ia, size); + copy->expr = (ENode *) iacopy; + + return copy; +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/PCode/PCodeInfo.c b/compiler_and_linker/BackEnd/PowerPC/PCode/PCodeInfo.c new file mode 100644 index 0000000..b2e2385 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/PCode/PCodeInfo.c @@ -0,0 +1,1354 @@ +#include "compiler/PCodeInfo.h" +#include "compiler/CError.h" +#include "compiler/CMangler.h" +#include "compiler/CParser.h" +#include "compiler/Alias.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeListing.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/TOC.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +#pragma pool_strings on + +int pcode_bad_operand; +char *orig_buf; +static int do_show_basic_blocks; + +void pcode_get_hi_lo(int bits, char typechar, SInt32 *hi, SInt32 *lo) { + if (bits == 0) { + *hi = 0; + *lo = 0; + return; + } + if (bits == 32) { + *hi = 0x7FFFFFFF; + *lo = -0x80000000; + return; + } + + if (bits < 32 && bits > 0) { + switch (typechar) { + case 'u': + *hi = (1 << bits) - 1; + *lo = 0; + break; + case 'i': + case 'x': + *hi = (1 << bits) - 1; + *lo = -(1 << (bits - 1)); + break; + default: + *hi = (1 << (bits - 1)) - 1; + *lo = -(1 << (bits - 1)); + } + } else { + CError_FATAL(65); + } +} + +int pcode_check_imm_bits(SInt32 value, int bits, char typechar) { + char buf[2]; + int forcedBits; + SInt32 r6; + SInt32 r0; + + forcedBits = 0; + if (bits >= 0 && bits < 32) { + if (bits == 0) { + r6 = 0; + r0 = 0; + } else { + switch (typechar) { + case 'u': + r6 = (1 << bits) - 1; + r0 = 0; + break; + case 'i': + case 'x': + r6 = (1 << bits) - 1; + r0 = -(1 << (bits - 1)); + break; + case '1': + case '2': + case '3': + case '4': + buf[0] = typechar; + buf[1] = 0; + forcedBits = atoi(buf); + default: + r6 = (1 << (bits - 1)) - 1; + r0 = -(1 << (bits - 1)); + } + } + + if ((value < r0) || (value > r6)) + return 1; + if (forcedBits > 0 && value != ((value >> forcedBits) << forcedBits)) + return 1; + } else if (bits > 32) { + CError_FATAL(110); + } + + return 0; +} + +int pcode_const_from_format(const char *format, SInt32 *pResult) { + char buf[32]; + int len = 0; + + for (len = 0; len < 30 && isdigit(format[len]); len++) { + buf[len] = format[len]; + } + buf[len] = 0; + + *pResult = atoi(buf); + return len; +} + +PCode *vformatpcode(short opcode, va_list argList) { + // has many wrong registers but probably ok otherwise + int argcount; // r29 + OpcodeInfo *info; // r27 for a short time + int effect; // r27 + const char *format; // r26 + PCode *pcode; // r25 + PCodeArg *arg; // r24 + int unkreg_r23; // r23 + int unkreg_r22; // r22 + PCodeArg *lastArg; // r21 + int pcode_size; // r20 for a short time + int tmp; + int tmp2; // r19 + int i; + SInt32 thing; + SInt32 thing2; + SInt32 thing3; + SInt32 thing4; + Object *obj; + PCodeLabel *label; + char c; + + info = &opcodeinfo[opcode]; + format = info->format; + argcount = info->x8; + unkreg_r23 = 0; + unkreg_r22 = 0; + + if (format[0] == '#') { + unkreg_r23 = va_arg(argList, int); + argcount += unkreg_r23; + format++; + } + + if (info->flags & fCanSetRecordBit) + unkreg_r22 = 1; + + if ((argcount + unkreg_r22) < 5) + unkreg_r22 += 5 - (argcount + unkreg_r22); + + pcode_size = (argcount + unkreg_r22) * sizeof(PCodeArg) + sizeof(PCode); + pcode = lalloc(pcode_size); + memclrw(pcode, pcode_size); + + pcode->op = opcode; + pcode->argCount = argcount; + pcode->flags = info->flags; + lastArg = pcode->args + pcode->argCount; + arg = pcode->args; + while (*format) { + if (arg >= lastArg) + CError_FATAL(189); + + if (*format == ',' || *format == ';') + format++; + if (*format == '[' || *format == '(') + format++; + + effect = EffectRead; + if (*format == '=') { + effect = EffectWrite; + format++; + } else if (*format == '+') { + effect = EffectRead | EffectWrite; + format++; + } + + if (*format == '-') + format++; + if (*format == '?') + format++; + if (*format == '!') + format++; + + switch ((c = *format)) { + case 'b': + tmp = va_arg(argList, int); + if (!tmp) + effect = 0; + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_GPR; + arg->data.reg.reg = tmp; + arg->data.reg.effect = effect; + break; + case 'r': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_GPR; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + break; + case 'f': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_FPR; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + break; + case 'v': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_VR; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + break; + case 'c': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_CRFIELD; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + break; + case 'C': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = 2; + arg->data.reg.effect = effect; + break; + case 'L': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = 1; + arg->data.reg.effect = effect; + break; + case 'V': + i = unkreg_r23; + while (i > 0) { + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_GPR; + arg->data.reg.reg = 31 - --i; + arg->data.reg.effect = effect; + arg++; + } + arg--; + break; + case 'X': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = 0; + arg->data.reg.effect = effect; + break; + case 'Y': + if (pcode->op == PC_MTCRF) { + tmp = pcode->args[0].data.imm.value; + for (i = 0; i < 8; i++) { + if (tmp & (0x80 >> i)) { + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_CRFIELD; + arg->data.reg.reg = i; + arg->data.reg.effect = EffectWrite; + arg++; + } + } + } else { + i = 0; + while (i < 8) { + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_CRFIELD; + arg->data.reg.reg = i++; + arg->data.reg.effect = EffectRead; + arg++; + } + } + arg--; + break; + case 'Z': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_CRFIELD; + arg->data.reg.reg = 0; + arg->data.reg.effect = effect; + break; + case 'P': + if (isdigit(format[1])) + format += pcode_const_from_format(format + 1, &thing); + else + CError_FATAL(319); + case 's': + tmp = va_arg(argList, int); + tmp2 = -1; + for (i = 0; i < 4; i++) { + if (tmp == spr_to_sysreg[i]) { + tmp2 = i; + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = i; + arg->data.reg.effect = effect; + if ((effect & EffectWrite) && (tmp == 0x100)) + pcsetsideeffects(pcode); + break; + } + } + + if (tmp2 < 0) { + pcsetsideeffects(pcode); + arg->kind = PCOp_SYSREG; + arg->arg = RegClass_SPR; + arg->data.reg.reg = tmp; + arg->data.reg.effect = effect; + } + + break; + + case 'T': + tmp = va_arg(argList, int); + if (tmp == 0x10C) { + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = 0x11C; + arg->data.reg.effect = effect; + } else if (tmp == 0x10D) { + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = 0x11D; + arg->data.reg.effect = effect; + } else { + CError_FATAL(353); + } + pcsetsideeffects(pcode); + arg->kind = PCOp_SYSREG; + arg->arg = RegClass_SPR; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + break; + + case 'S': + tmp2 = -1; + if (isdigit(format[1])) + format += pcode_const_from_format(format + 1, &thing2); + else + CError_FATAL(371); + + for (i = 0; i < 4; i++) { + if (thing2 == spr_to_sysreg[i]) { + tmp2 = i; + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_SPR; + arg->data.reg.reg = i; + arg->data.reg.effect = effect; + if ((effect & EffectWrite) && (thing2 == 0x100)) + pcsetsideeffects(pcode); + break; + } + } + + if (tmp2 < 0) { + pcsetsideeffects(pcode); + arg->kind = PCOp_SYSREG; + arg->arg = RegClass_SPR; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + } + break; + + case 'a': + case 'i': + case 'n': + case 'u': + case 'w': + case 'x': + tmp2 = (unsigned char) c; + thing3 = 16; + if (*format == 'a') { + if (isdigit(format[1])) + tmp2 = (unsigned char) *(++format); + else + CError_FATAL(408); + } + if (isdigit(format[1])) { + format += pcode_const_from_format(format + 1, &thing3); + } + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, int); + arg->data.imm.obj = NULL; + if (pcode_check_imm_bits(arg->data.imm.value, thing3, tmp2)) + CError_FATAL(419); + break; + + case 'N': + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, int); + arg->data.imm.obj = NULL; + if (pcode_check_imm_bits(arg->data.imm.value, 6, 'u')) + CError_FATAL(429); + break; + + case 'D': + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, int); + arg->data.imm.obj = NULL; + if (pcode_check_imm_bits(arg->data.imm.value, 10, 'u')) + CError_FATAL(438); + break; + + case 'B': + case 't': + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, int); + arg->data.imm.obj = NULL; + if (pcode_check_imm_bits(arg->data.imm.value, 5, 'u')) + CError_FATAL(448); + break; + + case 'Q': + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_CRFIELD; + arg->data.reg.reg = va_arg(argList, int); + arg->data.reg.effect = effect; + arg++; + case 'q': + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, int); + arg->data.imm.obj = NULL; + if (pcode_check_imm_bits(arg->data.imm.value, 4, 'u')) + CError_FATAL(463); + break; + + case 'l': + if ((label = va_arg(argList, PCodeLabel *))) { + arg->kind = PCOp_LABEL; + arg->data.label.label = label; + } else { + arg->kind = PCOp_MEMORY; + obj = va_arg(argList, Object *); + CError_ASSERT(476, obj->otype == OT_OBJECT); + arg->data.mem.obj = obj; + arg->data.mem.offset = 0; + arg->arg = RefType_4; + } + break; + + case 'd': + CError_ASSERT(490, format[1] == '('); + effect = EffectRead; + format += 2; + if (*format == '=') { + effect = EffectWrite; + format++; + } else if (*format == '+') { + effect = EffectRead | EffectWrite; + format++; + } + + CError_ASSERT(502, format[0] == 'b'); + + tmp = va_arg(argList, int); + if (tmp == 0) + effect = 0; + arg->kind = PCOp_REGISTER; + arg->arg = RegClass_GPR; + arg->data.reg.reg = tmp; + arg->data.reg.effect = effect; + arg++; + + case 'm': + obj = va_arg(argList, Object *); + if (obj) { + CError_ASSERT(515, obj->otype == OT_OBJECT); + + if (obj->datatype == DABSOLUTE) { + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.obj = obj; + arg->data.imm.value = va_arg(argList, SInt32); + } else { + arg->kind = PCOp_MEMORY; + arg->data.mem.obj = obj; + arg->data.mem.offset = va_arg(argList, SInt32); + + if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + pcode->alias = make_alias(obj, arg->data.mem.offset, nbytes_loaded_or_stored_by(pcode)); + if (is_volatile_object(obj)) + pcode->flags |= fIsVolatile; + //if ((obj->type->type == TYPEPOINTER || obj->type->type == TYPEARRAY) ? (TYPE_POINTER(obj->type)->qual & Q_CONST) : (obj->qual & Q_CONST)) + if (OBJ_GET_TARGET_CONST(obj)) + pcode->flags |= fIsConst; + } else { + if (pcode->op == PC_ADDI) + pcode->alias = make_alias(obj, arg->data.mem.offset, 1); + } + CError_ASSERT(536, obj->datatype == DLOCAL || arg->data.mem.offset == 0); + if (pcode->flags & (fIsRead | fIsWrite)) { + //if ((obj->type->type == TYPEPOINTER || obj->type->type == TYPEARRAY) ? (TYPE_POINTER(obj->type)->qual & Q_VOLATILE) : (obj->qual & Q_VOLATILE)) + if (OBJ_GET_TARGET_VOLATILE(obj)) + pcode->flags |= fIsVolatile; + //if ((obj->type->type == TYPEPOINTER || obj->type->type == TYPEARRAY) ? (TYPE_POINTER(obj->type)->qual & Q_CONST) : (obj->qual & Q_CONST)) + if (OBJ_GET_TARGET_CONST(obj)) + pcode->flags |= fIsConst; + } + + if (pcode->flags & (fIsBranch | fIsCall)) { + arg->arg = RefType_4; + } else if (obj->datatype == DLOCAL) { + if (!local_is_16bit_offset(obj)) + arg->arg = RefType_D; + else + arg->arg = RefType_1; + } else { + arg->arg = RefType_6; + } + } + } else { + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, SInt32); + arg->data.imm.obj = NULL; + if (pcode->flags & (fIsRead | fIsWrite)) + pcode->flags |= fIsPtrOp; + } + break; + + case 'M': + obj = va_arg(argList, Object *); + if (obj) { + CError_ASSERT(578, obj->otype == OT_OBJECT); + + if (obj->datatype == DABSOLUTE) { + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.obj = obj; + arg->data.imm.value = va_arg(argList, SInt32); + } else { + arg->kind = PCOp_MEMORY; + arg->data.mem.obj = obj; + arg->data.mem.offset = va_arg(argList, SInt32); + + CError_ASSERT(590, obj->datatype == DLOCAL || arg->data.mem.offset == 0); + if (pcode->flags & (fIsRead | fIsWrite)) { + //if ((obj->type->type == TYPEPOINTER || obj->type->type == TYPEARRAY) ? (TYPE_POINTER(obj->type)->qual & Q_VOLATILE) : (obj->qual & Q_VOLATILE)) + if (OBJ_GET_TARGET_VOLATILE(obj)) + pcode->flags |= fIsVolatile; + //if ((obj->type->type == TYPEPOINTER || obj->type->type == TYPEARRAY) ? (TYPE_POINTER(obj->type)->qual & Q_CONST) : (obj->qual & Q_CONST)) + if (OBJ_GET_TARGET_CONST(obj)) + pcode->flags |= fIsConst; + } + + if (obj->datatype == DLOCAL) { + arg->arg = RefType_C; + } else { + arg->arg = RefType_8; + } + } + } else { + arg->kind = PCOp_IMMEDIATE; + arg->data.imm.value = va_arg(argList, SInt32); + arg->data.imm.obj = NULL; + if (pcode->flags & (fIsRead | fIsWrite)) + pcode->flags |= fIsPtrOp; + } + break; + + case 'p': + arg->kind = PCOp_PLACEHOLDEROPERAND; + break; + + case 'O': + arg--; + break; + + default: + CError_FATAL(629); + } + + while (format[1] && strchr("/<>|*", format[1])) { + switch (*(++format)) { + case '/': + if (format[1] == '2') + format++; + break; + case '<': + case '*': + case '|': + case '>': + if (format[1] == 'p') + format++; + else if (isdigit(format[1])) + format += pcode_const_from_format(format + 1, &thing4); + else + CError_FATAL(659); + break; + } + } + + if ((c = *(++format)) == ']' || c == ')') + format++; + arg++; + } + + while (arg < lastArg) { + arg->kind = PCOp_PLACEHOLDEROPERAND; + arg++; + } + while (unkreg_r22) { + arg->kind = PCOp_PLACEHOLDEROPERAND; + arg++; + unkreg_r22--; + } + return pcode; +} + +int expectandformatoperand(PCodeArg *operand, PCOpKind expectedKind, char a3, int bitCount, char *buf) { + int errorlen; + char *name; + int tmp; + int regclass; + int refis1; + int b_null; + + errorlen = 0; + + if (operand->kind != expectedKind) { + char *kindstr; + switch (expectedKind) { + case PCOp_REGISTER: kindstr = "REGISTER"; break; + case PCOp_SYSREG: kindstr = "SYSREG"; break; + case PCOp_IMMEDIATE: kindstr = "IMMEDIATE"; break; + case PCOp_MEMORY: kindstr = "MEMORY"; break; + case PCOp_LABEL: kindstr = "LABEL"; break; + case PCOp_LABELDIFF: kindstr = "LABELDIFF"; break; + case PCOp_PLACEHOLDEROPERAND: kindstr = "PLACEHOLDEROPERAND"; break; + default: kindstr = "unknown kind"; + } + tmp = sprintf(buf, "{EXPECTED %s}", kindstr); + errorlen += tmp; + buf += tmp; + pclist_bad_operand = 1; + } + + switch (operand->kind) { + case PCOp_REGISTER: + if (operand->arg != (regclass = a3)) { + tmp = sprintf(buf, "{EXPECTED %s}", register_class_name[regclass]); + errorlen += tmp; + buf += tmp; + } + if (operand->data.reg.reg < n_real_registers[regclass] && (name = special_register_names[operand->arg][operand->data.reg.reg])) + tmp = sprintf(buf, "%s", name); + else + tmp = sprintf(buf, register_class_format[operand->arg], operand->data.reg.reg); + errorlen += tmp; + break; + case PCOp_SYSREG: + if (operand->arg != RegClass_SPR) { + tmp = sprintf(buf, "{EXPECTED %s}", register_class_name[RegClass_SPR]); + errorlen += tmp; + buf += tmp; + } + tmp = sprintf(buf, register_class_format[RegClass_SPR], operand->data.reg.reg); + errorlen += tmp; + break; + case PCOp_IMMEDIATE: + switch (a3) { + case 'x': + tmp = sprintf(buf, "0x%x", operand->data.imm.value); + break; + case 'u': + tmp = sprintf(buf, "%u", operand->data.imm.value); + break; + default: + tmp = sprintf(buf, "%" PRId32, operand->data.imm.value); + break; + } + errorlen += tmp; + buf += tmp; + if (operand->data.imm.obj) { + name = CMangler_GetLinkName(operand->data.imm.obj)->name; + if (strlen(name) > 50) + tmp = sprintf(buf, "{%45.45s...}", name); + else + tmp = sprintf(buf, "{%s}", name); + errorlen += tmp; + buf += tmp; + } + if (pcode_check_imm_bits(operand->data.imm.value, bitCount, (char) a3)) { + errorlen += sprintf(buf, "{IMM too large %i bits}", bitCount); + pclist_bad_operand = 1; + } + break; + case PCOp_MEMORY: + switch ((unsigned char) operand->arg) { + case RefType_0: + case RefType_1: + case RefType_2: + case RefType_3: + case RefType_4: + case RefType_5: + case RefType_9: + break; + case RefType_8: + case RefType_B: + case RefType_C: + tmp = sprintf(buf, "HA("); + errorlen += tmp; + buf += tmp; + break; + case RefType_7: + tmp = sprintf(buf, "HI("); + errorlen += tmp; + buf += tmp; + break; + case RefType_6: + case RefType_A: + case RefType_D: + tmp = sprintf(buf, "LO("); + errorlen += tmp; + buf += tmp; + break; + default: + tmp = sprintf(buf, "{UNEXPECTED reftype = %d}", (unsigned char) operand->arg); + errorlen += tmp; + buf += tmp; + } + name = CMangler_GetLinkName(operand->data.mem.obj)->name; + if (strlen(name) == 0 || strlen(name) > 3200 || name[0] < 0) + CError_FATAL(849); + if (strlen(name) > 50) + tmp = sprintf(buf, "%45.45s...", name); + else + tmp = sprintf(buf, "%s", name); + errorlen += tmp; + buf += tmp; + if (operand->data.mem.offset > 0) + tmp = sprintf(buf, "+%d", operand->data.mem.offset); + else if (operand->data.mem.offset != 0) + tmp = sprintf(buf, "-%d", -operand->data.mem.offset); + else + tmp = 0; + errorlen += tmp; + buf += tmp; + if (copts.codegen_pic && uses_globals && pic_base_reg) { + tmp = sprintf(buf, "-B%d", pic_base_pcodelabel->block->blockIndex); + errorlen += tmp; + buf += tmp; + } + switch ((unsigned char) operand->arg) { + case RefType_6: + case RefType_7: + case RefType_8: + case RefType_A: + case RefType_B: + case RefType_C: + case RefType_D: + errorlen += sprintf(buf, ")"); + } + break; + case PCOp_LABEL: + if (do_show_basic_blocks) { + if (!operand->data.label.label->block) + tmp = sprintf(buf, "B<unknown>"); + else + tmp = sprintf(buf, "B%" PRId32, operand->data.label.label->block->blockIndex); + } else { + tmp = sprintf(buf, "%.8" PRIX32, operand->data.label.label->block->codeOffset); + } + errorlen += tmp; + break; + case PCOp_LABELDIFF: + refis1 = ((unsigned char) operand->arg == 1); + b_null = !operand->data.labeldiff.labelB->block; + if (operand->data.labeldiff.labelA->block == NULL) { + if (b_null) + tmp = sprintf(buf, "%sB<unknown>-B<unknown>+%" PRId32, refis1 ? "-" : "", operand->data.labeldiff.offset); + else + tmp = sprintf(buf, "%sB<unknown>-B%" PRId32 "+%" PRId32, refis1 ? "-" : "", operand->data.labeldiff.labelB->block->blockIndex, operand->data.labeldiff.offset); + } else { + if (b_null) + tmp = sprintf(buf, "%sB%" PRId32 "-B<unknown>+%" PRId32, refis1 ? "-" : "", operand->data.labeldiff.labelA->block->blockIndex, operand->data.labeldiff.offset); + else + tmp = sprintf(buf, "%sB%" PRId32 "-B%" PRId32 "+%" PRId32, refis1 ? "-" : "", operand->data.labeldiff.labelA->block->blockIndex, operand->data.labeldiff.labelB->block->blockIndex, operand->data.labeldiff.offset); + } + errorlen += tmp; + break; + case PCOp_PLACEHOLDEROPERAND: + errorlen += sprintf(buf, "{placeholder}"); + break; + default: + errorlen += sprintf(buf, "{UNEXPECTED kind = %d}", operand->kind); + } + + return errorlen; +} + +int formatoperand(PCodeArg *operand, char *buf) { + return expectandformatoperand( + operand, + operand->kind, + (operand->kind == PCOp_REGISTER) ? operand->arg : 'x', + -1, + buf); +} + +void formatoperands(PCode *pcode, char *buf, int showBasicBlocks) { + const char *format; // r29 + // there might be a PCodeArg *arg in r28 + // not sure lol + PCodeArg *pa; + int arg_index; // r27 + + char *name; + int i; + int tmp; + int tmp2; + SInt32 thing; + SInt32 thing2; + SInt32 thing4; + char c; + int flagSetT; + int flagSetF; + + static char *cc[] = { + "lt", "gt", "eq", "un" + }; + static char *to[] = { + "", "lgt", "llt", "", + "eq", "lge", "lle", "", + "gt", "", "", "", + "ge", "", "", "", + "lt", "", "", "", + "le", "", "", "", + "ne", "", "", "", + "", "", "" + }; + + format = opcodeinfo[pcode->op].format; + orig_buf = buf; + do_show_basic_blocks = showBasicBlocks; + + if (format[0] == '#') { + format++; + if (format[0] == ',') + format++; + } + + arg_index = 0; + pa = pcode->args; + while (*format) { + if (*format == ';') + break; + if (arg_index == pcode->argCount) { + pclist_bad_operand = 1; + buf += sprintf(buf, "{EXCEDED noperands, remaning format = %s}", format); + break; + } + + if (*format == ',') { + *(buf++) = ','; + format++; + } + + if (*format == '[' || *format == '(') { + *(buf++) = *format; + format++; + } + + if (*format == '=' || *format == '+') + format++; + if (*format == '-') + format++; + + if (*format == '?') { + format++; + if (*format != 'c') + *(buf++) = ','; + } + + if (*format == '!') + format++; + + switch (*format) { + case 'b': + if (pa->kind == PCOp_REGISTER && pa->arg == RegClass_GPR && pa->data.reg.reg == 0) { + if (pa->data.reg.effect & EffectWrite) { + pclist_bad_operand = 1; + buf += sprintf(buf, "!!!r"); + } + buf += sprintf(buf, "0"); + break; + } + case 'r': + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_GPR, -1, buf); + break; + case 'f': + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_FPR, -1, buf); + break; + case 'v': + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_VR, -1, buf); + break; + case 'c': + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_CRFIELD, -1, buf); + break; + case 'X': + CError_ASSERT(1124, pa->data.reg.reg == 0); + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_SPR, -1, buf); + break; + case 'C': + CError_ASSERT(1129, pa->data.reg.reg == 2); + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_SPR, -1, buf); + break; + case 'L': + CError_ASSERT(1134, pa->data.reg.reg == 1); + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_SPR, -1, buf); + break; + case 'Z': + CError_ASSERT(1139, pa->data.reg.reg == 0); + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_SPR, -1, buf); + break; + case 'P': + if (isdigit(format[1])) + format += pcode_const_from_format(format + 1, &thing); + else + CError_FATAL(1149); + case 'S': + case 'T': + case 's': + if (pa->kind == PCOp_REGISTER && pa->arg == RegClass_SPR) { + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_SPR, -1, buf); + } else { + for (i = 0; i < 4; i++) { + if (pa->data.reg.reg == spr_to_sysreg[i]) { + CError_FATAL(1161); + break; + } + } + + buf += expectandformatoperand(pa, PCOp_SYSREG, RegClass_SPR, -1, buf); + } + break; + + case 'V': + do { + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_GPR, -1, buf); + *(buf++) = ','; + pa++; + arg_index++; + } while (arg_index < pcode->argCount && pa->kind == PCOp_REGISTER && pa->arg == RegClass_GPR); + buf--; + pa--; + arg_index--; + break; + + case 'Y': + do { + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_CRFIELD, -1, buf); + *(buf++) = ','; + pa++; + arg_index++; + } while (arg_index < pcode->argCount && pa->kind == PCOp_REGISTER && pa->arg == RegClass_CRFIELD); + buf--; + pa--; + arg_index--; + break; + + case 'Q': + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_CRFIELD, -1, buf); + *(buf++) = ','; + pa++; + arg_index++; + case 'q': + if (pa->kind == PCOp_IMMEDIATE && pa->data.imm.value >= 0 && pa->data.imm.value < 4 && (name = cc[pa->data.imm.value])[0]) { + buf += sprintf(buf, "%s", name); + } else { + buf += sprintf(buf, "{OUT OF RANGE}"); + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 0, -1, buf); + } + break; + case 'a': + case 'i': + case 'u': + case 'x': + tmp = *format; + if (tmp == 'a') { + if (isdigit(format[1])) { + format++; + tmp = *format; + } else { + CError_FATAL(1227); + } + } + if ((tmp2 = isdigit(format[1]))) + format += pcode_const_from_format(format + 1, &thing2); + if (!tmp2) + thing2 = -1; + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, tmp, thing2, buf); + break; + + case 'N': + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 'u', 6, buf); + break; + + case 'D': + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 'u', 10, buf); + break; + + case 't': + if (pa->kind == PCOp_IMMEDIATE && pa->data.imm.value > 0 && pa->data.imm.value < 31 && (name = to[pa->data.imm.value])[0]) { + buf += sprintf(buf, "%s", name); + break; + } + + case 'B': + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 'x', 5, buf); + break; + + case 'l': + if (pa->kind == PCOp_IMMEDIATE) { + buf += sprintf(buf, "*%s%" PRId32, (pa->data.imm.value >= 0) ? "+" : "", pa->data.imm.value); + } else if (pa->kind == PCOp_LABELDIFF) { + buf += expectandformatoperand(pa, PCOp_LABELDIFF, 0, -1, buf); + } else if (pa->kind == PCOp_LABEL) { + buf += expectandformatoperand(pa, PCOp_LABEL, 0, -1, buf); + } else { + buf += expectandformatoperand(pa, PCOp_MEMORY, 0, -1, buf); + } + break; + + case 'd': + if (pa[1].kind == PCOp_MEMORY) + buf += expectandformatoperand(pa + 1, PCOp_MEMORY, 0, -1, buf); + else + buf += expectandformatoperand(pa + 1, PCOp_IMMEDIATE, 0, -1, buf); + CError_ASSERT(1283, format[1] == '('); + format++; + *(buf++) = *(format++); + if (*format == '+') + format++; + CError_ASSERT(1291, format[0] == 'b'); + if (pa->kind == PCOp_REGISTER && pa->arg == RegClass_GPR && pa->data.reg.reg == 0) { + if (pa->data.reg.effect & (EffectRead | EffectWrite)) { + pclist_bad_operand = 1; + buf += sprintf(buf, "!!!r"); + } + buf += sprintf(buf, "0"); + } else { + buf += expectandformatoperand(pa, PCOp_REGISTER, RegClass_GPR, -1, buf); + } + pa++; + i++; + break; + + case 'w': + if (pa->kind == PCOp_LABELDIFF) + buf += expectandformatoperand(pa, PCOp_LABELDIFF, 0, -1, buf); + else + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 0, -1, buf); + break; + + case 'm': + case 'n': + if (pa->kind == PCOp_MEMORY) + buf += expectandformatoperand(pa, PCOp_MEMORY, 0, -1, buf); + else if (pa->kind == PCOp_LABELDIFF) + buf += expectandformatoperand(pa, PCOp_LABELDIFF, 0, -1, buf); + else if ((pcode->flags & (fIsBranch | fIsCall)) && (pa->kind == PCOp_LABEL)) + buf += expectandformatoperand(pa, PCOp_LABEL, 0, -1, buf); + else + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 0, -1, buf); + break; + + case 'M': + if (pa->kind == PCOp_MEMORY) { + CError_ASSERT(1335, pa->arg == RefType_8 || pa->arg == RefType_B || pa->arg == RefType_C); + buf += expectandformatoperand(pa, PCOp_MEMORY, 0, -1, buf); + } else if (pa->kind == PCOp_LABELDIFF) + buf += expectandformatoperand(pa, PCOp_LABELDIFF, 0, -1, buf); + else + buf += expectandformatoperand(pa, PCOp_IMMEDIATE, 0, -1, buf); + break; + + case 'O': + pa--; + arg_index--; + break; + + case 'p': + buf += expectandformatoperand(pa, PCOp_PLACEHOLDEROPERAND, 0, -1, buf); + break; + + default: + CError_FATAL(1465); + } + + while (format[1] && strchr("/<>|*", format[1])) { + switch (*(++format)) { + case '/': + if (format[1] == '2') + format++; + break; + case '<': + case '*': + case '|': + case '>': + if (format[1] == 'p') + format++; + else if (isdigit(format[1])) + format += pcode_const_from_format(format + 1, &thing4); + else + CError_FATAL(1495); + break; + } + } + + if ((c = *(++format)) == ']' || c == ')') { + *(buf++) = c; + format++; + } + + pa++; + arg_index++; + } + + if (buf[-1] == ',') + buf--; + + flagSetT = PCODE_FLAG_SET_T(pcode); + flagSetF = PCODE_FLAG_SET_F(pcode); + if (pcode->flags & fIsConst) + buf += sprintf(buf, "; fIsConst"); + if (pcode->flags & fIsVolatile) + buf += sprintf(buf, "; fIsVolatile"); + if (pcode->flags & fSideEffects) + buf += sprintf(buf, "; fSideEffects"); + if (pcode->flags & fIsCSE) + buf += sprintf(buf, "; fIsCSE"); + if (pcode->flags & fCommutative) + buf += sprintf(buf, "; fCommutative"); + + if (flagSetF & fIsPtrOp) + buf += sprintf(buf, "; fIsPtrOp"); + + if (flagSetT) { + if (flagSetT & fLink) + buf += sprintf(buf, "; fLink"); + if (flagSetT & fAbsolute) + buf += sprintf(buf, "; fAbsolute"); + if (flagSetT & fBranchTaken) + buf += sprintf(buf, "; fBranchTaken"); + if (flagSetT & fBranchNotTaken) + buf += sprintf(buf, "; fBranchNotTaken"); + } else if (flagSetF) { + if (flagSetF & fSetsCarry) + buf += sprintf(buf, "; fSetsCarry"); + if (flagSetF & fOverflow) + buf += sprintf(buf, "; fOverflow"); + } + + *buf = 0; +} + +PCode *makecopyinstruction(PCodeArg *a, PCodeArg *b) { + if (b->kind == PCOp_REGISTER) { + switch (b->arg) { + case RegClass_GPR: + return makepcode(PC_MR, b->data.reg.reg, a->data.reg.reg); + case RegClass_FPR: + return makepcode(PC_FMR, b->data.reg.reg, a->data.reg.reg); + case RegClass_VR: + return makepcode(PC_VMR, b->data.reg.reg, a->data.reg.reg); + case RegClass_CRFIELD: + return makepcode(PC_MCRF, b->data.reg.reg, a->data.reg.reg); + } + } + + CError_FATAL(1622); + return NULL; +} + +int is_location_independent(PCode *pcode) { + switch (pcode->op) { + case PC_LI: + case PC_LIS: + case PC_VSPLTISB: + case PC_VSPLTISH: + case PC_VSPLTISW: + return 1; + case PC_ADDI: + case PC_ADDIS: + case PC_ORI: + case PC_ORIS: + return pcode->args[1].data.reg.reg == _FP_; + case PC_LWZ: + if ( + pcode->args[1].data.reg.reg == 1 && + pcode->args[2].kind == PCOp_IMMEDIATE && + pcode->args[2].data.imm.value == 0 + ) + return 1; + } + + return 0; +} + +int can_reuse_stored_value(PCode *a, PCode *b) { + switch (b->op) { + case PC_LWZ: + case PC_LWZU: + case PC_LWZX: + case PC_LWZUX: + switch (a->op) { + case PC_STW: + case PC_STWU: + case PC_STWX: + case PC_STWUX: + return 1; + } + break; + + case PC_LFD: + case PC_LFDU: + case PC_LFDX: + case PC_LFDUX: + switch (a->op) { + case PC_STFD: + case PC_STFDU: + case PC_STFDX: + case PC_STFDUX: + return 1; + } + break; + + case PC_LVX: + if (a->op == PC_STVX) + return 1; + break; + } + + return 0; +} + +int nbytes_loaded_or_stored_by(PCode *pcode) { + OpcodeInfo *oinfo = opcodeinfo + pcode->op; + if (oinfo->flags & (fIsRead | fIsWrite)) { + switch (pcode->op) { + case PC_LBZ: + case PC_LBZU: + case PC_LBZX: + case PC_LBZUX: + case PC_STB: + case PC_STBU: + case PC_STBX: + case PC_STBUX: + return 1; + case PC_LHZ: + case PC_LHZU: + case PC_LHZX: + case PC_LHZUX: + case PC_LHA: + case PC_LHAU: + case PC_LHAX: + case PC_LHAUX: + case PC_LHBRX: + case PC_STH: + case PC_STHU: + case PC_STHX: + case PC_STHUX: + case PC_STHBRX: + return 2; + case PC_LWZ: + case PC_LWZU: + case PC_LWZX: + case PC_LWZUX: + case PC_LWBRX: + case PC_STW: + case PC_STWU: + case PC_STWX: + case PC_STWUX: + case PC_STWBRX: + case PC_LFS: + case PC_LFSU: + case PC_LFSX: + case PC_LFSUX: + case PC_STFS: + case PC_STFSU: + case PC_STFSX: + case PC_STFSUX: + case PC_LWARX: + case PC_STFIWX: + case PC_STWCX: + case PC_ECIWX: + case PC_ECOWX: + case PC_MFROM: + case PC_LSCBX: + return 4; + case PC_LMW: + case PC_STMW: + if (pcode->args[0].kind == PCOp_REGISTER && pcode->args[0].arg == RegClass_GPR) + return (32 - pcode->args[0].data.reg.reg) * 4; + else + return 128; + case PC_LFD: + case PC_LFDU: + case PC_LFDX: + case PC_LFDUX: + case PC_STFD: + case PC_STFDU: + case PC_STFDX: + case PC_STFDUX: + return 8; + case PC_LSWI: + case PC_STSWI: + return pcode->args[2].data.imm.value; // not sure if imm is the right union type here + case PC_LSWX: + case PC_STSWX: + return 128; + + // there's probably an ifdef here lmao + case PC_LVEBX: + case PC_STVEBX: + return 1; + case PC_LVEHX: + case PC_STVEHX: + return 2; + case PC_LVEWX: + case PC_STVEWX: + return 4; + case PC_LVSL: + case PC_LVSR: + case PC_LVX: + case PC_LVXL: + case PC_STVX: + case PC_STVXL: + return 16; + + default: + CError_FATAL(2011); + } + } + + CError_FATAL(2014); + return 0; +} + +void change_num_operands(PCode *pcode, int newNum) { + int i; + + CError_ASSERT(2026, ((pcode->argCount > 5) ? pcode->argCount : 5) >= newNum); + + for (i = pcode->argCount - 1; i >= newNum; i--) + pcode->args[i].kind = PCOp_PLACEHOLDEROPERAND; + + pcode->argCount = newNum; +} + +void change_opcode(PCode *pcode, short opcode) { + pcode->flags = (pcode->flags & ~(opcodeinfo[pcode->op].flags & ~fIsPtrOp)) | opcodeinfo[opcode].flags; + if ((pcode->flags & fIsMove) && (PCODE_FLAG_SET_F(pcode) & fRecordBit)) + pcode->flags &= ~fIsMove; + pcode->op = opcode; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/PPCError.c b/compiler_and_linker/BackEnd/PowerPC/PPCError.c new file mode 100644 index 0000000..2d4c469 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/PPCError.c @@ -0,0 +1,70 @@ +#include "compiler/PPCError.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/InlineAsm.h" +#include "cos.h" + +static void PPCError_GetErrorString(char *str, short code) { + short scode; + + scode = (short) code; + CError_ASSERT(40, scode >= 100 && scode < PPCErrorStrMAX); + + COS_GetString(str, 10001, scode - 99); +} + +static void PPCError_VAErrorMessage(int code, va_list list, Boolean flag1, Boolean flag2) { + char format[256]; + PPCError_GetErrorString(format, code); + CError_ErrorMessageVA(code + 10001, format, list, flag1, flag2); +} + +void PPCError_Error(int code, ...) { + va_list list; + + if (trychain) + longjmp(trychain->jmpbuf, 1); + + va_start(list, code); + PPCError_VAErrorMessage(code, list, 0, 0); + va_end(list); + + if (in_assembler) + AssemblerError(); +} + +void PPCError_Warning(int code, ...) { + va_list list; + + if (!trychain) { + va_start(list, code); + PPCError_VAErrorMessage(code, list, 0, 1); + va_end(list); + } +} + +void PPCError_Message(char *format, ...) { + va_list list; + + if (!trychain) { + va_start(list, format); + CError_ErrorMessageVA(10213, format, list, 0, 1); + va_end(list); + } +} + +void PPCError_ErrorTerm(short code, ...) { + va_list list; + + if (trychain) + longjmp(trychain->jmpbuf, 1); + + va_start(list, code); + PPCError_VAErrorMessage(code, list, 1, 0); + va_end(list); + + if (in_assembler) + AssemblerError(); + + longjmp(errorreturn, 1); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/Coloring.c b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/Coloring.c new file mode 100644 index 0000000..8036435 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/Coloring.c @@ -0,0 +1,268 @@ +#include "compiler/Coloring.h" +#include "compiler/CFunc.h" +#include "compiler/CompilerTools.h" +#include "compiler/InterferenceGraph.h" +#include "compiler/PCode.h" +#include "compiler/PPCError.h" +#include "compiler/Registers.h" +#include "compiler/RegisterInfo.h" +#include "compiler/SpillCode.h" +#include "compiler/StackFrame.h" +#include "compiler/objects.h" + +RegClass coloring_class; +static short used_regs_before_coloring; + +static void markspecialregisters(RegClass rclass) { + ObjectList *list; + Object *object; + VarInfo *vi; + UInt32 i; + + for (i = 0; i < n_real_registers[rclass]; i++) + interferencegraph[i]->x14 = i; + + for (list = arguments; list; list = list->next) { + object = list->object; + vi = Registers_GetVarInfo(object); + if ((vi->flags & VarInfoFlag2) && vi->rclass == rclass) { + interferencegraph[vi->reg]->spillTemporary = object; + if (vi->flags & VarInfoFlag4) { + interferencegraph[vi->reg]->flags |= fPairLow; + interferencegraph[vi->regHi]->flags |= fPairHigh; + interferencegraph[vi->regHi]->spillTemporary = object; + } + } + } + + for (list = locals; list; list = list->next) { + object = list->object; + vi = Registers_GetVarInfo(object); + if ((vi->flags & VarInfoFlag2) && vi->rclass == rclass) { + interferencegraph[vi->reg]->spillTemporary = object; + if (vi->flags & VarInfoFlag4) { + interferencegraph[vi->reg]->flags |= fPairLow; + interferencegraph[vi->regHi]->flags |= fPairHigh; + interferencegraph[vi->regHi]->spillTemporary = object; + } + } + } +} + +static IGNode *simplifygraph(void) { + int availableRegs; + IGNode *spilledNodes; + IGNode *pushedNodes; + IGNode *best; + IGNode *node; + UInt32 i; + UInt32 j; + int flag; + float bestScore; + float score; + + availableRegs = available_registers(coloring_class); + pushedNodes = NULL; + + do { + spilledNodes = NULL; + flag = 0; + for (i = n_real_registers[coloring_class]; i < used_virtual_registers[coloring_class]; i++) { + node = interferencegraph[i]; + if (!(node->flags & (fPushed | fCoalesced))) { + if (node->x12 < availableRegs) { + for (j = 0; j < node->arraySize; j++) + interferencegraph[node->array[j]]->x12--; + node->flags |= fPushed; + node->next = pushedNodes; + pushedNodes = node; + flag = 1; + } else { + node->next = spilledNodes; + spilledNodes = node; + } + } + } + } while (flag); + + if (spilledNodes) + estimatespillcosts(); + + while (spilledNodes) { + best = spilledNodes; + bestScore = (spilledNodes->x10 >= used_regs_before_coloring) ? FLT_MAX : ((float) spilledNodes->spillCost / (float) spilledNodes->x12); + + for (node = spilledNodes->next; node; node = node->next) { + score = (node->x10 >= used_regs_before_coloring) ? FLT_MAX : ((float) node->spillCost / (float) node->x12); + if (score < bestScore) { + best = node; + bestScore = score; + } + } + + for (i = 0; i < best->arraySize; i++) + interferencegraph[best->array[i]]->x12--; + + best->flags |= fPushed; + best->next = pushedNodes; + pushedNodes = best; + + do { + spilledNodes = NULL; + flag = 0; + for (i = n_real_registers[coloring_class]; i < used_virtual_registers[coloring_class]; i++) { + node = interferencegraph[i]; + if (!(node->flags & (fPushed | fCoalesced))) { + if (node->x12 < availableRegs) { + for (j = 0; j < node->arraySize; j++) + interferencegraph[node->array[j]]->x12--; + node->flags |= fPushed; + node->next = pushedNodes; + pushedNodes = node; + flag = 1; + } else { + node->next = spilledNodes; + spilledNodes = node; + } + } + } + } while (flag); + } + + return pushedNodes; +} + +static int colorgraph(IGNode *node) { + UInt32 volatileRegs; + int result; + IGNode *otherNode; + int reg; + UInt32 workingMask; + int i; + short *array; + + result = 1; + + reset_nonvolatile_registers(coloring_class); + volatileRegs = volatile_registers(coloring_class); + + while (node) { + workingMask = volatileRegs; + for (array = node->array, i = 0; i < node->arraySize; i++) { + otherNode = interferencegraph[*(array++)]; + reg = otherNode->x14; + if (reg != -1 && reg < n_real_registers[coloring_class]) + workingMask &= ~(1 << reg); + } + + if (workingMask) { + for (i = 0; i < n_real_registers[coloring_class]; i++) { + if (workingMask & (1 << i)) { + node->x14 = i; + break; + } + } + } else { + reg = obtain_nonvolatile_register(coloring_class); + if (reg != -1) { + volatileRegs |= 1 << (node->x14 = reg); + } else { + node->flags |= fSpilled; + result = 0; + } + } + + node = node->next; + } + + return result; +} + +static void rewritepcode(void) { + PCodeBlock *block; + PCode *instr; + PCodeArg *op; + UInt32 i; + IGNode *node; + int reg; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if (PC_OP_IS_ANY_REGISTER(op, coloring_class)) + op->data.reg.reg = interferencegraph[op->data.reg.reg]->x14; + op++; + } + + if ( + (instr->flags & fIsMove) && + (instr->args[1].arg == coloring_class) && + instr->args[1].data.reg.reg == instr->args[0].data.reg.reg + ) + deletepcode(instr); + } + } + + for (i = n_real_registers[coloring_class]; i < used_virtual_registers[coloring_class]; i++) { + node = interferencegraph[i]; + if (node->spillTemporary && !(node->flags & fSpilled)) { + if (node->flags & fCoalesced) { + reg = node->x14; + while (reg >= n_real_registers[coloring_class]) { + reg = interferencegraph[reg]->x14; + if (reg < 0) + break; + } + node->x14 = reg; + } + + if (node->flags & fPairHigh) { + reg = node->x14; + Registers_GetVarInfo(node->spillTemporary)->regHi = reg; + } else { + reg = node->x14; + Registers_GetVarInfo(node->spillTemporary)->reg = reg; + } + } + } +} + +void colorinstructions(Object *proc) { + RegClass rclass; + int flag; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + coloring_class = rclass; + + if (rclass == RegClass_GPR) + check_dynamic_aligned_frame(); + + if (used_virtual_registers[rclass] > n_real_registers[rclass]) { + save_before_coloring_nonvolatile_registers(rclass); + used_regs_before_coloring = used_virtual_registers[rclass]; + if (!available_registers(rclass)) { + PPCError_Error(PPCErrorStr102, register_class_name[rclass]); + return; + } + + flag = 1; + while (flag && used_virtual_registers[rclass] > n_real_registers[rclass]) { + buildinterferencegraph(proc); + markspecialregisters(rclass); + flag = colorgraph(simplifygraph()) ? 0 : 1; + + if (flag) + insertspillcode(); + else + rewritepcode(); + freeoheap(); + } + } + + used_virtual_registers[rclass] = n_real_registers[rclass]; + } + + coloring = 0; +} diff --git a/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/InterferenceGraph.c b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/InterferenceGraph.c new file mode 100644 index 0000000..d589502 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/InterferenceGraph.c @@ -0,0 +1,364 @@ +#include "compiler/InterferenceGraph.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/BitVectors.h" +#include "compiler/Coloring.h" +#include "compiler/LiveInfo.h" +#include "compiler/PCode.h" +#include "compiler/PCodeListing.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/Registers.h" +#include "compiler/RegisterInfo.h" +#include "compiler/CompilerTools.h" + +IGNode **interferencegraph; +static UInt32 *interferencematrix; +Boolean coalesced_nregisters; +static SInt16 *coalesced; + +static void makeinterfere(UInt32 a, UInt32 b) { + if (a < b) + bitvectorsetbit(((b * b) / 2) + a, interferencematrix); + else if (a > b) + bitvectorsetbit(((a * a) / 2) + b, interferencematrix); +} + +int interferes(UInt32 a, UInt32 b) { + if (a < b) + return bitvectorgetbit(((b * b) / 2) + a, interferencematrix) > 0; + else if (a > b) + return bitvectorgetbit(((a * a) / 2) + b, interferencematrix) > 0; + else + return 0; +} + +static void buildinterferencematrix(void) { + PCodeBlock *block; // r30 + PCode *instr; // r29 + PCodeArg *op; + UInt32 *vec; // r28 + UInt32 i; + UInt32 j; + + UInt32 regs = used_virtual_registers[coloring_class]; + interferencematrix = oalloc(4 * ((((regs * regs) / 2) + 31) >> 5)); + bitvectorinitialize(interferencematrix, (regs * regs) / 2, 0); + + for (i = 0; i < 32; i++) + for (j = 0; j < 32; j++) + if (i != j) + makeinterfere(i, j); + + vec = oalloc(4 * ((regs + 31) >> 5)); + for (block = pcbasicblocks; block; block = block->nextBlock) { + bitvectorcopy(vec, liveinfo[block->blockIndex].out, regs); + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + for (op = instr->args, i = instr->argCount; i--; op++) { + if (PC_OP_IS_WRITE_ANY_REGISTER(op, coloring_class)) { + int reg = op->data.reg.reg; + bitvectorclearbit(reg, vec); + for (j = 0; j < regs; j++) { + if (bitvectorgetbit(j, vec)) { + if ( + (instr->flags & fIsMove) && + PC_OP_IS_ANY_REGISTER(&instr->args[0], coloring_class) && + instr->args[1].data.reg.reg == j + ) + continue; + makeinterfere(reg, j); + } + } + } + } + + for (op = instr->args, i = instr->argCount; i--; op++) { + if (PC_OP_IS_READ_ANY_REGISTER(op, coloring_class)) { + int reg = op->data.reg.reg; + if (bitvectorgetbit(op->data.reg.reg, vec) == 0) + op->data.reg.effect |= Effect4; + bitvectorsetbit(reg, vec); + } + } + + if (coloring_class == RegClass_GPR) { + if (PCODE_FLAG_SET_F(instr) & (fIsRead | fIsWrite | fPCodeFlag400000)) { + if (instr->args[1].data.reg.reg >= n_real_registers[coloring_class]) + makeinterfere(0, instr->args[1].data.reg.reg); + if (PCODE_FLAG_SET_F(instr) & fUpdatesPtr) + makeinterfere(instr->args[0].data.reg.reg, instr->args[1].data.reg.reg); + } else { + switch (instr->op) { + case PC_DCBF: + case PC_DCBST: + case PC_DCBT: + case PC_DCBTST: + case PC_DCBZ: + case PC_DCBI: + case PC_ICBI: + case PC_DCCCI: + case PC_ICBT: + case PC_ICCCI: + case PC_ICREAD: + case PC_DCBA: + case PC_DST: + case PC_DSTT: + case PC_DSTST: + case PC_DSTSTT: + if (instr->args[0].data.reg.reg >= n_real_registers[coloring_class]) + makeinterfere(0, instr->args[0].data.reg.reg); + break; + } + } + } + + if (coloring_class == RegClass_GPR && (instr->flags & fIsCall)) { + i = branch_count_volatiles(); + op = instr->args; + CError_ASSERT(219, instr->argCount != 0); + + while (op->kind != PCOp_REGISTER || !(op->data.reg.effect & EffectWrite)) { + i++; + op++; + CError_ASSERT(226, i <= instr->argCount); + } + + for (op = instr->args + i; i < instr->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) { + for (j = 0; j < n_scratch_registers[coloring_class]; j++) + makeinterfere(op->data.reg.reg, scratch_registers[coloring_class][j]); + } + } + } + } + } +} + +static short coalesced_path(short id) { + while (id != coalesced[id]) + id = coalesced[id]; + return id; +} + +static void coalescenodes(void) { + PCodeArg *op; + UInt32 regs; + PCodeBlock *block; + PCode *instr; + UInt32 i; + short path1; + short path2; + short node1; + short node2; + + regs = used_virtual_registers[coloring_class]; + coalesced = oalloc(sizeof(SInt16) * regs); + + for (i = 0; i < regs; i++) + coalesced[i] = i; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if ((instr->flags & fIsMove) && !(instr->flags & fSideEffects)) { + if (PCODE_FLAG_SET_F(instr) & fRecordBit) { + CError_FATAL(309); + continue; + } + + if (instr->argCount > 2) { + if (instr->argCount != 3 || instr->args[2].kind != PCOp_PLACEHOLDEROPERAND) { + CError_FATAL(316); + continue; + } + } + + if (PC_OP_IS_ANY_REGISTER(&instr->args[0], coloring_class)) { + path1 = coalesced_path(instr->args[0].data.reg.reg); + path2 = coalesced_path(instr->args[1].data.reg.reg); + if (path1 == path2) { + deletepcode(instr); + continue; + } + + if (!interferes(path1, path2)) { + if (path1 >= n_real_registers[coloring_class] && path2 >= n_real_registers[coloring_class]) { + if (path1 < first_fe_temporary_register[coloring_class]) + continue; + if (path1 > last_temporary_register[coloring_class]) + continue; + if (path2 < first_fe_temporary_register[coloring_class]) + continue; + if (path2 > last_temporary_register[coloring_class]) + continue; + } + + node1 = (path2 < path1) ? path2 : path1; + node2 = (path2 > path1) ? path2 : path1; + + if (coloring_class == RegClass_GPR && node2 == _CALLER_SP_) + continue; + + coalesced[node2] = node1; + for (i = 0; i < regs; i++) { + if (interferes(node2, i)) + makeinterfere(node1, i); + } + + deletepcode(instr); + } + } + } + } + } + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if (PC_OP_IS_ANY_REGISTER(op, coloring_class) && op->data.reg.reg != coalesced[op->data.reg.reg]) + op->data.reg.reg = coalesced_path(op->data.reg.reg); + op++; + } + } + } +} + +static void buildadjacencyvectors(void) { + IGNode *node; + UInt32 regs; + UInt32 i; + UInt32 counter; + short *array; + short *dest; + short *src; + UInt32 j; + + regs = used_virtual_registers[coloring_class]; + interferencegraph = oalloc(sizeof(IGNode *) * regs); + array = oalloc(sizeof(short) * regs); + + for (i = 0; i < regs; i++) { + counter = 0; + for (j = 0; j < regs; j++) { + if (interferes(i, j)) + array[counter++] = j; + } + + node = interferencegraph[i] = oalloc(sizeof(IGNode) + sizeof(short) * (counter - 1)); + memclrw(node, sizeof(IGNode) + sizeof(short) * (counter - 1)); + + node->x10 = i; + node->x14 = -1; + node->arraySize = counter; + node->x12 = counter; + + dest = node->array; + src = array; + for (j = 0; j < counter; j++) + *(dest++) = *(src++); + + if (i != coalesced[i]) { + node->flags |= fCoalesced; + j = coalesced_path(i); + interferencegraph[j]->flags |= fCoalescedInto; + node->x14 = j; + } + } +} + +static void eliminatedeadcode(void) { + UInt32 regs; + PCodeBlock *block; + PCode *instr; + UInt32 *vec; + UInt32 i; + PCodeArg *op; + + regs = used_virtual_registers[coloring_class]; + vec = oalloc(4 * ((regs + 31) >> 5)); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + bitvectorcopy(vec, liveinfo[block->blockIndex].out, regs); + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + if (dead(instr, coloring_class, vec)) { + deletepcode(instr); + continue; + } + + op = instr->args; + i = instr->argCount; + while (i--) { + if (PC_OP_IS_WRITE_ANY_REGISTER(op, coloring_class)) + bitvectorclearbit(op->data.reg.reg, vec); + op++; + } + + op = instr->args; + i = instr->argCount; + while (i--) { + if (PC_OP_IS_READ_ANY_REGISTER(op, coloring_class)) { + int reg = op->data.reg.reg; + if (!bitvectorgetbit(reg, vec)) + op->data.reg.effect |= Effect4; + bitvectorsetbit(reg, vec); + } + op++; + } + } + } +} + +static void findrematerializations(void) { + UInt32 regs; + UInt32 i; + PCodeBlock *block; + PCode *instr; + PCodeArg *op; + IGNode *node; + + regs = used_virtual_registers[coloring_class]; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + PC_OP_IS_WRITE_ANY_REGISTER(op, coloring_class) && + op->data.reg.reg >= n_real_registers[coloring_class] && + !(interferencegraph[op->data.reg.reg]->flags & (fPairLow | fPairHigh)) && + !(interferencegraph[op->data.reg.reg]->flags & fIGNode40) + ) + { + node = interferencegraph[op->data.reg.reg]; + if (!node->instr8) { + node->instr8 = instr; + } else { + node->instr8 = NULL; + node->flags |= fIGNode40; + } + } + op++; + } + } + } + + for (i = 0; i < regs; i++) { + node = interferencegraph[i]; + if (node->instr8 && !is_location_independent(node->instr8)) + node->instr8 = NULL; + } +} + +void buildinterferencegraph(Object *proc) { + int regs = used_virtual_registers[coloring_class]; + + computelivevariables(proc); + eliminatedeadcode(); + buildinterferencematrix(); + if (copts.debuglisting) + pclistinterferences(register_class_format[coloring_class], regs); + coalescenodes(); + buildadjacencyvectors(); + findrematerializations(); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/RegisterInfo.c b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/RegisterInfo.c new file mode 100644 index 0000000..177c2e0 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/RegisterInfo.c @@ -0,0 +1,381 @@ +#include "compiler/RegisterInfo.h" +#include "compiler/CodeGen.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/PCode.h" +#include "compiler/CompilerTools.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +short last_exception_register[RegClassMax]; +short first_fe_temporary_register[RegClassMax]; +short last_argument_register[RegClassMax]; +short _FP_; +short _CALLER_SP_; +char *special_register_names[RegClassMax][RegisterMax]; +static short used_regs_before_coloring; +static UInt8 save_state[RegisterMax]; + +short spr_to_sysreg[4] = {1, 8, 9, 0x100}; + +void asm_used_register(RegClass rclass, short reg) { + int i; + + if ((reg < n_real_registers[rclass]) && (reg_state[rclass][reg] == RegState0)) { + if (reg == nonvolatile_registers[rclass][used_nonvolatile_registers[rclass]]) { + if (assignable_registers[rclass] > 0) + assignable_registers[rclass]--; + reg_state[rclass][reg] = RegState1; + used_nonvolatile_registers[rclass]++; + } else { + for (i = used_nonvolatile_registers[rclass]; i < n_nonvolatile_registers[rclass]; i++) { + if (reg == nonvolatile_registers[rclass][i]) { + reg_state[rclass][reg] = RegState1; + if (assignable_registers[rclass] > 0) + assignable_registers[rclass]--; + } + } + } + } +} + +void retain_register(Object *obj, RegClass rclass, short reg) { + VarInfo *vi; + + CError_ASSERT(95, (short) reg < RegisterMax); + + if (reg_state[rclass][reg] == RegState0) { + assignable_registers[rclass]--; + reg_state[rclass][reg] = RegState1; + if (reg == nonvolatile_registers[rclass][used_nonvolatile_registers[rclass]]) + used_nonvolatile_registers[rclass]++; + } + + if (obj) { + vi = Registers_GetVarInfo(obj); + vi->rclass = rclass; + vi->flags |= VarInfoFlag2; + vi->reg = reg; + } +} + +void retain_GPR_pair(Object *obj, short reg, short regHi) { + VarInfo *vi; + + retain_register(NULL, RegClass_GPR, reg); + retain_register(NULL, RegClass_GPR, regHi); + + if (obj) { + vi = Registers_GetVarInfo(obj); + vi->rclass = RegClass_GPR; + vi->flags |= VarInfoFlag2 | VarInfoFlag4; + vi->reg = reg; + vi->regHi = regHi; + } +} + +int is_register_object(Object *obj) { + return obj->sclass == TK_REGISTER; +} + +int GetABIFirstNonVolatile(RegClass rclass) { + switch (rclass) { + case RegClass_SPR: return 3; + case RegClass_CRFIELD: return 2; + case RegClass_VR: return 20; + case RegClass_GPR: return 13; + case RegClass_FPR: return 14; + default: return -1; + } +} + +char GetRegisterClassName(RegClass rclass) { + switch (rclass) { + case RegClass_VR: return 'v'; + case RegClass_GPR: return 'r'; + case RegClass_FPR: return 'f'; + default: + CError_FATAL(242); + return '?'; + } +} + +static int first_nonvolatile_reg(RegClass rclass) { + return GetABIFirstNonVolatile(rclass); +} + +void setup_diagnostic_reg_strings(void) { + register_class_name[RegClass_SPR] = "SPR"; + register_class_format[RegClass_SPR] = "spr%" PRId32; + register_class_name[RegClass_CRFIELD] = "CRFIELD"; + register_class_format[RegClass_CRFIELD] = "cr%" PRId32; + register_class_name[RegClass_VR] = "VR"; + register_class_format[RegClass_VR] = "vr%" PRId32; + register_class_name[RegClass_FPR] = "FPR"; + register_class_format[RegClass_FPR] = "f%" PRId32; + register_class_name[RegClass_GPR] = "GPR"; + register_class_format[RegClass_GPR] = "r%" PRId32; +} + +void init_target_registers(void) { + int reg; + int end; + RegClass rclass; + + static int last_nonvolatile_reg[] = {3, 5, 31, 31, 31}; + static int nonvol_reserve[] = {0, 0, 0, 4, 3}; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + for (reg = 0; reg < RegisterMax; reg++) + special_register_names[rclass][reg] = NULL; + } + + special_register_names[RegClass_SPR][0] = "XER"; + special_register_names[RegClass_SPR][1] = "LR"; + special_register_names[RegClass_SPR][2] = "CTR"; + special_register_names[RegClass_SPR][3] = "VRSAVE"; + special_register_names[RegClass_GPR][1] = "SP"; + + setup_diagnostic_reg_strings(); + n_real_registers[RegClass_SPR] = 4; + n_real_registers[RegClass_CRFIELD] = 8; + n_real_registers[RegClass_VR] = 32; + n_real_registers[RegClass_FPR] = 32; + n_real_registers[RegClass_GPR] = 32; + reg_state[RegClass_GPR][1] = RegState2; + reg_state[RegClass_GPR][2] = RegState2; + reg_state[RegClass_CRFIELD][5] = RegState2; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + n_nonvolatile_registers[rclass] = 0; + if (last_nonvolatile_reg[rclass] >= 0) { + end = first_nonvolatile_reg(rclass); + for (reg = last_nonvolatile_reg[rclass]; reg >= end; reg--) { + if (reg_state[rclass][reg] == RegState0) { + nonvolatile_registers[rclass][n_nonvolatile_registers[rclass]++] = reg; + } + } + } + + assignable_registers[rclass] = n_nonvolatile_registers[rclass] - nonvol_reserve[rclass]; + if (assignable_registers[rclass] < 0) + assignable_registers[rclass] = 0; + + n_scratch_registers[rclass] = 0; + for (reg = 0; reg < n_real_registers[rclass]; reg++) { + if (reg < GetABIFirstNonVolatile(rclass) || reg > last_nonvolatile_reg[rclass]) { + if (reg_state[rclass][reg] == RegState0) { + scratch_registers[rclass][n_scratch_registers[rclass]++] = reg; + } + } + } + } + + _FP_ = -1; + _CALLER_SP_ = -1; + optimizing = (copts.optimizationlevel > 0) && !disable_optimizer; +} + +void assign_register_by_type(Object *obj) { + VarInfo *vi; + Type *ty; + Boolean flag; + + ty = obj->type; + vi = Registers_GetVarInfo(obj); + flag = 0; + vi->rclass = RegClassMax; + vi->reg = 0; + vi->regHi = 0; + + if ((ty->type == TYPEINT) || (ty->type == TYPEENUM) || ((ty->type == TYPEPOINTER || ty->type == TYPEARRAY) && (ty->type != TYPEARRAY)) || ((ty->type == TYPEMEMBERPOINTER) && (ty->size == 4U))) { + if (((ty->type == TYPEINT) || (ty->type == TYPEENUM)) && (ty->size == 8)) + flag = 1; + vi->rclass = RegClass_GPR; + } else if (ty->type == TYPEFLOAT) { + vi->rclass = RegClass_FPR; + } else if ((ty->type == TYPESTRUCT) && (TYPE_STRUCT(ty)->stype >= STRUCT_VECTOR_UCHAR) && (TYPE_STRUCT(ty)->stype <= STRUCT_VECTOR_PIXEL)) { + vi->rclass = RegClass_VR; + } else { + return; + } + + if (vi->rclass < RegClassMax) { + if (flag) { + CError_ASSERT(520, vi->rclass == RegClass_GPR); + if (assignable_registers[vi->rclass] > 1) + assign_GPR_pair(obj); + } else { + if (assignable_registers[vi->rclass] > 0) + assign_register_to_variable(obj, vi->rclass); + } + } +} + +void assign_GPR_pair(Object *obj) { + VarInfo *vi; + short reg; + short regHi; + + vi = Registers_GetVarInfo(obj); + if (optimizing) { + reg = used_virtual_registers[RegClass_GPR]++; + regHi = used_virtual_registers[RegClass_GPR]++; + } else { + CError_ASSERT(554, assignable_registers[RegClass_GPR] >= 2); + reg = obtain_nonvolatile_register(RegClass_GPR); + regHi = obtain_nonvolatile_register(RegClass_GPR); + retain_GPR_pair(obj, reg, regHi); + } + + vi->rclass = RegClass_GPR; + if (reg > 0 && regHi > 0) { + vi->flags |= VarInfoFlag2 | VarInfoFlag4; + vi->reg = reg; + vi->regHi = regHi; + } else { + CError_FATAL(567); + } +} + +void open_fe_temp_registers(void) { + int r; + + r = used_virtual_registers[RegClass_GPR]; + first_fe_temporary_register[RegClass_GPR] = last_temporary_register[RegClass_GPR] = r; + r = used_virtual_registers[RegClass_FPR]; + first_fe_temporary_register[RegClass_FPR] = last_temporary_register[RegClass_FPR] = r; + r = used_virtual_registers[RegClass_VR]; + first_fe_temporary_register[RegClass_VR] = last_temporary_register[RegClass_VR] = r; +} + +void set_last_exception_registers(void) { + last_exception_register[RegClass_GPR] = used_virtual_registers[RegClass_GPR] - 1; + last_exception_register[RegClass_FPR] = used_virtual_registers[RegClass_FPR] - 1; + last_exception_register[RegClass_VR] = used_virtual_registers[RegClass_VR] - 1; +} + +static VarInfo *Registers_GetNewVarInfo(void) { + VarInfo *vi = galloc(sizeof(VarInfo)); + memclrw(vi, sizeof(VarInfo)); + return vi; +} + +VarInfo *Registers_GetVarInfo(Object *obj) { + switch (obj->datatype) { + case DDATA: + if (!obj->u.data.info) + obj->u.data.info = Registers_GetNewVarInfo(); + return obj->u.data.info; + case DNONLAZYPTR: + if (!obj->u.toc.info) { + CError_FATAL(639); + obj->u.toc.info = CodeGen_GetNewVarInfo(); + } + return obj->u.toc.info; + case DLOCAL: + if (!obj->u.var.info) + CError_FATAL(647); + return obj->u.var.info; + case DABSOLUTE: + // not sure if this is the right union + if (!obj->u.data.info) + obj->u.data.info = Registers_GetNewVarInfo(); + return obj->u.data.info; + default: + CError_FATAL(660); + return NULL; + } +} + +int used_vrstate_VRs(void) { + int count = 0; + int i; + for (i = 0; i < RegisterMax; i++) { + if (reg_state[RegClass_VR][i]) + count++; + } + return count; +} + +UInt32 colored_vrs_as_vrsave(PCodeBlock *block) { + PCode *pc; + UInt32 mask; + int i; + + mask = 0; + if (copts.altivec_vrsave == 2) + return 0xFFFFFFFF; + if (copts.altivec_vrsave == 0) + return 0; + + while (block) { + for (pc = block->firstPCode; pc; pc = pc->nextPCode) { + if (pc->flags & fOpTypeVR) { + for (i = 0; i < pc->argCount; i++) { + if (pc->args[i].kind == PCOp_REGISTER && pc->args[i].arg == RegClass_VR) + mask |= 1 << (31 - pc->args[i].data.reg.reg); + } + } + } + block = block->nextBlock; + } + + return mask; +} + +void save_before_coloring_nonvolatile_registers(RegClass rclass) { + used_regs_before_coloring = used_nonvolatile_registers[rclass]; + memcpy(save_state, reg_state[rclass], sizeof(save_state)); +} + +void reset_nonvolatile_registers(RegClass rclass) { + used_nonvolatile_registers[rclass] = used_regs_before_coloring; + memcpy(reg_state[rclass], save_state, sizeof(save_state)); +} + +int is_nonvolatile_register(RegClass rclass, int reg) { + int i; + + for (i = 0; i < n_nonvolatile_registers[rclass]; i++) { + if (reg == nonvolatile_registers[rclass][i]) + return 1; + } + + return 0; +} + +void init_endian(void) { + if (copts.littleendian) { + high_offset = 4; + low_offset = 0; + high_reg = 4; + low_reg = 3; + high_reg2 = 6; + low_reg2 = 5; + } else { + high_offset = 0; + low_offset = 4; + high_reg = 3; + low_reg = 4; + high_reg2 = 5; + low_reg2 = 6; + } +} + +void update_asm_nonvolatile_registers(void) { + RegClass rclass; + int i; + int reg; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + reg = n_nonvolatile_registers[rclass]; + for (i = n_nonvolatile_registers[rclass] - 1; i >= 0; i--) { + if (reg_state[rclass][nonvolatile_registers[rclass][i]] == RegState1) + break; + reg--; + } + if (reg > used_nonvolatile_registers[rclass]) + used_nonvolatile_registers[rclass] = reg; + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/SpillCode.c b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/SpillCode.c new file mode 100644 index 0000000..69e7e43 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/RegisterAllocator/SpillCode.c @@ -0,0 +1,452 @@ +#include "compiler/SpillCode.h" +#include "compiler/CError.h" +#include "compiler/CMachine.h" +#include "compiler/CParser.h" +#include "compiler/CodeGen.h" +#include "compiler/CompilerTools.h" +#include "compiler/Coloring.h" +#include "compiler/InterferenceGraph.h" +#include "compiler/Operands.h" +#include "compiler/PCode.h" +#include "compiler/PCodeUtilities.h" +#include "compiler/Registers.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/objects.h" + +static int last_unused_vreg_before_spilling; +static short rTEMP_for_VR_spill; + +void estimatespillcosts(void) { + PCodeBlock *block; + PCode *instr; + IGNode *node; + PCodeArg *op; + int i; + int weight; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (copts.optimizesize) + weight = 1; + else + weight = block->loopWeight; + + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if (PC_OP_IS_READ_ANY_REGISTER(op, coloring_class)) { + node = interferencegraph[op->data.reg.reg]; + if (node->instr8 || copts.optimizesize) + node->spillCost += weight; + else + node->spillCost += weight * 2; + } + op++; + } + + op = instr->args; + i = instr->argCount; + while (i--) { + if (PC_OP_IS_WRITE_ANY_REGISTER(op, coloring_class)) { + node = interferencegraph[op->data.reg.reg]; + if (node->instr8 || (instr->flags & fIsArgInit)) + node->spillCost -= weight; + else + node->spillCost += weight; + } + op++; + } + } + } +} + +static Object *makespilltemporary(Type *type) { + Object *obj = lalloc(sizeof(Object)); + memclrw(obj, sizeof(Object)); + + obj->otype = OT_OBJECT; + obj->access = ACCESSPUBLIC; + obj->datatype = DLOCAL; + obj->type = type; + obj->name = CParser_GetUniqueName(); + obj->u.var.info = CodeGen_GetNewVarInfo(); + obj->u.var.uid = 0; + + return obj; +} + +static PCode *rematerialize_spilled_register(short reg, IGNode *node) { + PCode *instr = copypcode(node->instr8); + + CError_ASSERT(128, instr->args[0].kind == PCOp_REGISTER); + + instr->args[0].data.reg.reg = reg; + return instr; +} + +static void insert_load_spilled_register(PCode *instr, short reg, IGNode *node) { + Type *type; + Opcode opcode; + Object *object; + PCode *newInstr; + PCode *newInstr2; + SInt32 offset; + Operand operand; + + type = node->spillTemporary->type; + switch (coloring_class) { + case RegClass_CRFIELD: + case RegClass_GPR: + switch (type->size) { + case 1: + opcode = PC_LBZ; + break; + case 2: + opcode = is_unsigned(type) ? PC_LHZ : PC_LHA; + break; + case 4: + opcode = PC_LWZ; + break; + case 8: + opcode = PC_LWZ; + break; + default: + CError_FATAL(187); + } + + memclrw(&operand, sizeof(Operand)); + operand.optype = OpndType_Symbol; + operand.object = node->spillTemporary; + CError_ASSERT(222, node->spillTemporary->datatype == DLOCAL); + + coerce_to_addressable(&operand); + + CError_ASSERT(233, operand.optype == OpndType_GPR_ImmOffset); + + CError_ASSERT(237, node->spillTemporary->datatype == DLOCAL); + + if (node->flags & fPairLow) + offset = low_offset; + else if (node->flags & fPairHigh) + offset = high_offset; + else + offset = 0; + insertpcodebefore(instr, makepcode(opcode, reg, operand.reg, operand.object, operand.immOffset + offset)); + break; + + case RegClass_FPR: + CError_ASSERT(253, node->spillTemporary->datatype == DLOCAL); + + if (node->flags & fPairLow) + offset = low_offset; + else if (node->flags & fPairHigh) + offset = high_offset; + else + offset = 0; + + object = node->spillTemporary; + insertpcodebefore( + instr, + makepcode( + (type->size == 8) ? PC_LFD : PC_LFS, + reg, + local_base_register(object), + object, + offset + ) + ); + break; + + case RegClass_VR: + CError_ASSERT(320, node->spillTemporary->datatype == DLOCAL); + + object = node->spillTemporary; + newInstr = makepcode(PC_ADDI, rTEMP_for_VR_spill, local_base_register(object), object, 0); + newInstr2 = makepcode(PC_LVX, reg, 0, rTEMP_for_VR_spill); + insertpcodebefore(instr, newInstr); + insertpcodeafter(newInstr, newInstr2); + break; + + default: + CError_FATAL(333); + } +} + +static void insert_store_spilled_register(PCode *instr, Boolean flag, short reg, IGNode *node) { + Object *object; // r31 + Opcode opcode; // r30 + SInt32 offset; // r26 + PCode *newInstr2; // r26 + PCode *newInstr; // r25 + Type *type; // r25 + + object = node->spillTemporary; + type = object->type; + + switch (coloring_class) { + case RegClass_CRFIELD: + case RegClass_GPR: + switch (type->size) { + case 1: + opcode = PC_STB; + break; + case 2: + opcode = PC_STH; + break; + case 4: + opcode = PC_STW; + break; + case 8: + opcode = PC_STW; + break; + default: + CError_FATAL(391); + } + + if (node->flags & fPairLow) + offset = low_offset; + else if (node->flags & fPairHigh) + offset = high_offset; + else + offset = 0; + + newInstr = makepcode(opcode, reg, local_base_register(object), object, offset); + if (flag) + insertpcodebefore(instr, newInstr); + else + insertpcodeafter(instr, newInstr); + + break; + + case RegClass_FPR: + newInstr = makepcode((type->size == 8) ? PC_STFD : PC_STFS, reg, local_base_register(object), object, 0); + if (flag) + insertpcodebefore(instr, newInstr); + else + insertpcodeafter(instr, newInstr); + + break; + + case RegClass_VR: + newInstr = makepcode(PC_ADDI, rTEMP_for_VR_spill, local_base_register(object), object, 0); + newInstr2 = makepcode(PC_STVX, reg, 0, rTEMP_for_VR_spill); + if (flag) + insertpcodebefore(instr, newInstr); + else + insertpcodeafter(instr, newInstr); + insertpcodeafter(newInstr, newInstr2); + + break; + + default: + CError_FATAL(527); + } +} + +static void spillinstruction(PCodeBlock *block, PCode *instr) { + int reg; + int reg2; + int regs; + IGNode *node; + PCodeArg *op; + int i; + PCodeArg *op2; + int j; + int readCounter; + int writeCounter; + Boolean flag; + + regs = used_virtual_registers[coloring_class]; + flag = 0; + for (i = 0, op = instr->args; i < instr->argCount; i++, op++) { + CError_ASSERT(563, instr->block != NULL); + + if ( + PC_OP_IS_ANY_REGISTER(op, coloring_class) && + (reg = op->data.reg.reg) < regs && + ((node = interferencegraph[op->data.reg.reg])->flags & fSpilled) + ) + { + reg2 = used_virtual_registers[coloring_class]++; + readCounter = 0; + writeCounter = 0; + + for (j = i, op2 = op; j < instr->argCount; j++, op2++) { + if (PC_OP_IS_REGISTER(op2, coloring_class, reg)) { + if (op2->data.reg.effect & EffectRead) + readCounter++; + if (op2->data.reg.effect & EffectWrite) + writeCounter++; + op2->data.reg.reg = reg2; + op2->data.reg.effect |= Effect40; + } + } + + if (readCounter) { + if (node->instr8) + insertpcodebefore(instr, rematerialize_spilled_register(reg2, node)); + else + insert_load_spilled_register(instr, reg2, node); + } + + if (writeCounter) { + if (node->instr8 || (instr->flags & fIsArgInit)) + flag = 1; + else + insert_store_spilled_register(instr, 0, reg2, node); + } + } + } + + if (flag) + deletepcode(instr); +} + +static void spillcopy(PCodeBlock *block, PCode *instr) { + IGNode *node1; + IGNode *node2; + int reg; + + node1 = interferencegraph[instr->args[1].data.reg.reg]; + node2 = interferencegraph[instr->args[0].data.reg.reg]; + + if (node1->flags & fSpilled) { + if (node2->flags & fSpilled) { + reg = used_virtual_registers[coloring_class]++; + if (node1->instr8) + insertpcodebefore(instr, rematerialize_spilled_register(reg, node1)); + else + insert_load_spilled_register(instr, reg, node1); + insert_store_spilled_register(instr, 1, reg, node2); + } else { + if (node1->instr8) + insertpcodebefore(instr, rematerialize_spilled_register(instr->args[0].data.reg.reg, node1)); + else + insert_load_spilled_register(instr, instr->args[0].data.reg.reg, node1); + } + } else { + insert_store_spilled_register(instr, 1, instr->args[1].data.reg.reg, node2); + } + + deletepcode(instr); +} + +static void spillcall(PCodeBlock *block, PCode *instr) { + PCodeArg *opSrc; + PCodeArg *opDst; + int opCount; + int volatileCount; + int i; + + opCount = instr->argCount; + volatileCount = branch_count_volatiles(); + + opDst = instr->args + volatileCount; + opSrc = instr->args + volatileCount; + for (i = volatileCount; i < opCount; i++) { + if ( + PC_OP_IS_ANY_REGISTER(opSrc, coloring_class) && + opSrc->data.reg.reg >= n_real_registers[coloring_class] && + (interferencegraph[opSrc->data.reg.reg]->flags & fSpilled) + ) + { + instr->argCount--; + } else { + *opDst = *opSrc; + opDst++; + } + opSrc++; + } + + spillinstruction(block, instr); +} + +static void assign_spill_locations(void) { + UInt32 i; + IGNode *node; + Type *type; + + last_unused_vreg_before_spilling = used_virtual_registers[coloring_class]; + for (i = n_real_registers[coloring_class]; i < last_unused_vreg_before_spilling; i++) { + node = interferencegraph[i]; + if (node->flags & fCoalesced) + continue; + if (!(node->flags & fSpilled)) + continue; + + if (!node->spillTemporary) { + switch (coloring_class) { + case RegClass_GPR: + type = TYPE(&stunsignedlong); + break; + case RegClass_CRFIELD: + type = TYPE(&stunsignedlong); + break; + case RegClass_FPR: + type = TYPE(&stunsignedlong); + break; + case RegClass_VR: + type = TYPE(&stvectorunsignedchar); + break; + default: + CError_FATAL(771); + } + + node->spillTemporary = makespilltemporary(type); + } + + if (node->spillTemporary->datatype == DLOCAL && !(node->spillTemporary->u.var.info->flags & VarInfoFlag1)) + assign_local_memory(node->spillTemporary); + + if (node->flags & fPairHigh) + Registers_GetVarInfo(node->spillTemporary)->regHi = Register0; + else + Registers_GetVarInfo(node->spillTemporary)->reg = Register0; + } +} + +void insertspillcode(void) { + PCodeBlock *block; + PCode *instr; + PCode *nextInstr; + PCodeArg *op; + UInt32 i; + int flag; + + rTEMP_for_VR_spill = 0; + assign_spill_locations(); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + flag = 0; + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + PC_OP_IS_ANY_REGISTER(op, coloring_class) && + op->data.reg.reg < last_unused_vreg_before_spilling && + (interferencegraph[op->data.reg.reg]->flags & fSpilled) + ) + { + flag = 1; + break; + } + op++; + } + + if (flag) { + if (coloring_class == RegClass_VR && rTEMP_for_VR_spill == 0) + rTEMP_for_VR_spill = used_virtual_registers[RegClass_GPR]++; + + if (instr->flags & fIsMove) + spillcopy(block, instr); + else if (instr->flags & fIsCall) + spillcall(block, instr); + else + spillinstruction(block, instr); + } + } + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation601.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation601.c new file mode 100644 index 0000000..2d54678 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation601.c @@ -0,0 +1,552 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://stuff.mit.edu/afs/sipb/contrib/doc/specs/ic/cpu/powerpc/mpc601.pdf +// https://www.nxp.com/docs/en/user-guide/MPC601UMAD.pdf + +typedef enum Stage { + IU, // Integer Unit + FD, // FP Decode + FPM, // FP Multiply + FPA, // FP Add + FWA, // FP Arithmetic Writeback + BPU, // Branch Processing Unit + NumStages, + Serialize, // special form for instructions that use IU but are serialised + Unsupported // instructions not supported by this processor +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[4]; +} instruction_timing[OPCODE_MAX] = { + BPU, 0, 0, 0, 0, 1, // PC_B + BPU, 0, 0, 0, 0, 1, // PC_BL + BPU, 0, 0, 0, 0, 1, // PC_BC + BPU, 0, 0, 0, 0, 1, // PC_BCLR + BPU, 0, 0, 0, 0, 1, // PC_BCCTR + BPU, 0, 0, 0, 0, 1, // PC_BT + BPU, 0, 0, 0, 0, 1, // PC_BTLR + BPU, 0, 0, 0, 0, 1, // PC_BTCTR + BPU, 0, 0, 0, 0, 1, // PC_BF + BPU, 0, 0, 0, 0, 1, // PC_BFLR + BPU, 0, 0, 0, 0, 1, // PC_BFCTR + BPU, 0, 0, 0, 0, 1, // PC_BDNZ + BPU, 0, 0, 0, 0, 1, // PC_BDNZT + BPU, 0, 0, 0, 0, 1, // PC_BDNZF + BPU, 0, 0, 0, 0, 1, // PC_BDZ + BPU, 0, 0, 0, 0, 1, // PC_BDZT + BPU, 0, 0, 0, 0, 1, // PC_BDZF + BPU, 0, 0, 0, 0, 1, // PC_BLR + BPU, 0, 0, 0, 0, 1, // PC_BCTR + BPU, 0, 0, 0, 0, 1, // PC_BCTRL + BPU, 0, 0, 0, 0, 1, // PC_BLRL + IU, 2, 1, 0, 0, 0, // PC_LBZ + IU, 2, 1, 0, 0, 0, // PC_LBZU + IU, 2, 1, 0, 0, 0, // PC_LBZX + IU, 2, 1, 0, 0, 0, // PC_LBZUX + IU, 2, 1, 0, 0, 0, // PC_LHZ + IU, 2, 1, 0, 0, 0, // PC_LHZU + IU, 2, 1, 0, 0, 0, // PC_LHZX + IU, 2, 1, 0, 0, 0, // PC_LHZUX + IU, 2, 1, 0, 0, 0, // PC_LHA + IU, 2, 1, 0, 0, 0, // PC_LHAU + IU, 2, 1, 0, 0, 0, // PC_LHAX + IU, 2, 1, 0, 0, 0, // PC_LHAUX + IU, 2, 1, 0, 0, 0, // PC_LHBRX + IU, 2, 1, 0, 0, 0, // PC_LWZ + IU, 2, 1, 0, 0, 0, // PC_LWZU + IU, 2, 1, 0, 0, 0, // PC_LWZX + IU, 2, 1, 0, 0, 0, // PC_LWZUX + IU, 2, 1, 0, 0, 0, // PC_LWBRX + IU, 1, 1, 0, 0, 0, // PC_LMW + IU, 1, 1, 0, 0, 0, // PC_STB + IU, 1, 1, 0, 0, 0, // PC_STBU + IU, 1, 1, 0, 0, 0, // PC_STBX + IU, 1, 1, 0, 0, 0, // PC_STBUX + IU, 1, 1, 0, 0, 0, // PC_STH + IU, 1, 1, 0, 0, 0, // PC_STHU + IU, 1, 1, 0, 0, 0, // PC_STHX + IU, 1, 1, 0, 0, 0, // PC_STHUX + IU, 1, 1, 0, 0, 0, // PC_STHBRX + IU, 1, 1, 0, 0, 0, // PC_STW + IU, 1, 1, 0, 0, 0, // PC_STWU + IU, 1, 1, 0, 0, 0, // PC_STWX + IU, 1, 1, 0, 0, 0, // PC_STWUX + IU, 1, 1, 0, 0, 0, // PC_STWBRX + IU, 1, 1, 0, 0, 0, // PC_STMW + IU, 2, 1, 0, 0, 0, // PC_DCBF + IU, 2, 1, 0, 0, 0, // PC_DCBST + IU, 2, 1, 0, 0, 0, // PC_DCBT + IU, 2, 1, 0, 0, 0, // PC_DCBTST + IU, 2, 1, 0, 0, 0, // PC_DCBZ + IU, 1, 1, 0, 0, 0, // PC_ADD + IU, 1, 1, 0, 0, 0, // PC_ADDC + IU, 1, 1, 0, 0, 0, // PC_ADDE + IU, 1, 1, 0, 0, 0, // PC_ADDI + IU, 1, 1, 0, 0, 0, // PC_ADDIC + IU, 1, 1, 0, 0, 0, // PC_ADDICR + IU, 1, 1, 0, 0, 0, // PC_ADDIS + IU, 1, 1, 0, 0, 0, // PC_ADDME + IU, 1, 1, 0, 0, 0, // PC_ADDZE + IU, 36, 36, 0, 0, 0, // PC_DIVW + IU, 36, 36, 0, 0, 0, // PC_DIVWU + IU, 5, 5, 0, 0, 0, // PC_MULHW + IU, 5, 5, 0, 0, 0, // PC_MULHWU + IU, 5, 5, 0, 0, 0, // PC_MULLI + IU, 5, 5, 0, 0, 0, // PC_MULLW + IU, 1, 1, 0, 0, 0, // PC_NEG + IU, 1, 1, 0, 0, 0, // PC_SUBF + IU, 1, 1, 0, 0, 0, // PC_SUBFC + IU, 1, 1, 0, 0, 0, // PC_SUBFE + IU, 1, 1, 0, 0, 0, // PC_SUBFIC + IU, 1, 1, 0, 0, 0, // PC_SUBFME + IU, 1, 1, 0, 0, 0, // PC_SUBFZE + IU, 3, 1, 0, 0, 0, // PC_CMPI + IU, 3, 1, 0, 0, 0, // PC_CMP + IU, 3, 1, 0, 0, 0, // PC_CMPLI + IU, 3, 1, 0, 0, 0, // PC_CMPL + IU, 1, 1, 0, 0, 0, // PC_ANDI + IU, 1, 1, 0, 0, 0, // PC_ANDIS + IU, 1, 1, 0, 0, 0, // PC_ORI + IU, 1, 1, 0, 0, 0, // PC_ORIS + IU, 1, 1, 0, 0, 0, // PC_XORI + IU, 1, 1, 0, 0, 0, // PC_XORIS + IU, 1, 1, 0, 0, 0, // PC_AND + IU, 1, 1, 0, 0, 0, // PC_OR + IU, 1, 1, 0, 0, 0, // PC_XOR + IU, 1, 1, 0, 0, 0, // PC_NAND + IU, 1, 1, 0, 0, 0, // PC_NOR + IU, 1, 1, 0, 0, 0, // PC_EQV + IU, 1, 1, 0, 0, 0, // PC_ANDC + IU, 1, 1, 0, 0, 0, // PC_ORC + IU, 1, 1, 0, 0, 0, // PC_EXTSB + IU, 1, 1, 0, 0, 0, // PC_EXTSH + IU, 1, 1, 0, 0, 0, // PC_CNTLZW + IU, 1, 1, 0, 0, 0, // PC_RLWINM + IU, 1, 1, 0, 0, 0, // PC_RLWNM + IU, 1, 1, 0, 0, 0, // PC_RLWIMI + IU, 1, 1, 0, 0, 0, // PC_SLW + IU, 1, 1, 0, 0, 0, // PC_SRW + IU, 1, 1, 0, 0, 0, // PC_SRAWI + IU, 1, 1, 0, 0, 0, // PC_SRAW + IU, 1, 1, 0, 0, 0, // PC_CRAND + IU, 1, 1, 0, 0, 0, // PC_CRANDC + IU, 1, 1, 0, 0, 0, // PC_CREQV + IU, 1, 1, 0, 0, 0, // PC_CRNAND + IU, 1, 1, 0, 0, 0, // PC_CRNOR + IU, 1, 1, 0, 0, 0, // PC_CROR + IU, 1, 1, 0, 0, 0, // PC_CRORC + IU, 1, 1, 0, 0, 0, // PC_CRXOR + IU, 1, 1, 0, 0, 0, // PC_MCRF + IU, 4, 1, 0, 0, 0, // PC_MTXER + IU, 4, 1, 0, 0, 0, // PC_MTCTR + IU, 4, 1, 0, 0, 0, // PC_MTLR + IU, 2, 1, 0, 0, 0, // PC_MTCRF + IU, 1, 0, 0, 0, 0, // PC_MTMSR + IU, 1, 0, 0, 0, 0, // PC_MTSPR + IU, 1, 0, 0, 0, 0, // PC_MFMSR + IU, 1, 0, 0, 0, 0, // PC_MFSPR + IU, 1, 1, 0, 0, 0, // PC_MFXER + IU, 1, 1, 0, 0, 0, // PC_MFCTR + IU, 1, 1, 0, 0, 0, // PC_MFLR + IU, 1, 1, 0, 0, 0, // PC_MFCR + FD, 4, 1, 1, 1, 1, // PC_MFFS + FD, 4, 1, 1, 1, 1, // PC_MTFSF + Serialize, 1, 1, 0, 0, 1, // PC_EIEIO + Serialize, 1, 1, 0, 0, 1, // PC_ISYNC + Serialize, 1, 1, 0, 0, 1, // PC_SYNC + Serialize, 0, 0, 0, 0, 1, // PC_RFI + IU, 1, 1, 0, 0, 0, // PC_LI + IU, 1, 1, 0, 0, 0, // PC_LIS + IU, 1, 1, 0, 0, 0, // PC_MR + IU, 1, 1, 0, 0, 0, // PC_NOP + IU, 1, 1, 0, 0, 0, // PC_NOT + IU, 3, 1, 0, 0, 0, // PC_LFS + IU, 3, 1, 0, 0, 0, // PC_LFSU + IU, 3, 1, 0, 0, 0, // PC_LFSX + IU, 3, 1, 0, 0, 0, // PC_LFSUX + IU, 3, 1, 0, 0, 0, // PC_LFD + IU, 3, 1, 0, 0, 0, // PC_LFDU + IU, 3, 1, 0, 0, 0, // PC_LFDX + IU, 3, 1, 0, 0, 0, // PC_LFDUX + IU, 1, 1, 0, 0, 0, // PC_STFS + IU, 1, 1, 0, 0, 0, // PC_STFSU + IU, 1, 1, 0, 0, 0, // PC_STFSX + IU, 1, 1, 0, 0, 0, // PC_STFSUX + IU, 1, 1, 0, 0, 0, // PC_STFD + IU, 1, 1, 0, 0, 0, // PC_STFDU + IU, 1, 1, 0, 0, 0, // PC_STFDX + IU, 1, 1, 0, 0, 0, // PC_STFDUX + FD, 4, 1, 1, 1, 1, // PC_FMR + FD, 4, 1, 1, 1, 1, // PC_FABS + FD, 4, 1, 1, 1, 1, // PC_FNEG + FD, 4, 1, 1, 1, 1, // PC_FNABS + FD, 4, 1, 1, 1, 1, // PC_FADD + FD, 4, 1, 1, 1, 1, // PC_FADDS + FD, 4, 1, 1, 1, 1, // PC_FSUB + FD, 4, 1, 1, 1, 1, // PC_FSUBS + FD, 5, 1, 1, 2, 1, // PC_FMUL + FD, 4, 1, 1, 1, 1, // PC_FMULS + FD, 31, 1, 1, 28, 1, // PC_FDIV + FD, 17, 1, 1, 14, 1, // PC_FDIVS + FD, 5, 1, 1, 2, 1, // PC_FMADD + FD, 4, 1, 1, 1, 1, // PC_FMADDS + FD, 5, 1, 1, 2, 1, // PC_FMSUB + FD, 4, 1, 1, 1, 1, // PC_FMSUBS + FD, 5, 1, 1, 2, 1, // PC_FNMADD + FD, 4, 1, 1, 1, 1, // PC_FNMADDS + FD, 5, 1, 1, 2, 1, // PC_FNMSUB + FD, 4, 1, 1, 1, 1, // PC_FNMSUBS + FD, 4, 1, 1, 1, 1, // PC_FRES + FD, 4, 1, 1, 1, 1, // PC_FRSQRTE + FD, 4, 1, 1, 1, 1, // PC_FSEL + FD, 4, 1, 1, 1, 1, // PC_FRSP + FD, 4, 1, 1, 1, 1, // PC_FCTIW + FD, 4, 1, 1, 1, 1, // PC_FCTIWZ + FD, 6, 1, 1, 1, 1, // PC_FCMPU + FD, 6, 1, 1, 1, 1, // PC_FCMPO + IU, 0, 0, 0, 0, 0, // PC_LWARX + IU, 0, 0, 0, 0, 0, // PC_LSWI + IU, 0, 0, 0, 0, 0, // PC_LSWX + IU, 0, 0, 0, 0, 0, // PC_STFIWX + IU, 0, 0, 0, 0, 0, // PC_STSWI + IU, 0, 0, 0, 0, 0, // PC_STSWX + IU, 0, 0, 0, 0, 0, // PC_STWCX + IU, 0, 0, 0, 0, 0, // PC_ECIWX + IU, 0, 0, 0, 0, 0, // PC_ECOWX + IU, 0, 0, 0, 0, 0, // PC_DCBI + IU, 0, 0, 0, 0, 0, // PC_ICBI + IU, 0, 0, 0, 0, 0, // PC_MCRFS + IU, 0, 0, 0, 0, 0, // PC_MCRXR + IU, 0, 0, 0, 0, 0, // PC_MFTB + IU, 0, 0, 0, 0, 0, // PC_MFSR + IU, 0, 0, 0, 0, 0, // PC_MTSR + IU, 0, 0, 0, 0, 0, // PC_MFSRIN + IU, 0, 0, 0, 0, 0, // PC_MTSRIN + IU, 0, 0, 0, 0, 0, // PC_MTFSB0 + IU, 0, 0, 0, 0, 0, // PC_MTFSB1 + IU, 0, 0, 0, 0, 0, // PC_MTFSFI + Serialize, 0, 0, 0, 0, 0, // PC_SC + IU, 0, 0, 0, 0, 0, // PC_FSQRT + IU, 0, 0, 0, 0, 0, // PC_FSQRTS + IU, 0, 0, 0, 0, 0, // PC_TLBIA + IU, 0, 0, 0, 0, 0, // PC_TLBIE + IU, 0, 0, 0, 0, 0, // PC_TLBLD + IU, 0, 0, 0, 0, 0, // PC_TLBLI + IU, 0, 0, 0, 0, 0, // PC_TLBSYNC + Serialize, 0, 0, 0, 0, 0, // PC_TW + Serialize, 0, 0, 0, 0, 0, // PC_TRAP + Serialize, 0, 0, 0, 0, 0, // PC_TWI + Serialize, 0, 0, 0, 0, 0, // PC_OPWORD + IU, 0, 0, 0, 0, 0, // PC_MFROM + IU, 0, 0, 0, 0, 0, // PC_DSA + IU, 0, 0, 0, 0, 0, // PC_ESA + IU, 0, 0, 0, 0, 0, // PC_DCCCI + IU, 0, 0, 0, 0, 0, // PC_DCREAD + IU, 0, 0, 0, 0, 0, // PC_ICBT + IU, 0, 0, 0, 0, 0, // PC_ICCCI + IU, 0, 0, 0, 0, 0, // PC_ICREAD + IU, 0, 0, 0, 0, 0, // PC_RFCI + IU, 0, 0, 0, 0, 0, // PC_TLBRE + IU, 0, 0, 0, 0, 0, // PC_TLBSX + IU, 0, 0, 0, 0, 0, // PC_TLBWE + IU, 0, 0, 0, 0, 0, // PC_WRTEE + IU, 0, 0, 0, 0, 0, // PC_WRTEEI + IU, 0, 0, 0, 0, 0, // PC_MFDCR + IU, 0, 0, 0, 0, 0, // PC_MTDCR + Unsupported, 0, 0, 0, 0, 0, // PC_DCBA + Unsupported, 0, 0, 0, 0, 0, // PC_DSS + Unsupported, 0, 0, 0, 0, 0, // PC_DSSALL + Unsupported, 0, 0, 0, 0, 0, // PC_DST + Unsupported, 0, 0, 0, 0, 0, // PC_DSTT + Unsupported, 0, 0, 0, 0, 0, // PC_DSTST + Unsupported, 0, 0, 0, 0, 0, // PC_DSTSTT + Unsupported, 0, 0, 0, 0, 0, // PC_LVEBX + Unsupported, 0, 0, 0, 0, 0, // PC_LVEHX + Unsupported, 0, 0, 0, 0, 0, // PC_LVEWX + Unsupported, 0, 0, 0, 0, 0, // PC_LVSL + Unsupported, 0, 0, 0, 0, 0, // PC_LVSR + Unsupported, 0, 0, 0, 0, 0, // PC_LVX + Unsupported, 0, 0, 0, 0, 0, // PC_LVXL + Unsupported, 0, 0, 0, 0, 0, // PC_STVEBX + Unsupported, 0, 0, 0, 0, 0, // PC_STVEHX + Unsupported, 0, 0, 0, 0, 0, // PC_STVEWX + Unsupported, 0, 0, 0, 0, 0, // PC_STVX + Unsupported, 0, 0, 0, 0, 0, // PC_STVXL + Unsupported, 0, 0, 0, 0, 0, // PC_MFVSCR + Unsupported, 0, 0, 0, 0, 0, // PC_MTVSCR + Unsupported, 0, 0, 0, 0, 0, // PC_VADDCUW + Unsupported, 0, 0, 0, 0, 0, // PC_VADDFP + Unsupported, 0, 0, 0, 0, 0, // PC_VADDSBS + Unsupported, 0, 0, 0, 0, 0, // PC_VADDSHS + Unsupported, 0, 0, 0, 0, 0, // PC_VADDSWS + Unsupported, 0, 0, 0, 0, 0, // PC_VADDUBM + Unsupported, 0, 0, 0, 0, 0, // PC_VADDUBS + Unsupported, 0, 0, 0, 0, 0, // PC_VADDUHM + Unsupported, 0, 0, 0, 0, 0, // PC_VADDUHS + Unsupported, 0, 0, 0, 0, 0, // PC_VADDUWM + Unsupported, 0, 0, 0, 0, 0, // PC_VADDUWS + Unsupported, 0, 0, 0, 0, 0, // PC_VAND + Unsupported, 0, 0, 0, 0, 0, // PC_VANDC + Unsupported, 0, 0, 0, 0, 0, // PC_VAVGSB + Unsupported, 0, 0, 0, 0, 0, // PC_VAVGSH + Unsupported, 0, 0, 0, 0, 0, // PC_VAVGSW + Unsupported, 0, 0, 0, 0, 0, // PC_VAVGUB + Unsupported, 0, 0, 0, 0, 0, // PC_VAVGUH + Unsupported, 0, 0, 0, 0, 0, // PC_VAVGUW + Unsupported, 0, 0, 0, 0, 0, // PC_VCFSX + Unsupported, 0, 0, 0, 0, 0, // PC_VCFUX + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPBFP + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQFP + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQUB + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQUH + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQUW + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGEFP + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTFP + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTSB + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTSH + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTSW + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTUB + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTUH + Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTUW + Unsupported, 0, 0, 0, 0, 0, // PC_VCTSXS + Unsupported, 0, 0, 0, 0, 0, // PC_VCTUXS + Unsupported, 0, 0, 0, 0, 0, // PC_VEXPTEFP + Unsupported, 0, 0, 0, 0, 0, // PC_VLOGEFP + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXFP + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXSB + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXSH + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXSW + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXUB + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXUH + Unsupported, 0, 0, 0, 0, 0, // PC_VMAXUW + Unsupported, 0, 0, 0, 0, 0, // PC_VMINFP + Unsupported, 0, 0, 0, 0, 0, // PC_VMINSB + Unsupported, 0, 0, 0, 0, 0, // PC_VMINSH + Unsupported, 0, 0, 0, 0, 0, // PC_VMINSW + Unsupported, 0, 0, 0, 0, 0, // PC_VMINUB + Unsupported, 0, 0, 0, 0, 0, // PC_VMINUH + Unsupported, 0, 0, 0, 0, 0, // PC_VMINUW + Unsupported, 0, 0, 0, 0, 0, // PC_VMRGHB + Unsupported, 0, 0, 0, 0, 0, // PC_VMRGHH + Unsupported, 0, 0, 0, 0, 0, // PC_VMRGHW + Unsupported, 0, 0, 0, 0, 0, // PC_VMRGLB + Unsupported, 0, 0, 0, 0, 0, // PC_VMRGLH + Unsupported, 0, 0, 0, 0, 0, // PC_VMRGLW + Unsupported, 0, 0, 0, 0, 0, // PC_VMULESB + Unsupported, 0, 0, 0, 0, 0, // PC_VMULESH + Unsupported, 0, 0, 0, 0, 0, // PC_VMULEUB + Unsupported, 0, 0, 0, 0, 0, // PC_VMULEUH + Unsupported, 0, 0, 0, 0, 0, // PC_VMULOSB + Unsupported, 0, 0, 0, 0, 0, // PC_VMULOSH + Unsupported, 0, 0, 0, 0, 0, // PC_VMULOUB + Unsupported, 0, 0, 0, 0, 0, // PC_VMULOUH + Unsupported, 0, 0, 0, 0, 0, // PC_VNOR + Unsupported, 0, 0, 0, 0, 0, // PC_VOR + Unsupported, 0, 0, 0, 0, 0, // PC_VPKPX + Unsupported, 0, 0, 0, 0, 0, // PC_VPKSHSS + Unsupported, 0, 0, 0, 0, 0, // PC_VPKSHUS + Unsupported, 0, 0, 0, 0, 0, // PC_VPKSWSS + Unsupported, 0, 0, 0, 0, 0, // PC_VPKSWUS + Unsupported, 0, 0, 0, 0, 0, // PC_VPKUHUM + Unsupported, 0, 0, 0, 0, 0, // PC_VPKUHUS + Unsupported, 0, 0, 0, 0, 0, // PC_VPKUWUM + Unsupported, 0, 0, 0, 0, 0, // PC_VPKUWUS + Unsupported, 0, 0, 0, 0, 0, // PC_VREFP + Unsupported, 0, 0, 0, 0, 0, // PC_VRFIM + Unsupported, 0, 0, 0, 0, 0, // PC_VRFIN + Unsupported, 0, 0, 0, 0, 0, // PC_VRFIP + Unsupported, 0, 0, 0, 0, 0, // PC_VRFIZ + Unsupported, 0, 0, 0, 0, 0, // PC_VRLB + Unsupported, 0, 0, 0, 0, 0, // PC_VRLH + Unsupported, 0, 0, 0, 0, 0, // PC_VRLW + Unsupported, 0, 0, 0, 0, 0, // PC_VRSQRTEFP + Unsupported, 0, 0, 0, 0, 0, // PC_VSL + Unsupported, 0, 0, 0, 0, 0, // PC_VSLB + Unsupported, 0, 0, 0, 0, 0, // PC_VSLH + Unsupported, 0, 0, 0, 0, 0, // PC_VSLO + Unsupported, 0, 0, 0, 0, 0, // PC_VSLW + Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTB + Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTH + Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTW + Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTISB + Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTISH + Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTISW + Unsupported, 0, 0, 0, 0, 0, // PC_VSR + Unsupported, 0, 0, 0, 0, 0, // PC_VSRAB + Unsupported, 0, 0, 0, 0, 0, // PC_VSRAH + Unsupported, 0, 0, 0, 0, 0, // PC_VSRAW + Unsupported, 0, 0, 0, 0, 0, // PC_VSRB + Unsupported, 0, 0, 0, 0, 0, // PC_VSRH + Unsupported, 0, 0, 0, 0, 0, // PC_VSRO + Unsupported, 0, 0, 0, 0, 0, // PC_VSRW + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBCUW + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBFP + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBSBS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBSHS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBSWS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUBM + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUBS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUHM + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUHS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUWM + Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUWS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUMSWS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUFPMSWS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUFWASBS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUFWASHS + Unsupported, 0, 0, 0, 0, 0, // PC_VSUFWAUBS + Unsupported, 0, 0, 0, 0, 0, // PC_VUPKHPX + Unsupported, 0, 0, 0, 0, 0, // PC_VUPKHSB + Unsupported, 0, 0, 0, 0, 0, // PC_VUPKHSH + Unsupported, 0, 0, 0, 0, 0, // PC_VUPKLPX + Unsupported, 0, 0, 0, 0, 0, // PC_VUPKLSB + Unsupported, 0, 0, 0, 0, 0, // PC_VUPKLSH + Unsupported, 0, 0, 0, 0, 0, // PC_VXOR + Unsupported, 0, 0, 0, 0, 0, // PC_VMADDFP + Unsupported, 0, 0, 0, 0, 0, // PC_VMHADDSHS + Unsupported, 0, 0, 0, 0, 0, // PC_VMHRADDSHS + Unsupported, 0, 0, 0, 0, 0, // PC_VMLADDUHM + Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMMBM + Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMSHM + Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMSHS + Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMUBM + Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMUHM + Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMUHS + Unsupported, 0, 0, 0, 0, 0, // PC_VNMSUBFP + Unsupported, 0, 0, 0, 0, 0, // PC_VPERM + Unsupported, 0, 0, 0, 0, 0, // PC_VSEL + Unsupported, 0, 0, 0, 0, 0, // PC_VSLDOI + Unsupported, 0, 0, 0, 0, 0, // PC_VMR + Unsupported, 0, 0, 0, 0, 0, // PC_VMRP + IU, 0, 0, 0, 0, 0, // PC_SLE + IU, 0, 0, 0, 0, 0, // PC_SLEQ + IU, 0, 0, 0, 0, 0, // PC_SLIQ + IU, 0, 0, 0, 0, 0, // PC_SLLIQ + IU, 0, 0, 0, 0, 0, // PC_SLLQ + IU, 0, 0, 0, 0, 0, // PC_SLQ + IU, 0, 0, 0, 0, 0, // PC_SRAIQ + IU, 0, 0, 0, 0, 0, // PC_SRAQ + IU, 0, 0, 0, 0, 0, // PC_SRE + IU, 0, 0, 0, 0, 0, // PC_SREA + IU, 0, 0, 0, 0, 0, // PC_SREQ + IU, 0, 0, 0, 0, 0, // PC_SRIQ + IU, 0, 0, 0, 0, 0, // PC_SRLIQ + IU, 0, 0, 0, 0, 0, // PC_SRLQ + IU, 0, 0, 0, 0, 0, // PC_SRQ + IU, 0, 0, 0, 0, 0, // PC_MASKG + IU, 0, 0, 0, 0, 0, // PC_MASKIR + IU, 0, 0, 0, 0, 0, // PC_LSCBX + IU, 0, 0, 0, 0, 0, // PC_DIV + IU, 0, 0, 0, 0, 0, // PC_DIVS + IU, 0, 0, 0, 0, 0, // PC_DOZ + IU, 0, 0, 0, 0, 0, // PC_MUL + IU, 0, 0, 0, 0, 0, // PC_NABS + IU, 0, 0, 0, 0, 0, // PC_ABS + IU, 0, 0, 0, 0, 0, // PC_CLCS + IU, 0, 0, 0, 0, 0, // PC_DOZI + IU, 0, 0, 0, 0, 0, // PC_RLMI + IU, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int stageCount, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - stageCount]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void complete_instruction(int stage) { + pipeline[stage].instr = NULL; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; +} + +static int can_issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + if (stage == Serialize) + stage = IU; + if (pipeline[stage].instr) + return 0; + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[IU]; + if (stage == Serialize) + stage = IU; + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + if (pipeline[IU].instr && pipeline[IU].remaining == 0) + complete_instruction(IU); + if (pipeline[FWA].instr && pipeline[FWA].remaining == 0) + complete_instruction(FWA); + if (pipeline[BPU].instr && pipeline[BPU].remaining == 0) + complete_instruction(BPU); + + if (pipeline[FPA].instr && pipeline[FPA].remaining == 0 && !pipeline[FWA].instr) + advance(1, FPA, FWA); + if (pipeline[FPM].instr && pipeline[FPM].remaining == 0 && !pipeline[FPA].instr) + advance(1, FPM, FPA); + if (pipeline[FD].instr && pipeline[FD].remaining == 0 && !pipeline[FPM].instr) + advance(1, FD, FPM); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].stage == Serialize; +} + +MachineInfo machine601 = { + 2, + 0, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &default_uses_vpermute_unit +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation603.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation603.c new file mode 100644 index 0000000..49d1ea0 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation603.c @@ -0,0 +1,626 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// this is actually for 603e, but i couldn't find the 603 doc +// https://www.nxp.com/docs/en/reference-manual/MPC603EUM.pdf + +typedef enum Stage { + BPU, // Branch Prediction Unit + IU, // Integer Unit + LSU1, // Load/Store Unit + LSU2, + FPU1, // Floating Point Unit + FPU2, + FPU3, + SRU, // System Register Unit + NumStages +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +enum { + MaxEntries = 5 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[3]; + + // does this instruction serialise? + char serializes; +} instruction_timing[OPCODE_MAX] = { + BPU, 0, 0, 0, 0, 1, // PC_B + BPU, 0, 0, 0, 0, 1, // PC_BL + BPU, 0, 0, 0, 0, 1, // PC_BC + BPU, 0, 0, 0, 0, 1, // PC_BCLR + BPU, 0, 0, 0, 0, 1, // PC_BCCTR + BPU, 0, 0, 0, 0, 1, // PC_BT + BPU, 0, 0, 0, 0, 1, // PC_BTLR + BPU, 0, 0, 0, 0, 1, // PC_BTCTR + BPU, 0, 0, 0, 0, 1, // PC_BF + BPU, 0, 0, 0, 0, 1, // PC_BFLR + BPU, 0, 0, 0, 0, 1, // PC_BFCTR + BPU, 0, 0, 0, 0, 1, // PC_BDNZ + BPU, 0, 0, 0, 0, 1, // PC_BDNZT + BPU, 0, 0, 0, 0, 1, // PC_BDNZF + BPU, 0, 0, 0, 0, 1, // PC_BDZ + BPU, 0, 0, 0, 0, 1, // PC_BDZT + BPU, 0, 0, 0, 0, 1, // PC_BDZF + BPU, 0, 0, 0, 0, 1, // PC_BLR + BPU, 0, 0, 0, 0, 1, // PC_BCTR + BPU, 0, 0, 0, 0, 1, // PC_BCTRL + BPU, 0, 0, 0, 0, 1, // PC_BLRL + LSU1, 2, 1, 1, 0, 0, // PC_LBZ + LSU1, 2, 1, 1, 0, 0, // PC_LBZU + LSU1, 2, 1, 1, 0, 0, // PC_LBZX + LSU1, 2, 1, 1, 0, 0, // PC_LBZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHZ + LSU1, 2, 1, 1, 0, 0, // PC_LHZU + LSU1, 2, 1, 1, 0, 0, // PC_LHZX + LSU1, 2, 1, 1, 0, 0, // PC_LHZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHA + LSU1, 2, 1, 1, 0, 0, // PC_LHAU + LSU1, 2, 1, 1, 0, 0, // PC_LHAX + LSU1, 2, 1, 1, 0, 0, // PC_LHAUX + LSU1, 2, 1, 1, 0, 0, // PC_LHBRX + LSU1, 2, 1, 1, 0, 0, // PC_LWZ + LSU1, 2, 1, 1, 0, 0, // PC_LWZU + LSU1, 2, 1, 1, 0, 0, // PC_LWZX + LSU1, 2, 1, 1, 0, 0, // PC_LWZUX + LSU1, 2, 1, 1, 0, 0, // PC_LWBRX + LSU1, 2, 1, 1, 0, 0, // PC_LMW + LSU1, 2, 1, 1, 0, 0, // PC_STB + LSU1, 2, 1, 1, 0, 0, // PC_STBU + LSU1, 2, 1, 1, 0, 0, // PC_STBX + LSU1, 2, 1, 1, 0, 0, // PC_STBUX + LSU1, 2, 1, 1, 0, 0, // PC_STH + LSU1, 2, 1, 1, 0, 0, // PC_STHU + LSU1, 2, 1, 1, 0, 0, // PC_STHX + LSU1, 2, 1, 1, 0, 0, // PC_STHUX + LSU1, 2, 1, 1, 0, 0, // PC_STHBRX + LSU1, 2, 1, 1, 0, 0, // PC_STW + LSU1, 2, 1, 1, 0, 0, // PC_STWU + LSU1, 2, 1, 1, 0, 0, // PC_STWX + LSU1, 2, 1, 1, 0, 0, // PC_STWUX + LSU1, 2, 1, 1, 0, 0, // PC_STWBRX + LSU1, 2, 1, 1, 0, 0, // PC_STMW + LSU1, 2, 1, 1, 0, 0, // PC_DCBF + LSU1, 2, 1, 1, 0, 0, // PC_DCBST + LSU1, 2, 1, 1, 0, 0, // PC_DCBT + LSU1, 2, 1, 1, 0, 0, // PC_DCBTST + LSU1, 2, 1, 1, 0, 0, // PC_DCBZ + IU, 1, 1, 0, 0, 0, // PC_ADD + IU, 1, 1, 0, 0, 0, // PC_ADDC + IU, 1, 1, 0, 0, 0, // PC_ADDE + IU, 1, 1, 0, 0, 0, // PC_ADDI + IU, 1, 1, 0, 0, 0, // PC_ADDIC + IU, 1, 1, 0, 0, 0, // PC_ADDICR + IU, 1, 1, 0, 0, 0, // PC_ADDIS + IU, 1, 1, 0, 0, 0, // PC_ADDME + IU, 1, 1, 0, 0, 0, // PC_ADDZE + IU, 37, 37, 0, 0, 0, // PC_DIVW + IU, 37, 37, 0, 0, 0, // PC_DIVWU + IU, 5, 5, 0, 0, 0, // PC_MULHW + IU, 5, 5, 0, 0, 0, // PC_MULHWU + IU, 3, 3, 0, 0, 0, // PC_MULLI + IU, 5, 5, 0, 0, 0, // PC_MULLW + IU, 1, 1, 0, 0, 0, // PC_NEG + IU, 1, 1, 0, 0, 0, // PC_SUBF + IU, 1, 1, 0, 0, 0, // PC_SUBFC + IU, 1, 1, 0, 0, 0, // PC_SUBFE + IU, 1, 1, 0, 0, 0, // PC_SUBFIC + IU, 1, 1, 0, 0, 0, // PC_SUBFME + IU, 1, 1, 0, 0, 0, // PC_SUBFZE + IU, 3, 1, 0, 0, 0, // PC_CMPI + IU, 3, 1, 0, 0, 0, // PC_CMP + IU, 3, 1, 0, 0, 0, // PC_CMPLI + IU, 3, 1, 0, 0, 0, // PC_CMPL + IU, 1, 1, 0, 0, 0, // PC_ANDI + IU, 1, 1, 0, 0, 0, // PC_ANDIS + IU, 1, 1, 0, 0, 0, // PC_ORI + IU, 1, 1, 0, 0, 0, // PC_ORIS + IU, 1, 1, 0, 0, 0, // PC_XORI + IU, 1, 1, 0, 0, 0, // PC_XORIS + IU, 1, 1, 0, 0, 0, // PC_AND + IU, 1, 1, 0, 0, 0, // PC_OR + IU, 1, 1, 0, 0, 0, // PC_XOR + IU, 1, 1, 0, 0, 0, // PC_NAND + IU, 1, 1, 0, 0, 0, // PC_NOR + IU, 1, 1, 0, 0, 0, // PC_EQV + IU, 1, 1, 0, 0, 0, // PC_ANDC + IU, 1, 1, 0, 0, 0, // PC_ORC + IU, 1, 1, 0, 0, 0, // PC_EXTSB + IU, 1, 1, 0, 0, 0, // PC_EXTSH + IU, 1, 1, 0, 0, 0, // PC_CNTLZW + IU, 1, 1, 0, 0, 0, // PC_RLWINM + IU, 1, 1, 0, 0, 0, // PC_RLWNM + IU, 1, 1, 0, 0, 0, // PC_RLWIMI + IU, 1, 1, 0, 0, 0, // PC_SLW + IU, 1, 1, 0, 0, 0, // PC_SRW + IU, 1, 1, 0, 0, 0, // PC_SRAWI + IU, 1, 1, 0, 0, 0, // PC_SRAW + SRU, 1, 1, 0, 0, 0, // PC_CRAND + SRU, 1, 1, 0, 0, 0, // PC_CRANDC + SRU, 1, 1, 0, 0, 0, // PC_CREQV + SRU, 1, 1, 0, 0, 0, // PC_CRNAND + SRU, 1, 1, 0, 0, 0, // PC_CRNOR + SRU, 1, 1, 0, 0, 0, // PC_CROR + SRU, 1, 1, 0, 0, 0, // PC_CRORC + SRU, 1, 1, 0, 0, 0, // PC_CRXOR + SRU, 1, 1, 0, 0, 0, // PC_MCRF + SRU, 2, 2, 0, 0, 0, // PC_MTXER + SRU, 2, 2, 0, 0, 0, // PC_MTCTR + SRU, 2, 2, 0, 0, 0, // PC_MTLR + SRU, 1, 1, 0, 0, 0, // PC_MTCRF + SRU, 1, 1, 0, 0, 1, // PC_MTMSR + SRU, 1, 1, 0, 0, 1, // PC_MTSPR + SRU, 1, 1, 0, 0, 1, // PC_MFMSR + SRU, 1, 1, 0, 0, 1, // PC_MFSPR + SRU, 1, 1, 0, 0, 0, // PC_MFXER + SRU, 1, 1, 0, 0, 0, // PC_MFCTR + SRU, 1, 1, 0, 0, 0, // PC_MFLR + SRU, 1, 1, 0, 0, 0, // PC_MFCR + FPU1, 3, 1, 1, 1, 0, // PC_MFFS + FPU1, 3, 1, 1, 1, 0, // PC_MTFSF + SRU, 1, 1, 0, 0, 1, // PC_EIEIO + SRU, 1, 1, 0, 0, 1, // PC_ISYNC + SRU, 1, 1, 0, 0, 1, // PC_SYNC + SRU, 1, 1, 0, 0, 1, // PC_RFI + IU, 1, 1, 0, 0, 0, // PC_LI + IU, 1, 1, 0, 0, 0, // PC_LIS + IU, 1, 1, 0, 0, 0, // PC_MR + IU, 1, 1, 0, 0, 0, // PC_NOP + IU, 1, 1, 0, 0, 0, // PC_NOT + LSU1, 2, 1, 1, 0, 0, // PC_LFS + LSU1, 2, 1, 1, 0, 0, // PC_LFSU + LSU1, 2, 1, 1, 0, 0, // PC_LFSX + LSU1, 2, 1, 1, 0, 0, // PC_LFSUX + LSU1, 2, 1, 1, 0, 0, // PC_LFD + LSU1, 2, 1, 1, 0, 0, // PC_LFDU + LSU1, 2, 1, 1, 0, 0, // PC_LFDX + LSU1, 2, 1, 1, 0, 0, // PC_LFDUX + LSU1, 2, 1, 1, 0, 0, // PC_STFS + LSU1, 2, 1, 1, 0, 0, // PC_STFSU + LSU1, 2, 1, 1, 0, 0, // PC_STFSX + LSU1, 2, 1, 1, 0, 0, // PC_STFSUX + LSU1, 2, 1, 1, 0, 0, // PC_STFD + LSU1, 2, 1, 1, 0, 0, // PC_STFDU + LSU1, 2, 1, 1, 0, 0, // PC_STFDX + LSU1, 2, 1, 1, 0, 0, // PC_STFDUX + FPU1, 3, 1, 1, 1, 0, // PC_FMR + FPU1, 3, 1, 1, 1, 0, // PC_FABS + FPU1, 3, 1, 1, 1, 0, // PC_FNEG + FPU1, 3, 1, 1, 1, 0, // PC_FNABS + FPU1, 3, 1, 1, 1, 0, // PC_FADD + FPU1, 3, 1, 1, 1, 0, // PC_FADDS + FPU1, 3, 1, 1, 1, 0, // PC_FSUB + FPU1, 3, 1, 1, 1, 0, // PC_FSUBS + FPU1, 4, 2, 1, 1, 0, // PC_FMUL + FPU1, 3, 1, 1, 1, 0, // PC_FMULS + FPU1, 33, 33, 0, 0, 0, // PC_FDIV + FPU1, 18, 18, 0, 0, 0, // PC_FDIVS + FPU1, 4, 2, 1, 1, 0, // PC_FMADD + FPU1, 3, 1, 1, 1, 0, // PC_FMADDS + FPU1, 4, 2, 1, 1, 0, // PC_FMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS + FPU1, 4, 2, 1, 1, 0, // PC_FNMADD + FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS + FPU1, 4, 2, 1, 1, 0, // PC_FNMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS + FPU1, 18, 18, 0, 0, 0, // PC_FRES + FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE + FPU1, 3, 1, 1, 1, 0, // PC_FSEL + FPU1, 3, 1, 1, 1, 0, // PC_FRSP + FPU1, 3, 1, 1, 1, 0, // PC_FCTIW + FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ + FPU1, 5, 1, 1, 1, 0, // PC_FCMPU + FPU1, 5, 1, 1, 1, 0, // PC_FCMPO + LSU1, 1, 1, 0, 0, 0, // PC_LWARX + LSU1, 1, 1, 0, 0, 0, // PC_LSWI + LSU1, 1, 1, 0, 0, 0, // PC_LSWX + LSU1, 1, 1, 0, 0, 0, // PC_STFIWX + LSU1, 1, 1, 0, 0, 0, // PC_STSWI + LSU1, 1, 1, 0, 0, 0, // PC_STSWX + LSU1, 1, 1, 0, 0, 0, // PC_STWCX + IU, 1, 1, 0, 0, 1, // PC_ECIWX + IU, 1, 1, 0, 0, 1, // PC_ECOWX + IU, 1, 1, 0, 0, 0, // PC_DCBI + IU, 1, 1, 0, 0, 0, // PC_ICBI + IU, 1, 1, 0, 0, 0, // PC_MCRFS + IU, 1, 1, 0, 0, 0, // PC_MCRXR + IU, 1, 1, 0, 0, 0, // PC_MFTB + IU, 1, 1, 0, 0, 0, // PC_MFSR + IU, 1, 1, 0, 0, 0, // PC_MTSR + IU, 1, 1, 0, 0, 0, // PC_MFSRIN + IU, 1, 1, 0, 0, 0, // PC_MTSRIN + IU, 1, 1, 0, 0, 0, // PC_MTFSB0 + IU, 1, 1, 0, 0, 0, // PC_MTFSB1 + IU, 1, 1, 0, 0, 0, // PC_MTFSFI + IU, 1, 1, 0, 0, 1, // PC_SC + FPU1, 1, 1, 0, 0, 0, // PC_FSQRT + FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS + IU, 1, 1, 0, 0, 0, // PC_TLBIA + IU, 1, 1, 0, 0, 0, // PC_TLBIE + IU, 1, 1, 0, 0, 0, // PC_TLBLD + IU, 1, 1, 0, 0, 0, // PC_TLBLI + IU, 1, 1, 0, 0, 0, // PC_TLBSYNC + IU, 1, 1, 0, 0, 1, // PC_TW + IU, 1, 1, 0, 0, 1, // PC_TRAP + IU, 1, 1, 0, 0, 1, // PC_TWI + IU, 1, 1, 0, 0, 1, // PC_OPWORD + IU, 1, 1, 0, 0, 0, // PC_MFROM + IU, 1, 1, 0, 0, 1, // PC_DSA + IU, 1, 1, 0, 0, 1, // PC_ESA + IU, 0, 0, 0, 0, 0, // PC_DCCCI + IU, 0, 0, 0, 0, 0, // PC_DCREAD + IU, 0, 0, 0, 0, 0, // PC_ICBT + IU, 0, 0, 0, 0, 0, // PC_ICCCI + IU, 0, 0, 0, 0, 0, // PC_ICREAD + IU, 0, 0, 0, 0, 0, // PC_RFCI + IU, 0, 0, 0, 0, 0, // PC_TLBRE + IU, 0, 0, 0, 0, 0, // PC_TLBSX + IU, 0, 0, 0, 0, 0, // PC_TLBWE + IU, 0, 0, 0, 0, 0, // PC_WRTEE + IU, 0, 0, 0, 0, 0, // PC_WRTEEI + IU, 0, 0, 0, 0, 0, // PC_MFDCR + IU, 0, 0, 0, 0, 0, // PC_MTDCR + IU, 0, 0, 0, 0, 0, // PC_DCBA + BPU, 0, 0, 0, 0, 0, // PC_DSS + BPU, 0, 0, 0, 0, 0, // PC_DSSALL + BPU, 0, 0, 0, 0, 0, // PC_DST + BPU, 0, 0, 0, 0, 0, // PC_DSTT + BPU, 0, 0, 0, 0, 0, // PC_DSTST + BPU, 0, 0, 0, 0, 0, // PC_DSTSTT + BPU, 0, 0, 0, 0, 0, // PC_LVEBX + BPU, 0, 0, 0, 0, 0, // PC_LVEHX + BPU, 0, 0, 0, 0, 0, // PC_LVEWX + BPU, 0, 0, 0, 0, 0, // PC_LVSL + BPU, 0, 0, 0, 0, 0, // PC_LVSR + BPU, 0, 0, 0, 0, 0, // PC_LVX + BPU, 0, 0, 0, 0, 0, // PC_LVXL + BPU, 0, 0, 0, 0, 0, // PC_STVEBX + BPU, 0, 0, 0, 0, 0, // PC_STVEHX + BPU, 0, 0, 0, 0, 0, // PC_STVEWX + BPU, 0, 0, 0, 0, 0, // PC_STVX + BPU, 0, 0, 0, 0, 0, // PC_STVXL + BPU, 0, 0, 0, 0, 0, // PC_MFVSCR + BPU, 0, 0, 0, 0, 0, // PC_MTVSCR + BPU, 0, 0, 0, 0, 0, // PC_VADDCUW + BPU, 0, 0, 0, 0, 0, // PC_VADDFP + BPU, 0, 0, 0, 0, 0, // PC_VADDSBS + BPU, 0, 0, 0, 0, 0, // PC_VADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VADDSWS + BPU, 0, 0, 0, 0, 0, // PC_VADDUBM + BPU, 0, 0, 0, 0, 0, // PC_VADDUBS + BPU, 0, 0, 0, 0, 0, // PC_VADDUHM + BPU, 0, 0, 0, 0, 0, // PC_VADDUHS + BPU, 0, 0, 0, 0, 0, // PC_VADDUWM + BPU, 0, 0, 0, 0, 0, // PC_VADDUWS + BPU, 0, 0, 0, 0, 0, // PC_VAND + BPU, 0, 0, 0, 0, 0, // PC_VANDC + BPU, 0, 0, 0, 0, 0, // PC_VAVGSB + BPU, 0, 0, 0, 0, 0, // PC_VAVGSH + BPU, 0, 0, 0, 0, 0, // PC_VAVGSW + BPU, 0, 0, 0, 0, 0, // PC_VAVGUB + BPU, 0, 0, 0, 0, 0, // PC_VAVGUH + BPU, 0, 0, 0, 0, 0, // PC_VAVGUW + BPU, 0, 0, 0, 0, 0, // PC_VCFSX + BPU, 0, 0, 0, 0, 0, // PC_VCFUX + BPU, 0, 0, 0, 0, 0, // PC_VCMPBFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUB + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUH + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUW + BPU, 0, 0, 0, 0, 0, // PC_VCMPGEFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSB + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSH + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSW + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUB + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUH + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUW + BPU, 0, 0, 0, 0, 0, // PC_VCTSXS + BPU, 0, 0, 0, 0, 0, // PC_VCTUXS + BPU, 0, 0, 0, 0, 0, // PC_VEXPTEFP + BPU, 0, 0, 0, 0, 0, // PC_VLOGEFP + BPU, 0, 0, 0, 0, 0, // PC_VMAXFP + BPU, 0, 0, 0, 0, 0, // PC_VMAXSB + BPU, 0, 0, 0, 0, 0, // PC_VMAXSH + BPU, 0, 0, 0, 0, 0, // PC_VMAXSW + BPU, 0, 0, 0, 0, 0, // PC_VMAXUB + BPU, 0, 0, 0, 0, 0, // PC_VMAXUH + BPU, 0, 0, 0, 0, 0, // PC_VMAXUW + BPU, 0, 0, 0, 0, 0, // PC_VMINFP + BPU, 0, 0, 0, 0, 0, // PC_VMINSB + BPU, 0, 0, 0, 0, 0, // PC_VMINSH + BPU, 0, 0, 0, 0, 0, // PC_VMINSW + BPU, 0, 0, 0, 0, 0, // PC_VMINUB + BPU, 0, 0, 0, 0, 0, // PC_VMINUH + BPU, 0, 0, 0, 0, 0, // PC_VMINUW + BPU, 0, 0, 0, 0, 0, // PC_VMRGHB + BPU, 0, 0, 0, 0, 0, // PC_VMRGHH + BPU, 0, 0, 0, 0, 0, // PC_VMRGHW + BPU, 0, 0, 0, 0, 0, // PC_VMRGLB + BPU, 0, 0, 0, 0, 0, // PC_VMRGLH + BPU, 0, 0, 0, 0, 0, // PC_VMRGLW + BPU, 0, 0, 0, 0, 0, // PC_VMULESB + BPU, 0, 0, 0, 0, 0, // PC_VMULESH + BPU, 0, 0, 0, 0, 0, // PC_VMULEUB + BPU, 0, 0, 0, 0, 0, // PC_VMULEUH + BPU, 0, 0, 0, 0, 0, // PC_VMULOSB + BPU, 0, 0, 0, 0, 0, // PC_VMULOSH + BPU, 0, 0, 0, 0, 0, // PC_VMULOUB + BPU, 0, 0, 0, 0, 0, // PC_VMULOUH + BPU, 0, 0, 0, 0, 0, // PC_VNOR + BPU, 0, 0, 0, 0, 0, // PC_VOR + BPU, 0, 0, 0, 0, 0, // PC_VPKPX + BPU, 0, 0, 0, 0, 0, // PC_VPKSHSS + BPU, 0, 0, 0, 0, 0, // PC_VPKSHUS + BPU, 0, 0, 0, 0, 0, // PC_VPKSWSS + BPU, 0, 0, 0, 0, 0, // PC_VPKSWUS + BPU, 0, 0, 0, 0, 0, // PC_VPKUHUM + BPU, 0, 0, 0, 0, 0, // PC_VPKUHUS + BPU, 0, 0, 0, 0, 0, // PC_VPKUWUM + BPU, 0, 0, 0, 0, 0, // PC_VPKUWUS + BPU, 0, 0, 0, 0, 0, // PC_VREFP + BPU, 0, 0, 0, 0, 0, // PC_VRFIM + BPU, 0, 0, 0, 0, 0, // PC_VRFIN + BPU, 0, 0, 0, 0, 0, // PC_VRFIP + BPU, 0, 0, 0, 0, 0, // PC_VRFIZ + BPU, 0, 0, 0, 0, 0, // PC_VRLB + BPU, 0, 0, 0, 0, 0, // PC_VRLH + BPU, 0, 0, 0, 0, 0, // PC_VRLW + BPU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP + BPU, 0, 0, 0, 0, 0, // PC_VSL + BPU, 0, 0, 0, 0, 0, // PC_VSLB + BPU, 0, 0, 0, 0, 0, // PC_VSLH + BPU, 0, 0, 0, 0, 0, // PC_VSLO + BPU, 0, 0, 0, 0, 0, // PC_VSLW + BPU, 0, 0, 0, 0, 0, // PC_VSPLTB + BPU, 0, 0, 0, 0, 0, // PC_VSPLTH + BPU, 0, 0, 0, 0, 0, // PC_VSPLTW + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISB + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISH + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISW + BPU, 0, 0, 0, 0, 0, // PC_VSR + BPU, 0, 0, 0, 0, 0, // PC_VSRAB + BPU, 0, 0, 0, 0, 0, // PC_VSRAH + BPU, 0, 0, 0, 0, 0, // PC_VSRAW + BPU, 0, 0, 0, 0, 0, // PC_VSRB + BPU, 0, 0, 0, 0, 0, // PC_VSRH + BPU, 0, 0, 0, 0, 0, // PC_VSRO + BPU, 0, 0, 0, 0, 0, // PC_VSRW + BPU, 0, 0, 0, 0, 0, // PC_VSUBCUW + BPU, 0, 0, 0, 0, 0, // PC_VSUBFP + BPU, 0, 0, 0, 0, 0, // PC_VSUBSBS + BPU, 0, 0, 0, 0, 0, // PC_VSUBSHS + BPU, 0, 0, 0, 0, 0, // PC_VSUBSWS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUBM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUBS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUHM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUHS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUWM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUWS + BPU, 0, 0, 0, 0, 0, // PC_VSUMSWS + BPU, 0, 0, 0, 0, 0, // PC_VSUM2SWS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4SBS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4SHS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4UBS + BPU, 0, 0, 0, 0, 0, // PC_VUPKHPX + BPU, 0, 0, 0, 0, 0, // PC_VUPKHSB + BPU, 0, 0, 0, 0, 0, // PC_VUPKHSH + BPU, 0, 0, 0, 0, 0, // PC_VUPKLPX + BPU, 0, 0, 0, 0, 0, // PC_VUPKLSB + BPU, 0, 0, 0, 0, 0, // PC_VUPKLSH + BPU, 0, 0, 0, 0, 0, // PC_VXOR + BPU, 0, 0, 0, 0, 0, // PC_VMADDFP + BPU, 0, 0, 0, 0, 0, // PC_VMHADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VMLADDUHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMMBM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHS + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUBM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHS + BPU, 0, 0, 0, 0, 0, // PC_VNMSUBFP + BPU, 0, 0, 0, 0, 0, // PC_VPERM + BPU, 0, 0, 0, 0, 0, // PC_VSEL + BPU, 0, 0, 0, 0, 0, // PC_VSLDOI + BPU, 0, 0, 0, 0, 0, // PC_VMR + BPU, 0, 0, 0, 0, 0, // PC_VMRP + BPU, 0, 0, 0, 0, 0, // PC_SLE + BPU, 0, 0, 0, 0, 0, // PC_SLEQ + BPU, 0, 0, 0, 0, 0, // PC_SLIQ + BPU, 0, 0, 0, 0, 0, // PC_SLLIQ + BPU, 0, 0, 0, 0, 0, // PC_SLLQ + BPU, 0, 0, 0, 0, 0, // PC_SLQ + BPU, 0, 0, 0, 0, 0, // PC_SRAIQ + BPU, 0, 0, 0, 0, 0, // PC_SRAQ + BPU, 0, 0, 0, 0, 0, // PC_SRE + BPU, 0, 0, 0, 0, 0, // PC_SREA + BPU, 0, 0, 0, 0, 0, // PC_SREQ + BPU, 0, 0, 0, 0, 0, // PC_SRIQ + BPU, 0, 0, 0, 0, 0, // PC_SRLIQ + BPU, 0, 0, 0, 0, 0, // PC_SRLQ + BPU, 0, 0, 0, 0, 0, // PC_SRQ + BPU, 0, 0, 0, 0, 0, // PC_MASKG + BPU, 0, 0, 0, 0, 0, // PC_MASKIR + BPU, 0, 0, 0, 0, 0, // PC_LSCBX + BPU, 0, 0, 0, 0, 0, // PC_DIV + BPU, 0, 0, 0, 0, 0, // PC_DIVS + BPU, 0, 0, 0, 0, 0, // PC_DOZ + BPU, 0, 0, 0, 0, 0, // PC_MUL + BPU, 0, 0, 0, 0, 0, // PC_NABS + BPU, 0, 0, 0, 0, 0, // PC_ABS + BPU, 0, 0, 0, 0, 0, // PC_CLCS + BPU, 0, 0, 0, 0, 0, // PC_DOZI + BPU, 0, 0, 0, 0, 0, // PC_RLMI + BPU, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline[stage].instr = NULL; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; + + completionbuffers.free = 5; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) + completionbuffers.entries[i].instr = NULL; +} + +static int can_issue(PCode *instr) { + if (completionbuffers.free == 0) + return 0; + if (pipeline[instruction_timing[instr->op].stage].instr) + return 0; + if ((instr->flags & fIsWrite) && pipeline[LSU2].instr && (pipeline[LSU2].instr->flags & fIsWrite)) + return 0; + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + assign_completion_buffer(instr); + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + } + } + + if (pipeline[IU].instr && pipeline[IU].remaining == 0) + complete_instruction(IU); + if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0) + complete_instruction(LSU2); + if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0) + complete_instruction(FPU3); + if (pipeline[SRU].instr && pipeline[SRU].remaining == 0) + complete_instruction(SRU); + if (pipeline[BPU].instr && pipeline[BPU].remaining == 0) + complete_instruction(BPU); + + if ( + pipeline[FPU1].instr && + pipeline[FPU1].remaining == 0 && + (pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS) + ) + complete_instruction(FPU1); + + if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr) + advance(FPU1, FPU2, FPU3); + if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr) + advance(FPU1, FPU1, FPU2); + if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr) + advance(LSU1, LSU1, LSU2); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +MachineInfo machine603 = { + 2, + 1, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &default_uses_vpermute_unit +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation603e.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation603e.c new file mode 100644 index 0000000..d3e1e47 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation603e.c @@ -0,0 +1,650 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://www.nxp.com/docs/en/reference-manual/MPC603EUM.pdf + +typedef enum Stage { + BPU, // Branch Prediction Unit + IU, // Integer Unit + LSU1, // Load/Store Unit + LSU2, + FPU1, // Floating Point Unit + FPU2, + FPU3, + SRU, // System Register Unit + NumStages +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +enum { + MaxEntries = 5 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[3]; + + // does this instruction serialise? + char serializes; +} instruction_timing[OPCODE_MAX] = { + BPU, 0, 0, 0, 0, 1, // PC_B + BPU, 0, 0, 0, 0, 1, // PC_BL + BPU, 0, 0, 0, 0, 1, // PC_BC + BPU, 0, 0, 0, 0, 1, // PC_BCLR + BPU, 0, 0, 0, 0, 1, // PC_BCCTR + BPU, 0, 0, 0, 0, 1, // PC_BT + BPU, 0, 0, 0, 0, 1, // PC_BTLR + BPU, 0, 0, 0, 0, 1, // PC_BTCTR + BPU, 0, 0, 0, 0, 1, // PC_BF + BPU, 0, 0, 0, 0, 1, // PC_BFLR + BPU, 0, 0, 0, 0, 1, // PC_BFCTR + BPU, 0, 0, 0, 0, 1, // PC_BDNZ + BPU, 0, 0, 0, 0, 1, // PC_BDNZT + BPU, 0, 0, 0, 0, 1, // PC_BDNZF + BPU, 0, 0, 0, 0, 1, // PC_BDZ + BPU, 0, 0, 0, 0, 1, // PC_BDZT + BPU, 0, 0, 0, 0, 1, // PC_BDZF + BPU, 0, 0, 0, 0, 1, // PC_BLR + BPU, 0, 0, 0, 0, 1, // PC_BCTR + BPU, 0, 0, 0, 0, 1, // PC_BCTRL + BPU, 0, 0, 0, 0, 1, // PC_BLRL + LSU1, 2, 1, 1, 0, 0, // PC_LBZ + LSU1, 2, 1, 1, 0, 0, // PC_LBZU + LSU1, 2, 1, 1, 0, 0, // PC_LBZX + LSU1, 2, 1, 1, 0, 0, // PC_LBZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHZ + LSU1, 2, 1, 1, 0, 0, // PC_LHZU + LSU1, 2, 1, 1, 0, 0, // PC_LHZX + LSU1, 2, 1, 1, 0, 0, // PC_LHZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHA + LSU1, 2, 1, 1, 0, 0, // PC_LHAU + LSU1, 2, 1, 1, 0, 0, // PC_LHAX + LSU1, 2, 1, 1, 0, 0, // PC_LHAUX + LSU1, 2, 1, 1, 0, 0, // PC_LHBRX + LSU1, 2, 1, 1, 0, 0, // PC_LWZ + LSU1, 2, 1, 1, 0, 0, // PC_LWZU + LSU1, 2, 1, 1, 0, 0, // PC_LWZX + LSU1, 2, 1, 1, 0, 0, // PC_LWZUX + LSU1, 2, 1, 1, 0, 0, // PC_LWBRX + LSU1, 2, 1, 1, 0, 0, // PC_LMW + LSU1, 2, 1, 1, 0, 0, // PC_STB + LSU1, 2, 1, 1, 0, 0, // PC_STBU + LSU1, 2, 1, 1, 0, 0, // PC_STBX + LSU1, 2, 1, 1, 0, 0, // PC_STBUX + LSU1, 2, 1, 1, 0, 0, // PC_STH + LSU1, 2, 1, 1, 0, 0, // PC_STHU + LSU1, 2, 1, 1, 0, 0, // PC_STHX + LSU1, 2, 1, 1, 0, 0, // PC_STHUX + LSU1, 2, 1, 1, 0, 0, // PC_STHBRX + LSU1, 2, 1, 1, 0, 0, // PC_STW + LSU1, 2, 1, 1, 0, 0, // PC_STWU + LSU1, 2, 1, 1, 0, 0, // PC_STWX + LSU1, 2, 1, 1, 0, 0, // PC_STWUX + LSU1, 2, 1, 1, 0, 0, // PC_STWBRX + LSU1, 2, 1, 1, 0, 0, // PC_STMW + LSU1, 2, 1, 1, 0, 0, // PC_DCBF + LSU1, 2, 1, 1, 0, 0, // PC_DCBST + LSU1, 2, 1, 1, 0, 0, // PC_DCBT + LSU1, 2, 1, 1, 0, 0, // PC_DCBTST + LSU1, 2, 1, 1, 0, 0, // PC_DCBZ + IU, 1, 1, 0, 0, 0, // PC_ADD + IU, 1, 1, 0, 0, 0, // PC_ADDC + IU, 1, 1, 0, 0, 0, // PC_ADDE + IU, 1, 1, 0, 0, 0, // PC_ADDI + IU, 1, 1, 0, 0, 0, // PC_ADDIC + IU, 1, 1, 0, 0, 0, // PC_ADDICR + IU, 1, 1, 0, 0, 0, // PC_ADDIS + IU, 1, 1, 0, 0, 0, // PC_ADDME + IU, 1, 1, 0, 0, 0, // PC_ADDZE + IU, 37, 37, 0, 0, 0, // PC_DIVW + IU, 37, 37, 0, 0, 0, // PC_DIVWU + IU, 5, 5, 0, 0, 0, // PC_MULHW + IU, 5, 5, 0, 0, 0, // PC_MULHWU + IU, 3, 3, 0, 0, 0, // PC_MULLI + IU, 5, 5, 0, 0, 0, // PC_MULLW + IU, 1, 1, 0, 0, 0, // PC_NEG + IU, 1, 1, 0, 0, 0, // PC_SUBF + IU, 1, 1, 0, 0, 0, // PC_SUBFC + IU, 1, 1, 0, 0, 0, // PC_SUBFE + IU, 1, 1, 0, 0, 0, // PC_SUBFIC + IU, 1, 1, 0, 0, 0, // PC_SUBFME + IU, 1, 1, 0, 0, 0, // PC_SUBFZE + IU, 3, 1, 0, 0, 0, // PC_CMPI + IU, 3, 1, 0, 0, 0, // PC_CMP + IU, 3, 1, 0, 0, 0, // PC_CMPLI + IU, 3, 1, 0, 0, 0, // PC_CMPL + IU, 1, 1, 0, 0, 0, // PC_ANDI + IU, 1, 1, 0, 0, 0, // PC_ANDIS + IU, 1, 1, 0, 0, 0, // PC_ORI + IU, 1, 1, 0, 0, 0, // PC_ORIS + IU, 1, 1, 0, 0, 0, // PC_XORI + IU, 1, 1, 0, 0, 0, // PC_XORIS + IU, 1, 1, 0, 0, 0, // PC_AND + IU, 1, 1, 0, 0, 0, // PC_OR + IU, 1, 1, 0, 0, 0, // PC_XOR + IU, 1, 1, 0, 0, 0, // PC_NAND + IU, 1, 1, 0, 0, 0, // PC_NOR + IU, 1, 1, 0, 0, 0, // PC_EQV + IU, 1, 1, 0, 0, 0, // PC_ANDC + IU, 1, 1, 0, 0, 0, // PC_ORC + IU, 1, 1, 0, 0, 0, // PC_EXTSB + IU, 1, 1, 0, 0, 0, // PC_EXTSH + IU, 1, 1, 0, 0, 0, // PC_CNTLZW + IU, 1, 1, 0, 0, 0, // PC_RLWINM + IU, 1, 1, 0, 0, 0, // PC_RLWNM + IU, 1, 1, 0, 0, 0, // PC_RLWIMI + IU, 1, 1, 0, 0, 0, // PC_SLW + IU, 1, 1, 0, 0, 0, // PC_SRW + IU, 1, 1, 0, 0, 0, // PC_SRAWI + IU, 1, 1, 0, 0, 0, // PC_SRAW + SRU, 1, 1, 0, 0, 0, // PC_CRAND + SRU, 1, 1, 0, 0, 0, // PC_CRANDC + SRU, 1, 1, 0, 0, 0, // PC_CREQV + SRU, 1, 1, 0, 0, 0, // PC_CRNAND + SRU, 1, 1, 0, 0, 0, // PC_CRNOR + SRU, 1, 1, 0, 0, 0, // PC_CROR + SRU, 1, 1, 0, 0, 0, // PC_CRORC + SRU, 1, 1, 0, 0, 0, // PC_CRXOR + SRU, 1, 1, 0, 0, 0, // PC_MCRF + SRU, 2, 2, 0, 0, 0, // PC_MTXER + SRU, 2, 2, 0, 0, 0, // PC_MTCTR + SRU, 2, 2, 0, 0, 0, // PC_MTLR + SRU, 1, 1, 0, 0, 0, // PC_MTCRF + SRU, 1, 1, 0, 0, 1, // PC_MTMSR + SRU, 1, 1, 0, 0, 1, // PC_MTSPR + SRU, 1, 1, 0, 0, 1, // PC_MFMSR + SRU, 1, 1, 0, 0, 1, // PC_MFSPR + SRU, 1, 1, 0, 0, 0, // PC_MFXER + SRU, 1, 1, 0, 0, 0, // PC_MFCTR + SRU, 1, 1, 0, 0, 0, // PC_MFLR + SRU, 1, 1, 0, 0, 0, // PC_MFCR + FPU1, 3, 1, 1, 1, 0, // PC_MFFS + FPU1, 3, 1, 1, 1, 0, // PC_MTFSF + SRU, 1, 1, 0, 0, 1, // PC_EIEIO + SRU, 1, 1, 0, 0, 1, // PC_ISYNC + SRU, 1, 1, 0, 0, 1, // PC_SYNC + SRU, 1, 1, 0, 0, 1, // PC_RFI + IU, 1, 1, 0, 0, 0, // PC_LI + IU, 1, 1, 0, 0, 0, // PC_LIS + IU, 1, 1, 0, 0, 0, // PC_MR + IU, 1, 1, 0, 0, 0, // PC_NOP + IU, 1, 1, 0, 0, 0, // PC_NOT + LSU1, 2, 1, 1, 0, 0, // PC_LFS + LSU1, 2, 1, 1, 0, 0, // PC_LFSU + LSU1, 2, 1, 1, 0, 0, // PC_LFSX + LSU1, 2, 1, 1, 0, 0, // PC_LFSUX + LSU1, 2, 1, 1, 0, 0, // PC_LFD + LSU1, 2, 1, 1, 0, 0, // PC_LFDU + LSU1, 2, 1, 1, 0, 0, // PC_LFDX + LSU1, 2, 1, 1, 0, 0, // PC_LFDUX + LSU1, 2, 1, 1, 0, 0, // PC_STFS + LSU1, 2, 1, 1, 0, 0, // PC_STFSU + LSU1, 2, 1, 1, 0, 0, // PC_STFSX + LSU1, 2, 1, 1, 0, 0, // PC_STFSUX + LSU1, 2, 1, 1, 0, 0, // PC_STFD + LSU1, 2, 1, 1, 0, 0, // PC_STFDU + LSU1, 2, 1, 1, 0, 0, // PC_STFDX + LSU1, 2, 1, 1, 0, 0, // PC_STFDUX + FPU1, 3, 1, 1, 1, 0, // PC_FMR + FPU1, 3, 1, 1, 1, 0, // PC_FABS + FPU1, 3, 1, 1, 1, 0, // PC_FNEG + FPU1, 3, 1, 1, 1, 0, // PC_FNABS + FPU1, 3, 1, 1, 1, 0, // PC_FADD + FPU1, 3, 1, 1, 1, 0, // PC_FADDS + FPU1, 3, 1, 1, 1, 0, // PC_FSUB + FPU1, 3, 1, 1, 1, 0, // PC_FSUBS + FPU1, 4, 2, 1, 1, 0, // PC_FMUL + FPU1, 3, 1, 1, 1, 0, // PC_FMULS + FPU1, 33, 33, 0, 0, 0, // PC_FDIV + FPU1, 18, 18, 0, 0, 0, // PC_FDIVS + FPU1, 4, 2, 1, 1, 0, // PC_FMADD + FPU1, 3, 1, 1, 1, 0, // PC_FMADDS + FPU1, 4, 2, 1, 1, 0, // PC_FMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS + FPU1, 4, 2, 1, 1, 0, // PC_FNMADD + FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS + FPU1, 4, 2, 1, 1, 0, // PC_FNMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS + FPU1, 18, 18, 0, 0, 0, // PC_FRES + FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE + FPU1, 3, 1, 1, 1, 0, // PC_FSEL + FPU1, 3, 1, 1, 1, 0, // PC_FRSP + FPU1, 3, 1, 1, 1, 0, // PC_FCTIW + FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ + FPU1, 5, 1, 1, 1, 0, // PC_FCMPU + FPU1, 5, 1, 1, 1, 0, // PC_FCMPO + LSU1, 1, 1, 0, 0, 0, // PC_LWARX + LSU1, 1, 1, 0, 0, 0, // PC_LSWI + LSU1, 1, 1, 0, 0, 0, // PC_LSWX + LSU1, 1, 1, 0, 0, 0, // PC_STFIWX + LSU1, 1, 1, 0, 0, 0, // PC_STSWI + LSU1, 1, 1, 0, 0, 0, // PC_STSWX + LSU1, 1, 1, 0, 0, 0, // PC_STWCX + IU, 1, 1, 0, 0, 1, // PC_ECIWX + IU, 1, 1, 0, 0, 1, // PC_ECOWX + IU, 1, 1, 0, 0, 0, // PC_DCBI + IU, 1, 1, 0, 0, 0, // PC_ICBI + IU, 1, 1, 0, 0, 0, // PC_MCRFS + IU, 1, 1, 0, 0, 0, // PC_MCRXR + IU, 1, 1, 0, 0, 0, // PC_MFTB + IU, 1, 1, 0, 0, 0, // PC_MFSR + IU, 1, 1, 0, 0, 0, // PC_MTSR + IU, 1, 1, 0, 0, 0, // PC_MFSRIN + IU, 1, 1, 0, 0, 0, // PC_MTSRIN + IU, 1, 1, 0, 0, 0, // PC_MTFSB0 + IU, 1, 1, 0, 0, 0, // PC_MTFSB1 + IU, 1, 1, 0, 0, 0, // PC_MTFSFI + IU, 1, 1, 0, 0, 1, // PC_SC + FPU1, 1, 1, 0, 0, 0, // PC_FSQRT + FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS + IU, 1, 1, 0, 0, 0, // PC_TLBIA + IU, 1, 1, 0, 0, 0, // PC_TLBIE + IU, 1, 1, 0, 0, 0, // PC_TLBLD + IU, 1, 1, 0, 0, 0, // PC_TLBLI + IU, 1, 1, 0, 0, 0, // PC_TLBSYNC + IU, 1, 1, 0, 0, 1, // PC_TW + IU, 1, 1, 0, 0, 1, // PC_TRAP + IU, 1, 1, 0, 0, 1, // PC_TWI + IU, 1, 1, 0, 0, 1, // PC_OPWORD + IU, 1, 1, 0, 0, 0, // PC_MFROM + IU, 1, 1, 0, 0, 1, // PC_DSA + IU, 1, 1, 0, 0, 1, // PC_ESA + IU, 0, 0, 0, 0, 0, // PC_DCCCI + IU, 0, 0, 0, 0, 0, // PC_DCREAD + IU, 0, 0, 0, 0, 0, // PC_ICBT + IU, 0, 0, 0, 0, 0, // PC_ICCCI + IU, 0, 0, 0, 0, 0, // PC_ICREAD + IU, 0, 0, 0, 0, 0, // PC_RFCI + IU, 0, 0, 0, 0, 0, // PC_TLBRE + IU, 0, 0, 0, 0, 0, // PC_TLBSX + IU, 0, 0, 0, 0, 0, // PC_TLBWE + IU, 0, 0, 0, 0, 0, // PC_WRTEE + IU, 0, 0, 0, 0, 0, // PC_WRTEEI + IU, 0, 0, 0, 0, 0, // PC_MFDCR + IU, 0, 0, 0, 0, 0, // PC_MTDCR + IU, 0, 0, 0, 0, 0, // PC_DCBA + BPU, 0, 0, 0, 0, 0, // PC_DSS + BPU, 0, 0, 0, 0, 0, // PC_DSSALL + BPU, 0, 0, 0, 0, 0, // PC_DST + BPU, 0, 0, 0, 0, 0, // PC_DSTT + BPU, 0, 0, 0, 0, 0, // PC_DSTST + BPU, 0, 0, 0, 0, 0, // PC_DSTSTT + BPU, 0, 0, 0, 0, 0, // PC_LVEBX + BPU, 0, 0, 0, 0, 0, // PC_LVEHX + BPU, 0, 0, 0, 0, 0, // PC_LVEWX + BPU, 0, 0, 0, 0, 0, // PC_LVSL + BPU, 0, 0, 0, 0, 0, // PC_LVSR + BPU, 0, 0, 0, 0, 0, // PC_LVX + BPU, 0, 0, 0, 0, 0, // PC_LVXL + BPU, 0, 0, 0, 0, 0, // PC_STVEBX + BPU, 0, 0, 0, 0, 0, // PC_STVEHX + BPU, 0, 0, 0, 0, 0, // PC_STVEWX + BPU, 0, 0, 0, 0, 0, // PC_STVX + BPU, 0, 0, 0, 0, 0, // PC_STVXL + BPU, 0, 0, 0, 0, 0, // PC_MFVSCR + BPU, 0, 0, 0, 0, 0, // PC_MTVSCR + BPU, 0, 0, 0, 0, 0, // PC_VADDCUW + BPU, 0, 0, 0, 0, 0, // PC_VADDFP + BPU, 0, 0, 0, 0, 0, // PC_VADDSBS + BPU, 0, 0, 0, 0, 0, // PC_VADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VADDSWS + BPU, 0, 0, 0, 0, 0, // PC_VADDUBM + BPU, 0, 0, 0, 0, 0, // PC_VADDUBS + BPU, 0, 0, 0, 0, 0, // PC_VADDUHM + BPU, 0, 0, 0, 0, 0, // PC_VADDUHS + BPU, 0, 0, 0, 0, 0, // PC_VADDUWM + BPU, 0, 0, 0, 0, 0, // PC_VADDUWS + BPU, 0, 0, 0, 0, 0, // PC_VAND + BPU, 0, 0, 0, 0, 0, // PC_VANDC + BPU, 0, 0, 0, 0, 0, // PC_VAVGSB + BPU, 0, 0, 0, 0, 0, // PC_VAVGSH + BPU, 0, 0, 0, 0, 0, // PC_VAVGSW + BPU, 0, 0, 0, 0, 0, // PC_VAVGUB + BPU, 0, 0, 0, 0, 0, // PC_VAVGUH + BPU, 0, 0, 0, 0, 0, // PC_VAVGUW + BPU, 0, 0, 0, 0, 0, // PC_VCFSX + BPU, 0, 0, 0, 0, 0, // PC_VCFUX + BPU, 0, 0, 0, 0, 0, // PC_VCMPBFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUB + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUH + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUW + BPU, 0, 0, 0, 0, 0, // PC_VCMPGEFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSB + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSH + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSW + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUB + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUH + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUW + BPU, 0, 0, 0, 0, 0, // PC_VCTSXS + BPU, 0, 0, 0, 0, 0, // PC_VCTUXS + BPU, 0, 0, 0, 0, 0, // PC_VEXPTEFP + BPU, 0, 0, 0, 0, 0, // PC_VLOGEFP + BPU, 0, 0, 0, 0, 0, // PC_VMAXFP + BPU, 0, 0, 0, 0, 0, // PC_VMAXSB + BPU, 0, 0, 0, 0, 0, // PC_VMAXSH + BPU, 0, 0, 0, 0, 0, // PC_VMAXSW + BPU, 0, 0, 0, 0, 0, // PC_VMAXUB + BPU, 0, 0, 0, 0, 0, // PC_VMAXUH + BPU, 0, 0, 0, 0, 0, // PC_VMAXUW + BPU, 0, 0, 0, 0, 0, // PC_VMINFP + BPU, 0, 0, 0, 0, 0, // PC_VMINSB + BPU, 0, 0, 0, 0, 0, // PC_VMINSH + BPU, 0, 0, 0, 0, 0, // PC_VMINSW + BPU, 0, 0, 0, 0, 0, // PC_VMINUB + BPU, 0, 0, 0, 0, 0, // PC_VMINUH + BPU, 0, 0, 0, 0, 0, // PC_VMINUW + BPU, 0, 0, 0, 0, 0, // PC_VMRGHB + BPU, 0, 0, 0, 0, 0, // PC_VMRGHH + BPU, 0, 0, 0, 0, 0, // PC_VMRGHW + BPU, 0, 0, 0, 0, 0, // PC_VMRGLB + BPU, 0, 0, 0, 0, 0, // PC_VMRGLH + BPU, 0, 0, 0, 0, 0, // PC_VMRGLW + BPU, 0, 0, 0, 0, 0, // PC_VMULESB + BPU, 0, 0, 0, 0, 0, // PC_VMULESH + BPU, 0, 0, 0, 0, 0, // PC_VMULEUB + BPU, 0, 0, 0, 0, 0, // PC_VMULEUH + BPU, 0, 0, 0, 0, 0, // PC_VMULOSB + BPU, 0, 0, 0, 0, 0, // PC_VMULOSH + BPU, 0, 0, 0, 0, 0, // PC_VMULOUB + BPU, 0, 0, 0, 0, 0, // PC_VMULOUH + BPU, 0, 0, 0, 0, 0, // PC_VNOR + BPU, 0, 0, 0, 0, 0, // PC_VOR + BPU, 0, 0, 0, 0, 0, // PC_VPKPX + BPU, 0, 0, 0, 0, 0, // PC_VPKSHSS + BPU, 0, 0, 0, 0, 0, // PC_VPKSHUS + BPU, 0, 0, 0, 0, 0, // PC_VPKSWSS + BPU, 0, 0, 0, 0, 0, // PC_VPKSWUS + BPU, 0, 0, 0, 0, 0, // PC_VPKUHUM + BPU, 0, 0, 0, 0, 0, // PC_VPKUHUS + BPU, 0, 0, 0, 0, 0, // PC_VPKUWUM + BPU, 0, 0, 0, 0, 0, // PC_VPKUWUS + BPU, 0, 0, 0, 0, 0, // PC_VREFP + BPU, 0, 0, 0, 0, 0, // PC_VRFIM + BPU, 0, 0, 0, 0, 0, // PC_VRFIN + BPU, 0, 0, 0, 0, 0, // PC_VRFIP + BPU, 0, 0, 0, 0, 0, // PC_VRFIZ + BPU, 0, 0, 0, 0, 0, // PC_VRLB + BPU, 0, 0, 0, 0, 0, // PC_VRLH + BPU, 0, 0, 0, 0, 0, // PC_VRLW + BPU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP + BPU, 0, 0, 0, 0, 0, // PC_VSL + BPU, 0, 0, 0, 0, 0, // PC_VSLB + BPU, 0, 0, 0, 0, 0, // PC_VSLH + BPU, 0, 0, 0, 0, 0, // PC_VSLO + BPU, 0, 0, 0, 0, 0, // PC_VSLW + BPU, 0, 0, 0, 0, 0, // PC_VSPLTB + BPU, 0, 0, 0, 0, 0, // PC_VSPLTH + BPU, 0, 0, 0, 0, 0, // PC_VSPLTW + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISB + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISH + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISW + BPU, 0, 0, 0, 0, 0, // PC_VSR + BPU, 0, 0, 0, 0, 0, // PC_VSRAB + BPU, 0, 0, 0, 0, 0, // PC_VSRAH + BPU, 0, 0, 0, 0, 0, // PC_VSRAW + BPU, 0, 0, 0, 0, 0, // PC_VSRB + BPU, 0, 0, 0, 0, 0, // PC_VSRH + BPU, 0, 0, 0, 0, 0, // PC_VSRO + BPU, 0, 0, 0, 0, 0, // PC_VSRW + BPU, 0, 0, 0, 0, 0, // PC_VSUBCUW + BPU, 0, 0, 0, 0, 0, // PC_VSUBFP + BPU, 0, 0, 0, 0, 0, // PC_VSUBSBS + BPU, 0, 0, 0, 0, 0, // PC_VSUBSHS + BPU, 0, 0, 0, 0, 0, // PC_VSUBSWS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUBM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUBS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUHM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUHS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUWM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUWS + BPU, 0, 0, 0, 0, 0, // PC_VSUMSWS + BPU, 0, 0, 0, 0, 0, // PC_VSUM2SWS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4SBS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4SHS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4UBS + BPU, 0, 0, 0, 0, 0, // PC_VUPKHPX + BPU, 0, 0, 0, 0, 0, // PC_VUPKHSB + BPU, 0, 0, 0, 0, 0, // PC_VUPKHSH + BPU, 0, 0, 0, 0, 0, // PC_VUPKLPX + BPU, 0, 0, 0, 0, 0, // PC_VUPKLSB + BPU, 0, 0, 0, 0, 0, // PC_VUPKLSH + BPU, 0, 0, 0, 0, 0, // PC_VXOR + BPU, 0, 0, 0, 0, 0, // PC_VMADDFP + BPU, 0, 0, 0, 0, 0, // PC_VMHADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VMLADDUHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMMBM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHS + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUBM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHS + BPU, 0, 0, 0, 0, 0, // PC_VNMSUBFP + BPU, 0, 0, 0, 0, 0, // PC_VPERM + BPU, 0, 0, 0, 0, 0, // PC_VSEL + BPU, 0, 0, 0, 0, 0, // PC_VSLDOI + BPU, 0, 0, 0, 0, 0, // PC_VMR + BPU, 0, 0, 0, 0, 0, // PC_VMRP + BPU, 0, 0, 0, 0, 0, // PC_SLE + BPU, 0, 0, 0, 0, 0, // PC_SLEQ + BPU, 0, 0, 0, 0, 0, // PC_SLIQ + BPU, 0, 0, 0, 0, 0, // PC_SLLIQ + BPU, 0, 0, 0, 0, 0, // PC_SLLQ + BPU, 0, 0, 0, 0, 0, // PC_SLQ + BPU, 0, 0, 0, 0, 0, // PC_SRAIQ + BPU, 0, 0, 0, 0, 0, // PC_SRAQ + BPU, 0, 0, 0, 0, 0, // PC_SRE + BPU, 0, 0, 0, 0, 0, // PC_SREA + BPU, 0, 0, 0, 0, 0, // PC_SREQ + BPU, 0, 0, 0, 0, 0, // PC_SRIQ + BPU, 0, 0, 0, 0, 0, // PC_SRLIQ + BPU, 0, 0, 0, 0, 0, // PC_SRLQ + BPU, 0, 0, 0, 0, 0, // PC_SRQ + BPU, 0, 0, 0, 0, 0, // PC_MASKG + BPU, 0, 0, 0, 0, 0, // PC_MASKIR + BPU, 0, 0, 0, 0, 0, // PC_LSCBX + BPU, 0, 0, 0, 0, 0, // PC_DIV + BPU, 0, 0, 0, 0, 0, // PC_DIVS + BPU, 0, 0, 0, 0, 0, // PC_DOZ + BPU, 0, 0, 0, 0, 0, // PC_MUL + BPU, 0, 0, 0, 0, 0, // PC_NABS + BPU, 0, 0, 0, 0, 0, // PC_ABS + BPU, 0, 0, 0, 0, 0, // PC_CLCS + BPU, 0, 0, 0, 0, 0, // PC_DOZI + BPU, 0, 0, 0, 0, 0, // PC_RLMI + BPU, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline[stage].instr = NULL; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; + + completionbuffers.free = MaxEntries; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) + completionbuffers.entries[i].instr = NULL; +} + +static int can_issue(PCode *instr) { + int stage; + + if (completionbuffers.free == 0) + return 0; + + stage = instruction_timing[instr->op].stage; + if (pipeline[stage].instr) { + if (stage == IU) { + switch (instr->op) { + case PC_ADD: + case PC_ADDC: + case PC_ADDI: + case PC_ADDIS: + case PC_CMPI: + case PC_CMP: + case PC_CMPLI: + case PC_CMPL: + if (is_dependent(instr, pipeline[IU].instr, RegClass_GPR)) + return 0; + if (!pipeline[SRU].instr) + return 1; + } + } + return 0; + } + + if ((instr->flags & fIsWrite) && pipeline[LSU2].instr && (pipeline[LSU2].instr->flags & fIsWrite)) + return 0; + + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + if (stage == IU && pipeline[IU].instr) + stage = SRU; + assign_completion_buffer(instr); + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + } + } + + if (pipeline[IU].instr && pipeline[IU].remaining == 0) + complete_instruction(IU); + if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0) + complete_instruction(LSU2); + if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0) + complete_instruction(FPU3); + if (pipeline[SRU].instr && pipeline[SRU].remaining == 0) + complete_instruction(SRU); + if (pipeline[BPU].instr && pipeline[BPU].remaining == 0) + complete_instruction(BPU); + + if ( + pipeline[FPU1].instr && + pipeline[FPU1].remaining == 0 && + (pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS) + ) + complete_instruction(FPU1); + + if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr) + advance(FPU1, FPU2, FPU3); + if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr) + advance(FPU1, FPU1, FPU2); + if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr) + advance(LSU1, LSU1, LSU2); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +MachineInfo machine603e = { + 2, + 1, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &default_uses_vpermute_unit +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation604.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation604.c new file mode 100644 index 0000000..9775c9e --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation604.c @@ -0,0 +1,670 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://archive.org/details/bitsavers_motorolaPosManualNov94_22719504 + +typedef enum Stage { + SCIU, // Single-Cycle Integer Unit 1 + SCIU2, // Single-Cycle Integer Unit 2 + MCIU, // Multiple-Cycle Integer Unit + FPU1, // Floating Point Unit + FPU2, + FPU3, + LSU1, // Load/Store Unit + LSU2, + BPU, // Branch Prediction Unit + NumStages +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +enum { + MaxEntries = 16 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static PCode *sciu_completed_instruction; +static PCode *sciu2_completed_instruction; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[3]; + + // does this instruction serialise? + char serializes; +} instruction_timing[OPCODE_MAX] = { + BPU, 0, 0, 0, 0, 1, // PC_B + BPU, 0, 0, 0, 0, 1, // PC_BL + BPU, 0, 0, 0, 0, 1, // PC_BC + BPU, 0, 0, 0, 0, 1, // PC_BCLR + BPU, 0, 0, 0, 0, 1, // PC_BCCTR + BPU, 0, 0, 0, 0, 1, // PC_BT + BPU, 0, 0, 0, 0, 1, // PC_BTLR + BPU, 0, 0, 0, 0, 1, // PC_BTCTR + BPU, 0, 0, 0, 0, 1, // PC_BF + BPU, 0, 0, 0, 0, 1, // PC_BFLR + BPU, 0, 0, 0, 0, 1, // PC_BFCTR + BPU, 0, 0, 0, 0, 1, // PC_BDNZ + BPU, 0, 0, 0, 0, 1, // PC_BDNZT + BPU, 0, 0, 0, 0, 1, // PC_BDNZF + BPU, 0, 0, 0, 0, 1, // PC_BDZ + BPU, 0, 0, 0, 0, 1, // PC_BDZT + BPU, 0, 0, 0, 0, 1, // PC_BDZF + BPU, 0, 0, 0, 0, 1, // PC_BLR + BPU, 0, 0, 0, 0, 1, // PC_BCTR + BPU, 0, 0, 0, 0, 1, // PC_BCTRL + BPU, 0, 0, 0, 0, 1, // PC_BLRL + LSU1, 2, 1, 1, 0, 0, // PC_LBZ + LSU1, 2, 1, 1, 0, 0, // PC_LBZU + LSU1, 2, 1, 1, 0, 0, // PC_LBZX + LSU1, 2, 1, 1, 0, 0, // PC_LBZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHZ + LSU1, 2, 1, 1, 0, 0, // PC_LHZU + LSU1, 2, 1, 1, 0, 0, // PC_LHZX + LSU1, 2, 1, 1, 0, 0, // PC_LHZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHA + LSU1, 2, 1, 1, 0, 0, // PC_LHAU + LSU1, 2, 1, 1, 0, 0, // PC_LHAX + LSU1, 2, 1, 1, 0, 0, // PC_LHAUX + LSU1, 2, 1, 1, 0, 0, // PC_LHBRX + LSU1, 2, 1, 1, 0, 0, // PC_LWZ + LSU1, 2, 1, 1, 0, 0, // PC_LWZU + LSU1, 2, 1, 1, 0, 0, // PC_LWZX + LSU1, 2, 1, 1, 0, 0, // PC_LWZUX + LSU1, 2, 1, 1, 0, 0, // PC_LWBRX + LSU1, 2, 1, 1, 0, 0, // PC_LMW + LSU1, 3, 1, 1, 0, 0, // PC_STB + LSU1, 3, 1, 1, 0, 0, // PC_STBU + LSU1, 3, 1, 1, 0, 0, // PC_STBX + LSU1, 3, 1, 1, 0, 0, // PC_STBUX + LSU1, 3, 1, 1, 0, 0, // PC_STH + LSU1, 3, 1, 1, 0, 0, // PC_STHU + LSU1, 3, 1, 1, 0, 0, // PC_STHX + LSU1, 3, 1, 1, 0, 0, // PC_STHUX + LSU1, 3, 1, 1, 0, 0, // PC_STHBRX + LSU1, 3, 1, 1, 0, 0, // PC_STW + LSU1, 3, 1, 1, 0, 0, // PC_STWU + LSU1, 3, 1, 1, 0, 0, // PC_STWX + LSU1, 3, 1, 1, 0, 0, // PC_STWUX + LSU1, 3, 1, 1, 0, 0, // PC_STWBRX + LSU1, 2, 1, 1, 0, 0, // PC_STMW + LSU1, 2, 1, 1, 0, 0, // PC_DCBF + LSU1, 2, 1, 1, 0, 0, // PC_DCBST + LSU1, 2, 1, 1, 0, 0, // PC_DCBT + LSU1, 2, 1, 1, 0, 0, // PC_DCBTST + LSU1, 2, 1, 1, 0, 0, // PC_DCBZ + SCIU, 1, 1, 0, 0, 0, // PC_ADD + SCIU, 1, 1, 0, 0, 0, // PC_ADDC + SCIU, 1, 1, 0, 0, 0, // PC_ADDE + SCIU, 1, 1, 0, 0, 0, // PC_ADDI + SCIU, 1, 1, 0, 0, 0, // PC_ADDIC + SCIU, 1, 1, 0, 0, 0, // PC_ADDICR + SCIU, 1, 1, 0, 0, 0, // PC_ADDIS + SCIU, 1, 1, 0, 0, 0, // PC_ADDME + SCIU, 1, 1, 0, 0, 0, // PC_ADDZE + MCIU, 20, 20, 0, 0, 0, // PC_DIVW + MCIU, 20, 20, 0, 0, 0, // PC_DIVWU + MCIU, 4, 4, 0, 0, 0, // PC_MULHW + MCIU, 4, 4, 0, 0, 0, // PC_MULHWU + MCIU, 3, 3, 0, 0, 0, // PC_MULLI + MCIU, 4, 4, 0, 0, 0, // PC_MULLW + SCIU, 1, 1, 0, 0, 0, // PC_NEG + SCIU, 1, 1, 0, 0, 0, // PC_SUBF + SCIU, 1, 1, 0, 0, 0, // PC_SUBFC + SCIU, 1, 1, 0, 0, 0, // PC_SUBFE + SCIU, 1, 1, 0, 0, 0, // PC_SUBFIC + SCIU, 1, 1, 0, 0, 0, // PC_SUBFME + SCIU, 1, 1, 0, 0, 0, // PC_SUBFZE + SCIU, 3, 1, 0, 0, 0, // PC_CMPI + SCIU, 3, 1, 0, 0, 0, // PC_CMP + SCIU, 3, 1, 0, 0, 0, // PC_CMPLI + SCIU, 3, 1, 0, 0, 0, // PC_CMPL + SCIU, 1, 1, 0, 0, 0, // PC_ANDI + SCIU, 1, 1, 0, 0, 0, // PC_ANDIS + SCIU, 1, 1, 0, 0, 0, // PC_ORI + SCIU, 1, 1, 0, 0, 0, // PC_ORIS + SCIU, 1, 1, 0, 0, 0, // PC_XORI + SCIU, 1, 1, 0, 0, 0, // PC_XORIS + SCIU, 1, 1, 0, 0, 0, // PC_AND + SCIU, 1, 1, 0, 0, 0, // PC_OR + SCIU, 1, 1, 0, 0, 0, // PC_XOR + SCIU, 1, 1, 0, 0, 0, // PC_NAND + SCIU, 1, 1, 0, 0, 0, // PC_NOR + SCIU, 1, 1, 0, 0, 0, // PC_EQV + SCIU, 1, 1, 0, 0, 0, // PC_ANDC + SCIU, 1, 1, 0, 0, 0, // PC_ORC + SCIU, 1, 1, 0, 0, 0, // PC_EXTSB + SCIU, 1, 1, 0, 0, 0, // PC_EXTSH + SCIU, 1, 1, 0, 0, 0, // PC_CNTLZW + SCIU, 1, 1, 0, 0, 0, // PC_RLWINM + SCIU, 1, 1, 0, 0, 0, // PC_RLWNM + SCIU, 1, 1, 0, 0, 0, // PC_RLWIMI + SCIU, 1, 1, 0, 0, 0, // PC_SLW + SCIU, 1, 1, 0, 0, 0, // PC_SRW + SCIU, 1, 1, 0, 0, 0, // PC_SRAWI + SCIU, 1, 1, 0, 0, 0, // PC_SRAW + BPU, 1, 1, 0, 0, 0, // PC_CRAND + BPU, 1, 1, 0, 0, 0, // PC_CRANDC + BPU, 1, 1, 0, 0, 0, // PC_CREQV + BPU, 1, 1, 0, 0, 0, // PC_CRNAND + BPU, 1, 1, 0, 0, 0, // PC_CRNOR + BPU, 1, 1, 0, 0, 0, // PC_CROR + BPU, 1, 1, 0, 0, 0, // PC_CRORC + BPU, 1, 1, 0, 0, 0, // PC_CRXOR + BPU, 1, 1, 0, 0, 0, // PC_MCRF + MCIU, 1, 1, 0, 0, 0, // PC_MTXER + MCIU, 1, 1, 0, 0, 0, // PC_MTCTR + MCIU, 1, 1, 0, 0, 0, // PC_MTLR + MCIU, 1, 1, 0, 0, 0, // PC_MTCRF + MCIU, 1, 1, 0, 0, 0, // PC_MTMSR + MCIU, 1, 1, 0, 0, 0, // PC_MTSPR + MCIU, 1, 1, 0, 0, 0, // PC_MFMSR + MCIU, 1, 1, 0, 0, 0, // PC_MFSPR + MCIU, 3, 3, 0, 0, 0, // PC_MFXER + MCIU, 3, 3, 0, 0, 0, // PC_MFCTR + MCIU, 3, 3, 0, 0, 0, // PC_MFLR + MCIU, 3, 3, 0, 0, 0, // PC_MFCR + FPU1, 3, 1, 1, 1, 0, // PC_MFFS + FPU1, 3, 1, 1, 1, 0, // PC_MTFSF + LSU1, 1, 0, 0, 0, 1, // PC_EIEIO + LSU1, 1, 0, 0, 0, 1, // PC_ISYNC + LSU1, 1, 0, 0, 0, 1, // PC_SYNC + LSU1, 1, 1, 0, 0, 1, // PC_RFI + SCIU, 1, 1, 0, 0, 0, // PC_LI + SCIU, 1, 1, 0, 0, 0, // PC_LIS + SCIU, 1, 1, 0, 0, 0, // PC_MR + SCIU, 1, 1, 0, 0, 0, // PC_NOP + SCIU, 1, 1, 0, 0, 0, // PC_NOT + LSU1, 3, 1, 1, 0, 0, // PC_LFS + LSU1, 3, 1, 1, 0, 0, // PC_LFSU + LSU1, 3, 1, 1, 0, 0, // PC_LFSX + LSU1, 3, 1, 1, 0, 0, // PC_LFSUX + LSU1, 3, 1, 1, 0, 0, // PC_LFD + LSU1, 3, 1, 1, 0, 0, // PC_LFDU + LSU1, 3, 1, 1, 0, 0, // PC_LFDX + LSU1, 3, 1, 1, 0, 0, // PC_LFDUX + LSU1, 3, 1, 1, 0, 0, // PC_STFS + LSU1, 3, 1, 1, 0, 0, // PC_STFSU + LSU1, 3, 1, 1, 0, 0, // PC_STFSX + LSU1, 3, 1, 1, 0, 0, // PC_STFSUX + LSU1, 3, 1, 1, 0, 0, // PC_STFD + LSU1, 3, 1, 1, 0, 0, // PC_STFDU + LSU1, 3, 1, 1, 0, 0, // PC_STFDX + LSU1, 3, 1, 1, 0, 0, // PC_STFDUX + FPU1, 3, 1, 1, 1, 0, // PC_FMR + FPU1, 3, 1, 1, 1, 0, // PC_FABS + FPU1, 3, 1, 1, 1, 0, // PC_FNEG + FPU1, 3, 1, 1, 1, 0, // PC_FNABS + FPU1, 3, 1, 1, 1, 0, // PC_FADD + FPU1, 3, 1, 1, 1, 0, // PC_FADDS + FPU1, 3, 1, 1, 1, 0, // PC_FSUB + FPU1, 3, 1, 1, 1, 0, // PC_FSUBS + FPU1, 3, 1, 1, 1, 0, // PC_FMUL + FPU1, 3, 1, 1, 1, 0, // PC_FMULS + FPU1, 32, 32, 0, 0, 0, // PC_FDIV + FPU1, 18, 18, 0, 0, 0, // PC_FDIVS + FPU1, 3, 1, 1, 1, 0, // PC_FMADD + FPU1, 3, 1, 1, 1, 0, // PC_FMADDS + FPU1, 3, 1, 1, 1, 0, // PC_FMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS + FPU1, 3, 1, 1, 1, 0, // PC_FNMADD + FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS + FPU1, 3, 1, 1, 1, 0, // PC_FNMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS + FPU1, 18, 18, 0, 0, 0, // PC_FRES + FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE + FPU1, 3, 1, 1, 1, 0, // PC_FSEL + FPU1, 3, 1, 1, 1, 0, // PC_FRSP + FPU1, 3, 1, 1, 1, 0, // PC_FCTIW + FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ + FPU1, 5, 1, 1, 1, 0, // PC_FCMPU + FPU1, 5, 1, 1, 1, 0, // PC_FCMPO + LSU1, 1, 1, 0, 0, 0, // PC_LWARX + LSU1, 1, 1, 0, 0, 0, // PC_LSWI + LSU1, 1, 1, 0, 0, 0, // PC_LSWX + LSU1, 1, 1, 0, 0, 0, // PC_STFIWX + LSU1, 1, 1, 0, 0, 0, // PC_STSWI + LSU1, 1, 1, 0, 0, 0, // PC_STSWX + LSU1, 1, 1, 0, 0, 0, // PC_STWCX + MCIU, 1, 1, 0, 0, 1, // PC_ECIWX + MCIU, 1, 1, 0, 0, 1, // PC_ECOWX + MCIU, 1, 1, 0, 0, 0, // PC_DCBI + MCIU, 1, 1, 0, 0, 0, // PC_ICBI + MCIU, 1, 1, 0, 0, 0, // PC_MCRFS + MCIU, 1, 1, 0, 0, 0, // PC_MCRXR + MCIU, 1, 1, 0, 0, 0, // PC_MFTB + MCIU, 1, 1, 0, 0, 0, // PC_MFSR + MCIU, 1, 1, 0, 0, 0, // PC_MTSR + MCIU, 1, 1, 0, 0, 0, // PC_MFSRIN + MCIU, 1, 1, 0, 0, 0, // PC_MTSRIN + MCIU, 1, 1, 0, 0, 0, // PC_MTFSB0 + MCIU, 1, 1, 0, 0, 0, // PC_MTFSB1 + MCIU, 1, 1, 0, 0, 0, // PC_MTFSFI + MCIU, 1, 1, 0, 0, 1, // PC_SC + FPU1, 1, 1, 0, 0, 0, // PC_FSQRT + FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS + MCIU, 1, 1, 0, 0, 0, // PC_TLBIA + MCIU, 1, 1, 0, 0, 0, // PC_TLBIE + MCIU, 1, 1, 0, 0, 0, // PC_TLBLD + MCIU, 1, 1, 0, 0, 0, // PC_TLBLI + MCIU, 1, 1, 0, 0, 0, // PC_TLBSYNC + MCIU, 1, 1, 0, 0, 1, // PC_TW + MCIU, 1, 1, 0, 0, 1, // PC_TRAP + MCIU, 1, 1, 0, 0, 1, // PC_TWI + MCIU, 1, 1, 0, 0, 1, // PC_OPWORD + MCIU, 1, 1, 0, 0, 0, // PC_MFROM + MCIU, 1, 1, 0, 0, 1, // PC_DSA + MCIU, 1, 1, 0, 0, 1, // PC_ESA + MCIU, 0, 0, 0, 0, 0, // PC_DCCCI + MCIU, 0, 0, 0, 0, 0, // PC_DCREAD + MCIU, 0, 0, 0, 0, 0, // PC_ICBT + MCIU, 0, 0, 0, 0, 0, // PC_ICCCI + MCIU, 0, 0, 0, 0, 0, // PC_ICREAD + MCIU, 0, 0, 0, 0, 0, // PC_RFCI + MCIU, 0, 0, 0, 0, 0, // PC_TLBRE + MCIU, 0, 0, 0, 0, 0, // PC_TLBSX + MCIU, 0, 0, 0, 0, 0, // PC_TLBWE + MCIU, 0, 0, 0, 0, 0, // PC_WRTEE + MCIU, 0, 0, 0, 0, 0, // PC_WRTEEI + MCIU, 0, 0, 0, 0, 0, // PC_MFDCR + MCIU, 0, 0, 0, 0, 0, // PC_MTDCR + MCIU, 0, 0, 0, 0, 0, // PC_DCBA + SCIU, 0, 0, 0, 0, 0, // PC_DSS + SCIU, 0, 0, 0, 0, 0, // PC_DSSALL + SCIU, 0, 0, 0, 0, 0, // PC_DST + SCIU, 0, 0, 0, 0, 0, // PC_DSTT + SCIU, 0, 0, 0, 0, 0, // PC_DSTST + SCIU, 0, 0, 0, 0, 0, // PC_DSTSTT + SCIU, 0, 0, 0, 0, 0, // PC_LVEBX + SCIU, 0, 0, 0, 0, 0, // PC_LVEHX + SCIU, 0, 0, 0, 0, 0, // PC_LVEWX + SCIU, 0, 0, 0, 0, 0, // PC_LVSL + SCIU, 0, 0, 0, 0, 0, // PC_LVSR + SCIU, 0, 0, 0, 0, 0, // PC_LVX + SCIU, 0, 0, 0, 0, 0, // PC_LVXL + SCIU, 0, 0, 0, 0, 0, // PC_STVEBX + SCIU, 0, 0, 0, 0, 0, // PC_STVEHX + SCIU, 0, 0, 0, 0, 0, // PC_STVEWX + SCIU, 0, 0, 0, 0, 0, // PC_STVX + SCIU, 0, 0, 0, 0, 0, // PC_STVXL + SCIU, 0, 0, 0, 0, 0, // PC_MFVSCR + SCIU, 0, 0, 0, 0, 0, // PC_MTVSCR + SCIU, 0, 0, 0, 0, 0, // PC_VADDCUW + SCIU, 0, 0, 0, 0, 0, // PC_VADDFP + SCIU, 0, 0, 0, 0, 0, // PC_VADDSBS + SCIU, 0, 0, 0, 0, 0, // PC_VADDSHS + SCIU, 0, 0, 0, 0, 0, // PC_VADDSWS + SCIU, 0, 0, 0, 0, 0, // PC_VADDUBM + SCIU, 0, 0, 0, 0, 0, // PC_VADDUBS + SCIU, 0, 0, 0, 0, 0, // PC_VADDUHM + SCIU, 0, 0, 0, 0, 0, // PC_VADDUHS + SCIU, 0, 0, 0, 0, 0, // PC_VADDUWM + SCIU, 0, 0, 0, 0, 0, // PC_VADDUWS + SCIU, 0, 0, 0, 0, 0, // PC_VAND + SCIU, 0, 0, 0, 0, 0, // PC_VANDC + SCIU, 0, 0, 0, 0, 0, // PC_VAVGSB + SCIU, 0, 0, 0, 0, 0, // PC_VAVGSH + SCIU, 0, 0, 0, 0, 0, // PC_VAVGSW + SCIU, 0, 0, 0, 0, 0, // PC_VAVGUB + SCIU, 0, 0, 0, 0, 0, // PC_VAVGUH + SCIU, 0, 0, 0, 0, 0, // PC_VAVGUW + SCIU, 0, 0, 0, 0, 0, // PC_VCFSX + SCIU, 0, 0, 0, 0, 0, // PC_VCFUX + SCIU, 0, 0, 0, 0, 0, // PC_VCMPBFP + SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQFP + SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQUB + SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQUH + SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQUW + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGEFP + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTFP + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTSB + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTSH + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTSW + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTUB + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTUH + SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTUW + SCIU, 0, 0, 0, 0, 0, // PC_VCTSXS + SCIU, 0, 0, 0, 0, 0, // PC_VCTUXS + SCIU, 0, 0, 0, 0, 0, // PC_VEXPTEFP + SCIU, 0, 0, 0, 0, 0, // PC_VLOGEFP + SCIU, 0, 0, 0, 0, 0, // PC_VMAXFP + SCIU, 0, 0, 0, 0, 0, // PC_VMAXSB + SCIU, 0, 0, 0, 0, 0, // PC_VMAXSH + SCIU, 0, 0, 0, 0, 0, // PC_VMAXSW + SCIU, 0, 0, 0, 0, 0, // PC_VMAXUB + SCIU, 0, 0, 0, 0, 0, // PC_VMAXUH + SCIU, 0, 0, 0, 0, 0, // PC_VMAXUW + SCIU, 0, 0, 0, 0, 0, // PC_VMINFP + SCIU, 0, 0, 0, 0, 0, // PC_VMINSB + SCIU, 0, 0, 0, 0, 0, // PC_VMINSH + SCIU, 0, 0, 0, 0, 0, // PC_VMINSW + SCIU, 0, 0, 0, 0, 0, // PC_VMINUB + SCIU, 0, 0, 0, 0, 0, // PC_VMINUH + SCIU, 0, 0, 0, 0, 0, // PC_VMINUW + SCIU, 0, 0, 0, 0, 0, // PC_VMRGHB + SCIU, 0, 0, 0, 0, 0, // PC_VMRGHH + SCIU, 0, 0, 0, 0, 0, // PC_VMRGHW + SCIU, 0, 0, 0, 0, 0, // PC_VMRGLB + SCIU, 0, 0, 0, 0, 0, // PC_VMRGLH + SCIU, 0, 0, 0, 0, 0, // PC_VMRGLW + SCIU, 0, 0, 0, 0, 0, // PC_VMULESB + SCIU, 0, 0, 0, 0, 0, // PC_VMULESH + SCIU, 0, 0, 0, 0, 0, // PC_VMULEUB + SCIU, 0, 0, 0, 0, 0, // PC_VMULEUH + SCIU, 0, 0, 0, 0, 0, // PC_VMULOSB + SCIU, 0, 0, 0, 0, 0, // PC_VMULOSH + SCIU, 0, 0, 0, 0, 0, // PC_VMULOUB + SCIU, 0, 0, 0, 0, 0, // PC_VMULOUH + SCIU, 0, 0, 0, 0, 0, // PC_VNOR + SCIU, 0, 0, 0, 0, 0, // PC_VOR + SCIU, 0, 0, 0, 0, 0, // PC_VPKPX + SCIU, 0, 0, 0, 0, 0, // PC_VPKSHSS + SCIU, 0, 0, 0, 0, 0, // PC_VPKSHUS + SCIU, 0, 0, 0, 0, 0, // PC_VPKSWSS + SCIU, 0, 0, 0, 0, 0, // PC_VPKSWUS + SCIU, 0, 0, 0, 0, 0, // PC_VPKUHUM + SCIU, 0, 0, 0, 0, 0, // PC_VPKUHUS + SCIU, 0, 0, 0, 0, 0, // PC_VPKUWUM + SCIU, 0, 0, 0, 0, 0, // PC_VPKUWUS + SCIU, 0, 0, 0, 0, 0, // PC_VREFP + SCIU, 0, 0, 0, 0, 0, // PC_VRFIM + SCIU, 0, 0, 0, 0, 0, // PC_VRFIN + SCIU, 0, 0, 0, 0, 0, // PC_VRFIP + SCIU, 0, 0, 0, 0, 0, // PC_VRFIZ + SCIU, 0, 0, 0, 0, 0, // PC_VRLB + SCIU, 0, 0, 0, 0, 0, // PC_VRLH + SCIU, 0, 0, 0, 0, 0, // PC_VRLW + SCIU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP + SCIU, 0, 0, 0, 0, 0, // PC_VSL + SCIU, 0, 0, 0, 0, 0, // PC_VSLB + SCIU, 0, 0, 0, 0, 0, // PC_VSLH + SCIU, 0, 0, 0, 0, 0, // PC_VSLO + SCIU, 0, 0, 0, 0, 0, // PC_VSLW + SCIU, 0, 0, 0, 0, 0, // PC_VSPLTB + SCIU, 0, 0, 0, 0, 0, // PC_VSPLTH + SCIU, 0, 0, 0, 0, 0, // PC_VSPLTW + SCIU, 0, 0, 0, 0, 0, // PC_VSPLTISB + SCIU, 0, 0, 0, 0, 0, // PC_VSPLTISH + SCIU, 0, 0, 0, 0, 0, // PC_VSPLTISW + SCIU, 0, 0, 0, 0, 0, // PC_VSR + SCIU, 0, 0, 0, 0, 0, // PC_VSRAB + SCIU, 0, 0, 0, 0, 0, // PC_VSRAH + SCIU, 0, 0, 0, 0, 0, // PC_VSRAW + SCIU, 0, 0, 0, 0, 0, // PC_VSRB + SCIU, 0, 0, 0, 0, 0, // PC_VSRH + SCIU, 0, 0, 0, 0, 0, // PC_VSRO + SCIU, 0, 0, 0, 0, 0, // PC_VSRW + SCIU, 0, 0, 0, 0, 0, // PC_VSUBCUW + SCIU, 0, 0, 0, 0, 0, // PC_VSUBFP + SCIU, 0, 0, 0, 0, 0, // PC_VSUBSBS + SCIU, 0, 0, 0, 0, 0, // PC_VSUBSHS + SCIU, 0, 0, 0, 0, 0, // PC_VSUBSWS + SCIU, 0, 0, 0, 0, 0, // PC_VSUBUBM + SCIU, 0, 0, 0, 0, 0, // PC_VSUBUBS + SCIU, 0, 0, 0, 0, 0, // PC_VSUBUHM + SCIU, 0, 0, 0, 0, 0, // PC_VSUBUHS + SCIU, 0, 0, 0, 0, 0, // PC_VSUBUWM + SCIU, 0, 0, 0, 0, 0, // PC_VSUBUWS + SCIU, 0, 0, 0, 0, 0, // PC_VSUMSWS + SCIU, 0, 0, 0, 0, 0, // PC_VSUM2SWS + SCIU, 0, 0, 0, 0, 0, // PC_VSUM4SBS + SCIU, 0, 0, 0, 0, 0, // PC_VSUM4SHS + SCIU, 0, 0, 0, 0, 0, // PC_VSUM4UBS + SCIU, 0, 0, 0, 0, 0, // PC_VUPKHPX + SCIU, 0, 0, 0, 0, 0, // PC_VUPKHSB + SCIU, 0, 0, 0, 0, 0, // PC_VUPKHSH + SCIU, 0, 0, 0, 0, 0, // PC_VUPKLPX + SCIU, 0, 0, 0, 0, 0, // PC_VUPKLSB + SCIU, 0, 0, 0, 0, 0, // PC_VUPKLSH + SCIU, 0, 0, 0, 0, 0, // PC_VXOR + SCIU, 0, 0, 0, 0, 0, // PC_VMADDFP + SCIU, 0, 0, 0, 0, 0, // PC_VMHADDSHS + SCIU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS + SCIU, 0, 0, 0, 0, 0, // PC_VMLADDUHM + SCIU, 0, 0, 0, 0, 0, // PC_VMSUMMBM + SCIU, 0, 0, 0, 0, 0, // PC_VMSUMSHM + SCIU, 0, 0, 0, 0, 0, // PC_VMSUMSHS + SCIU, 0, 0, 0, 0, 0, // PC_VMSUMUBM + SCIU, 0, 0, 0, 0, 0, // PC_VMSUMUHM + SCIU, 0, 0, 0, 0, 0, // PC_VMSUMUHS + SCIU, 0, 0, 0, 0, 0, // PC_VNMSUBFP + SCIU, 0, 0, 0, 0, 0, // PC_VPERM + SCIU, 0, 0, 0, 0, 0, // PC_VSEL + SCIU, 0, 0, 0, 0, 0, // PC_VSLDOI + SCIU, 0, 0, 0, 0, 0, // PC_VMR + SCIU, 0, 0, 0, 0, 0, // PC_VMRP + SCIU, 0, 0, 0, 0, 0, // PC_SLE + SCIU, 0, 0, 0, 0, 0, // PC_SLEQ + SCIU, 0, 0, 0, 0, 0, // PC_SLIQ + SCIU, 0, 0, 0, 0, 0, // PC_SLLIQ + SCIU, 0, 0, 0, 0, 0, // PC_SLLQ + SCIU, 0, 0, 0, 0, 0, // PC_SLQ + SCIU, 0, 0, 0, 0, 0, // PC_SRAIQ + SCIU, 0, 0, 0, 0, 0, // PC_SRAQ + SCIU, 0, 0, 0, 0, 0, // PC_SRE + SCIU, 0, 0, 0, 0, 0, // PC_SREA + SCIU, 0, 0, 0, 0, 0, // PC_SREQ + SCIU, 0, 0, 0, 0, 0, // PC_SRIQ + SCIU, 0, 0, 0, 0, 0, // PC_SRLIQ + SCIU, 0, 0, 0, 0, 0, // PC_SRLQ + SCIU, 0, 0, 0, 0, 0, // PC_SRQ + SCIU, 0, 0, 0, 0, 0, // PC_MASKG + SCIU, 0, 0, 0, 0, 0, // PC_MASKIR + SCIU, 0, 0, 0, 0, 0, // PC_LSCBX + SCIU, 0, 0, 0, 0, 0, // PC_DIV + SCIU, 0, 0, 0, 0, 0, // PC_DIVS + SCIU, 0, 0, 0, 0, 0, // PC_DOZ + SCIU, 0, 0, 0, 0, 0, // PC_MUL + SCIU, 0, 0, 0, 0, 0, // PC_NABS + SCIU, 0, 0, 0, 0, 0, // PC_ABS + SCIU, 0, 0, 0, 0, 0, // PC_CLCS + SCIU, 0, 0, 0, 0, 0, // PC_DOZI + SCIU, 0, 0, 0, 0, 0, // PC_RLMI + SCIU, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline[stage].instr = NULL; + + if (stage == SCIU) + sciu_completed_instruction = instr; + else if (stage == SCIU2) + sciu2_completed_instruction = instr; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; + + completionbuffers.free = MaxEntries; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) + completionbuffers.entries[i].instr = NULL; + + sciu_completed_instruction = NULL; + sciu2_completed_instruction = NULL; +} + +static int can_issue(PCode *instr) { + PCode *check; + int stage = instruction_timing[instr->op].stage; + + if (completionbuffers.free == 0) + return 0; + + if (stage == SCIU) { + int isClear1 = !pipeline[SCIU].instr; + int isClear2 = !pipeline[SCIU2].instr; + if (!isClear1 && !isClear2) + return 0; + if (isClear1 && isClear2) + return 1; + + if (isClear1) + check = pipeline[SCIU2].instr; + else + check = pipeline[SCIU].instr; + + if (is_dependent(instr, check, RegClass_GPR)) + return 0; + if (is_dependent(instr, sciu_completed_instruction, RegClass_GPR)) + return 0; + if (is_dependent(instr, sciu2_completed_instruction, RegClass_GPR)) + return 0; + } else { + if (pipeline[stage].instr) + return 0; + } + + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + if (stage == SCIU && pipeline[SCIU].instr) + stage = SCIU2; + assign_completion_buffer(instr); + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + int i; + + sciu_completed_instruction = NULL; + sciu2_completed_instruction = NULL; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + for (i = 0; i < 5; i++) { + if (completionbuffers.used == 0) + break; + if (completionbuffers.entries[completionbuffers.nextToRetire].completed == 0) + break; + retire_instruction(); + } + + if (pipeline[SCIU].instr && pipeline[SCIU].remaining == 0) + complete_instruction(SCIU); + if (pipeline[SCIU2].instr && pipeline[SCIU2].remaining == 0) + complete_instruction(SCIU2); + if (pipeline[MCIU].instr && pipeline[MCIU].remaining == 0) + complete_instruction(MCIU); + if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0) + complete_instruction(LSU2); + if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0) + complete_instruction(FPU3); + if (pipeline[BPU].instr && pipeline[BPU].remaining == 0) + complete_instruction(BPU); + + if ( + pipeline[FPU1].instr && + pipeline[FPU1].remaining == 0 && + (pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS) + ) + complete_instruction(FPU1); + + if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr) + advance(FPU1, FPU2, FPU3); + if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr) + advance(FPU1, FPU1, FPU2); + if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr) + advance(LSU1, LSU1, LSU2); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +MachineInfo machine604 = { + 4, + 1, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &default_uses_vpermute_unit +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation7400.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation7400.c new file mode 100644 index 0000000..56b375c --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation7400.c @@ -0,0 +1,744 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://www.nxp.com/docs/en/reference-manual/MPC7410UM.pdf + +typedef enum Stage { + BPU, // Branch Prediction Unit + IU1, // Integer Unit 1 + IU2, // Integer Unit 2 + + LSU1, // Load/Store Unit + LSU2, + + FPU1, // Floating Point Unit + FPU2, + FPU3, + + SRU, // System Register Unit + VSIU, // Vector Simple Integer Unit + VPU, // AltiVec Permute Unit + + VCIU1, // Vector Complex Integer Unit + VCIU2, + VCIU3, + + VFPU1, // Vector Floating-Point Unit + VFPU2, + VFPU3, + VFPU4, + + NumStages +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +static PCode *iu1_completed_instruction; +static PCode *iu2_completed_instruction; + +enum { + MaxEntries = 8 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[4]; + + // does this instruction serialise? + char serializes; + + char unused; +} instruction_timing[OPCODE_MAX] = { + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_B + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BL + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BC + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCLR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCCTR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BT + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTLR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTCTR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BF + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFLR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFCTR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZT + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZF + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZT + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZF + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTRL + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLRL + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZ + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZ + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHA + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHAU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHAX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHAUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHBRX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZ + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWBRX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LMW + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STB + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STBU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STBX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STBUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STH + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHBRX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STW + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWBRX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STMW + LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBF + LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBST + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DCBT + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DCBTST + LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBZ + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADD + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDE + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDICR + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIS + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDME + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDZE + IU1, 19, 19, 0, 0, 0, 0, 0, // PC_DIVW + IU1, 19, 19, 0, 0, 0, 0, 0, // PC_DIVWU + IU1, 5, 5, 0, 0, 0, 0, 0, // PC_MULHW + IU1, 6, 5, 0, 0, 0, 0, 0, // PC_MULHWU + IU1, 3, 3, 0, 0, 0, 0, 0, // PC_MULLI + IU1, 5, 5, 0, 0, 0, 0, 0, // PC_MULLW + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NEG + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBF + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFE + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFIC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFME + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFZE + IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMPI + IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMP + IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMPLI + IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMPL + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ANDI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ANDIS + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ORI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ORIS + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_XORI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_XORIS + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_AND + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_OR + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_XOR + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NAND + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NOR + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_EQV + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ANDC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ORC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSB + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSH + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_CNTLZW + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_RLWINM + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_RLWNM + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_RLWIMI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SLW + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SRW + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SRAWI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SRAW + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRAND + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRANDC + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CREQV + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRNAND + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRNOR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CROR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRORC + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRXOR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MCRF + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTXER + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTCTR + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTLR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MTCRF + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MTMSR + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTSPR + SRU, 1, 1, 0, 0, 0, 0, 0, // PC_MFMSR + SRU, 3, 3, 0, 0, 0, 1, 0, // PC_MFSPR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFXER + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFCTR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFLR + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFCR + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_MFFS + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_MTFSF + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_EIEIO + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_ISYNC + SRU, 3, 3, 0, 0, 0, 1, 0, // PC_SYNC + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_RFI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_LI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_LIS + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_MR + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NOP + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NOT + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFS + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFSU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFSX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFSUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFD + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFDU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFDX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFDUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFS + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFSU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFSX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFSUX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFD + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFDU + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFDX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFDUX + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMR + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FABS + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNEG + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNABS + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FADD + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FADDS + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FSUB + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FSUBS + FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FMUL + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMULS + FPU1, 31, 31, 0, 0, 0, 0, 0, // PC_FDIV + FPU1, 17, 17, 0, 0, 0, 0, 0, // PC_FDIVS + FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FMADD + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMADDS + FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FMSUB + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMSUBS + FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FNMADD + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNMADDS + FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FNMSUB + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNMSUBS + FPU1, 10, 10, 0, 0, 0, 0, 0, // PC_FRES + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FRSQRTE + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FSEL + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FRSP + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCTIW + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCTIWZ + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCMPU + FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCMPO + LSU1, 2, 1, 1, 0, 0, 1, 0, // PC_LWARX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LSWI + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LSWX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFIWX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STSWI + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STSWX + LSU1, 2, 1, 1, 0, 0, 1, 0, // PC_STWCX + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ECIWX + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ECOWX + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_DCBI + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ICBI + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MCRFS + SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MCRXR + SRU, 1, 1, 0, 0, 0, 0, 0, // PC_MFTB + SRU, 3, 3, 0, 0, 0, 0, 0, // PC_MFSR + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTSR + SRU, 3, 3, 0, 0, 0, 0, 0, // PC_MFSRIN + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTSRIN + FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_MTFSB0 + FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_MTFSB1 + FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_MTFSFI + SRU, 2, 2, 0, 0, 0, 1, 0, // PC_SC + FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRT + FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRTS + LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIA + LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIE + LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBLD + LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBLI + LSU1, 1, 1, 0, 0, 0, 1, 0, // PC_TLBSYNC + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_TW + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_TRAP + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_TWI + IU2, 1, 1, 0, 0, 0, 1, 0, // PC_OPWORD + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_MFROM + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_DSA + IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ESA + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_DCCCI + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_DCREAD + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_ICBT + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_ICCCI + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_ICREAD + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_RFCI + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_TLBRE + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_TLBSX + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_TLBWE + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEE + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEEI + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_MFDCR + IU2, 1, 0, 0, 0, 0, 0, 0, // PC_MTDCR + LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBA + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSS + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSSALL + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DST + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSTT + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSTST + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSTSTT + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVEBX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVEHX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVEWX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVSL + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVSR + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVXL + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVEBX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVEHX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVEWX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVX + LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVXL + VSIU, 1, 1, 0, 0, 0, 1, 0, // PC_MFVSCR + VSIU, 1, 1, 0, 0, 0, 1, 0, // PC_MTVSCR + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDCUW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VADDFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSBS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSHS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSWS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAND + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VANDC + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFSX + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFUX + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPBFP + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPEQFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPGEFP + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPGTFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTSXS + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTUXS + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VEXPTEFP + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VLOGEFP + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VMAXFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VMINFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUW + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGHB + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGHH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGHW + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGLB + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGLH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGLW + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULESB + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULESH + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULEUB + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULEUH + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOSB + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOSH + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOUB + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOUH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VNOR + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VOR + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKPX + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSHSS + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSHUS + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSWSS + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSWUS + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUHUM + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUHUS + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUWUM + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUWUS + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VREFP + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIM + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIN + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIP + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIZ + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VRLB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VRLH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VRLW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRSQRTEFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSL + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLO + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLW + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTB + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTW + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTISB + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTISH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTISW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSR + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRB + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRO + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRW + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBCUW + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUBFP + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSBS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSHS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSWS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHS + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUMSWS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM2SWS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM4SBS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM4SHS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM4UBS + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKHPX + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKHSB + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKHSH + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKLPX + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKLSB + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKLSH + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VXOR + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VMADDFP + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMHADDSHS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMHRADDSHS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMLADDUHM + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMMBM + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMSHM + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMSHS + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMUBM + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMUHM + VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMUHS + VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VNMSUBFP + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPERM + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSEL + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLDOI + VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMR + VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRP + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLE + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLEQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLIQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLLIQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLLQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRAIQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRAQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRE + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SREA + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SREQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRIQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRLIQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRLQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRQ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_MASKG + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_MASKIR + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_LSCBX + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DIV + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DIVS + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DOZ + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_MUL + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_NABS + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_ABS + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_CLCS + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DOZI + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_RLMI + BPU, 0, 0, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline[stage].instr = NULL; + + if (stage == IU1) + iu1_completed_instruction = instr; + else if (stage == IU2) + iu2_completed_instruction = instr; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; + + completionbuffers.free = MaxEntries; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) + completionbuffers.entries[i].instr = NULL; + + iu1_completed_instruction = NULL; + iu2_completed_instruction = NULL; +} + +static int can_issue(PCode *instr) { + int stage; + + if (completionbuffers.free == 0) + return 0; + + stage = instruction_timing[instr->op].stage; + + if (stage == IU2) { + PCode *check; + int isClear1 = !pipeline[IU1].instr; + int isClear2 = !pipeline[IU2].instr; + if (!isClear1 && !isClear2) + return 0; + if (isClear1 && isClear2) + return 1; + + if (isClear1) + check = pipeline[IU2].instr; + else + check = pipeline[IU1].instr; + + if (is_dependent(instr, check, RegClass_GPR)) + return 0; + if (is_dependent(instr, iu1_completed_instruction, RegClass_GPR)) + return 0; + if (is_dependent(instr, iu2_completed_instruction, RegClass_GPR)) + return 0; + } else if (stage == VFPU1 || stage == VCIU1 || stage == VSIU || stage == VPU) { + PCode *check; + int isVpuClear = !pipeline[VPU].instr; + int isVFpuClear = !pipeline[VFPU1].instr; + int isVCiuClear = !pipeline[VCIU1].instr; + int isVSiuClear = !pipeline[VSIU].instr; + + if (stage == VPU) { + if (!isVpuClear) + return 0; + + if (!isVFpuClear) + check = pipeline[VFPU1].instr; + else if (!isVCiuClear) + check = pipeline[VCIU1].instr; + else if (!isVSiuClear) + check = pipeline[VSIU].instr; + else + check = NULL; + + if (is_dependent(instr, check, RegClass_VR)) + return 0; + } else { + if (!isVFpuClear || !isVCiuClear || !isVSiuClear) + return 0; + + if (!isVpuClear && is_dependent(instr, pipeline[VPU].instr, RegClass_VR)) + return 0; + } + } else { + if (pipeline[stage].instr) + return 0; + } + + if ((instr->flags & fIsWrite) && pipeline[LSU2].instr && (pipeline[LSU2].instr->flags & fIsWrite)) + return 0; + + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + assign_completion_buffer(instr); + if (stage == IU2 && !pipeline[IU1].instr) + stage = IU1; + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + + iu1_completed_instruction = NULL; + iu2_completed_instruction = NULL; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + } + } + + if (pipeline[IU1].instr && pipeline[IU1].remaining == 0) + complete_instruction(IU1); + if (pipeline[VPU].instr && pipeline[VPU].remaining == 0) + complete_instruction(VPU); + if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0) + complete_instruction(LSU2); + if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0) + complete_instruction(FPU3); + if (pipeline[SRU].instr && pipeline[SRU].remaining == 0) + complete_instruction(SRU); + if (pipeline[BPU].instr && pipeline[BPU].remaining == 0) + complete_instruction(BPU); + if (pipeline[VSIU].instr && pipeline[VSIU].remaining == 0) + complete_instruction(VSIU); + if (pipeline[VCIU3].instr && pipeline[VCIU3].remaining == 0) + complete_instruction(VCIU3); + if (pipeline[VFPU4].instr && pipeline[VFPU4].remaining == 0) + complete_instruction(VFPU4); + if (pipeline[IU2].instr && pipeline[IU2].remaining == 0) + complete_instruction(IU2); + + if ( + pipeline[FPU1].instr && + pipeline[FPU1].remaining == 0 && + (pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS) + ) + complete_instruction(FPU1); + + if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr) + advance(FPU1, FPU2, FPU3); + if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr) + advance(FPU1, FPU1, FPU2); + + if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr) + advance(LSU1, LSU1, LSU2); + + if (pipeline[VCIU2].instr && pipeline[VCIU2].remaining == 0 && !pipeline[VCIU3].instr) + advance(VCIU1, VCIU2, VCIU3); + if (pipeline[VCIU1].instr && pipeline[VCIU1].remaining == 0 && !pipeline[VCIU2].instr) + advance(VCIU1, VCIU1, VCIU2); + + if (pipeline[VFPU3].instr && pipeline[VFPU3].remaining == 0 && !pipeline[VFPU4].instr) + advance(VFPU1, VFPU3, VFPU4); + if (pipeline[VFPU2].instr && pipeline[VFPU2].remaining == 0 && !pipeline[VFPU3].instr) + advance(VFPU1, VFPU2, VFPU3); + if (pipeline[VFPU1].instr && pipeline[VFPU1].remaining == 0 && !pipeline[VFPU2].instr) + advance(VFPU1, VFPU1, VFPU2); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +static int uses_vpermute_unit_7400(PCode *instr) { + return instruction_timing[instr->op].stage == VPU; +} + +MachineInfo machine7400 = { + 2, + 1, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &uses_vpermute_unit_7400 +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation750.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation750.c new file mode 100644 index 0000000..d412df3 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation750.c @@ -0,0 +1,678 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://www.nxp.com/docs/en/reference-manual/MPC750UM.pdf + +typedef enum Stage { + BPU, // Branch Prediction Unit + IU1, // Integer Unit 1 + IU2, // Integer Unit 2 + + LSU1, // Load/Store Unit + LSU2, + + FPU1, // Floating Point Unit + FPU2, + FPU3, + + SRU, // System Register Unit + + NumStages +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +static PCode *iu1_completed_instruction; +static PCode *iu2_completed_instruction; + +enum { + MaxEntries = 6 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[3]; + + // does this instruction serialise? + char serializes; +} instruction_timing[OPCODE_MAX] = { + BPU, 0, 0, 0, 0, 1, // PC_B + BPU, 0, 0, 0, 0, 1, // PC_BL + BPU, 0, 0, 0, 0, 1, // PC_BC + BPU, 0, 0, 0, 0, 1, // PC_BCLR + BPU, 0, 0, 0, 0, 1, // PC_BCCTR + BPU, 0, 0, 0, 0, 1, // PC_BT + BPU, 0, 0, 0, 0, 1, // PC_BTLR + BPU, 0, 0, 0, 0, 1, // PC_BTCTR + BPU, 0, 0, 0, 0, 1, // PC_BF + BPU, 0, 0, 0, 0, 1, // PC_BFLR + BPU, 0, 0, 0, 0, 1, // PC_BFCTR + BPU, 0, 0, 0, 0, 1, // PC_BDNZ + BPU, 0, 0, 0, 0, 1, // PC_BDNZT + BPU, 0, 0, 0, 0, 1, // PC_BDNZF + BPU, 0, 0, 0, 0, 1, // PC_BDZ + BPU, 0, 0, 0, 0, 1, // PC_BDZT + BPU, 0, 0, 0, 0, 1, // PC_BDZF + BPU, 0, 0, 0, 0, 1, // PC_BLR + BPU, 0, 0, 0, 0, 1, // PC_BCTR + BPU, 0, 0, 0, 0, 1, // PC_BCTRL + BPU, 0, 0, 0, 0, 1, // PC_BLRL + LSU1, 2, 1, 1, 0, 0, // PC_LBZ + LSU1, 2, 1, 1, 0, 0, // PC_LBZU + LSU1, 2, 1, 1, 0, 0, // PC_LBZX + LSU1, 2, 1, 1, 0, 0, // PC_LBZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHZ + LSU1, 2, 1, 1, 0, 0, // PC_LHZU + LSU1, 2, 1, 1, 0, 0, // PC_LHZX + LSU1, 2, 1, 1, 0, 0, // PC_LHZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHA + LSU1, 2, 1, 1, 0, 0, // PC_LHAU + LSU1, 2, 1, 1, 0, 0, // PC_LHAX + LSU1, 2, 1, 1, 0, 0, // PC_LHAUX + LSU1, 2, 1, 1, 0, 0, // PC_LHBRX + LSU1, 2, 1, 1, 0, 0, // PC_LWZ + LSU1, 2, 1, 1, 0, 0, // PC_LWZU + LSU1, 2, 1, 1, 0, 0, // PC_LWZX + LSU1, 2, 1, 1, 0, 0, // PC_LWZUX + LSU1, 2, 1, 1, 0, 0, // PC_LWBRX + LSU1, 2, 1, 1, 0, 0, // PC_LMW + LSU1, 2, 1, 1, 0, 0, // PC_STB + LSU1, 2, 1, 1, 0, 0, // PC_STBU + LSU1, 2, 1, 1, 0, 0, // PC_STBX + LSU1, 2, 1, 1, 0, 0, // PC_STBUX + LSU1, 2, 1, 1, 0, 0, // PC_STH + LSU1, 2, 1, 1, 0, 0, // PC_STHU + LSU1, 2, 1, 1, 0, 0, // PC_STHX + LSU1, 2, 1, 1, 0, 0, // PC_STHUX + LSU1, 2, 1, 1, 0, 0, // PC_STHBRX + LSU1, 2, 1, 1, 0, 0, // PC_STW + LSU1, 2, 1, 1, 0, 0, // PC_STWU + LSU1, 2, 1, 1, 0, 0, // PC_STWX + LSU1, 2, 1, 1, 0, 0, // PC_STWUX + LSU1, 2, 1, 1, 0, 0, // PC_STWBRX + LSU1, 2, 1, 1, 0, 0, // PC_STMW + LSU1, 3, 1, 2, 0, 0, // PC_DCBF + LSU1, 3, 1, 2, 0, 0, // PC_DCBST + LSU1, 2, 1, 1, 0, 0, // PC_DCBT + LSU1, 2, 1, 1, 0, 0, // PC_DCBTST + LSU1, 3, 1, 2, 0, 0, // PC_DCBZ + IU2, 1, 1, 0, 0, 0, // PC_ADD + IU2, 1, 1, 0, 0, 0, // PC_ADDC + IU2, 1, 1, 0, 0, 0, // PC_ADDE + IU2, 1, 1, 0, 0, 0, // PC_ADDI + IU2, 1, 1, 0, 0, 0, // PC_ADDIC + IU2, 1, 1, 0, 0, 0, // PC_ADDICR + IU2, 1, 1, 0, 0, 0, // PC_ADDIS + IU2, 1, 1, 0, 0, 0, // PC_ADDME + IU2, 1, 1, 0, 0, 0, // PC_ADDZE + IU1, 19, 19, 0, 0, 0, // PC_DIVW + IU1, 19, 19, 0, 0, 0, // PC_DIVWU + IU1, 5, 5, 0, 0, 0, // PC_MULHW + IU1, 6, 5, 0, 0, 0, // PC_MULHWU + IU1, 3, 3, 0, 0, 0, // PC_MULLI + IU1, 5, 5, 0, 0, 0, // PC_MULLW + IU2, 1, 1, 0, 0, 0, // PC_NEG + IU2, 1, 1, 0, 0, 0, // PC_SUBF + IU2, 1, 1, 0, 0, 0, // PC_SUBFC + IU2, 1, 1, 0, 0, 0, // PC_SUBFE + IU2, 1, 1, 0, 0, 0, // PC_SUBFIC + IU2, 1, 1, 0, 0, 0, // PC_SUBFME + IU2, 1, 1, 0, 0, 0, // PC_SUBFZE + IU2, 3, 1, 0, 0, 0, // PC_CMPI + IU2, 3, 1, 0, 0, 0, // PC_CMP + IU2, 3, 1, 0, 0, 0, // PC_CMPLI + IU2, 3, 1, 0, 0, 0, // PC_CMPL + IU2, 1, 1, 0, 0, 0, // PC_ANDI + IU2, 1, 1, 0, 0, 0, // PC_ANDIS + IU2, 1, 1, 0, 0, 0, // PC_ORI + IU2, 1, 1, 0, 0, 0, // PC_ORIS + IU2, 1, 1, 0, 0, 0, // PC_XORI + IU2, 1, 1, 0, 0, 0, // PC_XORIS + IU2, 1, 1, 0, 0, 0, // PC_AND + IU2, 1, 1, 0, 0, 0, // PC_OR + IU2, 1, 1, 0, 0, 0, // PC_XOR + IU2, 1, 1, 0, 0, 0, // PC_NAND + IU2, 1, 1, 0, 0, 0, // PC_NOR + IU2, 1, 1, 0, 0, 0, // PC_EQV + IU2, 1, 1, 0, 0, 0, // PC_ANDC + IU2, 1, 1, 0, 0, 0, // PC_ORC + IU2, 1, 1, 0, 0, 0, // PC_EXTSB + IU2, 1, 1, 0, 0, 0, // PC_EXTSH + IU2, 1, 1, 0, 0, 0, // PC_CNTLZW + IU2, 1, 1, 0, 0, 0, // PC_RLWINM + IU2, 1, 1, 0, 0, 0, // PC_RLWNM + IU2, 1, 1, 0, 0, 0, // PC_RLWIMI + IU2, 1, 1, 0, 0, 0, // PC_SLW + IU2, 1, 1, 0, 0, 0, // PC_SRW + IU2, 1, 1, 0, 0, 0, // PC_SRAWI + IU2, 1, 1, 0, 0, 0, // PC_SRAW + SRU, 1, 1, 0, 0, 1, // PC_CRAND + SRU, 1, 1, 0, 0, 1, // PC_CRANDC + SRU, 1, 1, 0, 0, 1, // PC_CREQV + SRU, 1, 1, 0, 0, 1, // PC_CRNAND + SRU, 1, 1, 0, 0, 1, // PC_CRNOR + SRU, 1, 1, 0, 0, 1, // PC_CROR + SRU, 1, 1, 0, 0, 1, // PC_CRORC + SRU, 1, 1, 0, 0, 1, // PC_CRXOR + SRU, 1, 1, 0, 0, 1, // PC_MCRF + SRU, 2, 2, 0, 0, 1, // PC_MTXER + SRU, 2, 2, 0, 0, 1, // PC_MTCTR + SRU, 2, 2, 0, 0, 1, // PC_MTLR + SRU, 1, 1, 0, 0, 1, // PC_MTCRF + SRU, 1, 1, 0, 0, 0, // PC_MTMSR + SRU, 1, 1, 0, 0, 1, // PC_MTSPR + SRU, 1, 1, 0, 0, 1, // PC_MFMSR + SRU, 1, 1, 0, 0, 1, // PC_MFSPR + SRU, 1, 1, 0, 0, 1, // PC_MFXER + SRU, 1, 1, 0, 0, 1, // PC_MFCTR + SRU, 1, 1, 0, 0, 1, // PC_MFLR + SRU, 1, 1, 0, 0, 1, // PC_MFCR + FPU1, 3, 1, 1, 1, 0, // PC_MFFS + FPU1, 3, 1, 1, 1, 0, // PC_MTFSF + SRU, 1, 1, 0, 0, 1, // PC_EIEIO + SRU, 2, 2, 0, 0, 1, // PC_ISYNC + SRU, 3, 3, 0, 0, 1, // PC_SYNC + SRU, 1, 1, 0, 0, 1, // PC_RFI + IU2, 1, 1, 0, 0, 0, // PC_LI + IU2, 1, 1, 0, 0, 0, // PC_LIS + IU2, 1, 1, 0, 0, 0, // PC_MR + IU2, 1, 1, 0, 0, 0, // PC_NOP + IU2, 1, 1, 0, 0, 0, // PC_NOT + LSU1, 2, 1, 1, 0, 0, // PC_LFS + LSU1, 2, 1, 1, 0, 0, // PC_LFSU + LSU1, 2, 1, 1, 0, 0, // PC_LFSX + LSU1, 2, 1, 1, 0, 0, // PC_LFSUX + LSU1, 2, 1, 1, 0, 0, // PC_LFD + LSU1, 2, 1, 1, 0, 0, // PC_LFDU + LSU1, 2, 1, 1, 0, 0, // PC_LFDX + LSU1, 2, 1, 1, 0, 0, // PC_LFDUX + LSU1, 2, 1, 1, 0, 0, // PC_STFS + LSU1, 2, 1, 1, 0, 0, // PC_STFSU + LSU1, 2, 1, 1, 0, 0, // PC_STFSX + LSU1, 2, 1, 1, 0, 0, // PC_STFSUX + LSU1, 2, 1, 1, 0, 0, // PC_STFD + LSU1, 2, 1, 1, 0, 0, // PC_STFDU + LSU1, 2, 1, 1, 0, 0, // PC_STFDX + LSU1, 2, 1, 1, 0, 0, // PC_STFDUX + FPU1, 3, 1, 1, 1, 0, // PC_FMR + FPU1, 3, 1, 1, 1, 0, // PC_FABS + FPU1, 3, 1, 1, 1, 0, // PC_FNEG + FPU1, 3, 1, 1, 1, 0, // PC_FNABS + FPU1, 3, 1, 1, 1, 0, // PC_FADD + FPU1, 3, 1, 1, 1, 0, // PC_FADDS + FPU1, 3, 1, 1, 1, 0, // PC_FSUB + FPU1, 3, 1, 1, 1, 0, // PC_FSUBS + FPU1, 4, 2, 1, 1, 0, // PC_FMUL + FPU1, 3, 1, 1, 1, 0, // PC_FMULS + FPU1, 31, 31, 0, 0, 0, // PC_FDIV + FPU1, 17, 17, 0, 0, 0, // PC_FDIVS + FPU1, 4, 2, 1, 1, 0, // PC_FMADD + FPU1, 3, 1, 1, 1, 0, // PC_FMADDS + FPU1, 4, 2, 1, 1, 0, // PC_FMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS + FPU1, 4, 2, 1, 1, 0, // PC_FNMADD + FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS + FPU1, 4, 2, 1, 1, 0, // PC_FNMSUB + FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS + FPU1, 10, 10, 0, 0, 0, // PC_FRES + FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE + FPU1, 3, 1, 1, 1, 0, // PC_FSEL + FPU1, 3, 1, 1, 1, 0, // PC_FRSP + FPU1, 3, 1, 1, 1, 0, // PC_FCTIW + FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ + FPU1, 3, 1, 1, 1, 0, // PC_FCMPU + FPU1, 3, 1, 1, 1, 0, // PC_FCMPO + LSU1, 1, 1, 0, 0, 0, // PC_LWARX + LSU1, 1, 1, 0, 0, 0, // PC_LSWI + LSU1, 1, 1, 0, 0, 0, // PC_LSWX + LSU1, 1, 1, 0, 0, 0, // PC_STFIWX + LSU1, 1, 1, 0, 0, 0, // PC_STSWI + LSU1, 1, 1, 0, 0, 0, // PC_STSWX + LSU1, 1, 1, 0, 0, 0, // PC_STWCX + IU1, 1, 1, 0, 0, 1, // PC_ECIWX + IU1, 1, 1, 0, 0, 1, // PC_ECOWX + IU1, 1, 1, 0, 0, 0, // PC_DCBI + IU1, 1, 1, 0, 0, 0, // PC_ICBI + IU1, 1, 1, 0, 0, 0, // PC_MCRFS + IU1, 1, 1, 0, 0, 0, // PC_MCRXR + IU1, 1, 1, 0, 0, 0, // PC_MFTB + IU1, 1, 1, 0, 0, 0, // PC_MFSR + IU1, 1, 1, 0, 0, 0, // PC_MTSR + IU1, 1, 1, 0, 0, 0, // PC_MFSRIN + IU1, 1, 1, 0, 0, 0, // PC_MTSRIN + IU1, 1, 1, 0, 0, 0, // PC_MTFSB0 + IU1, 1, 1, 0, 0, 0, // PC_MTFSB1 + IU1, 1, 1, 0, 0, 0, // PC_MTFSFI + IU1, 1, 1, 0, 0, 1, // PC_SC + FPU1, 1, 1, 0, 0, 0, // PC_FSQRT + FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS + IU1, 1, 1, 0, 0, 0, // PC_TLBIA + IU1, 1, 1, 0, 0, 0, // PC_TLBIE + IU1, 1, 1, 0, 0, 0, // PC_TLBLD + IU1, 1, 1, 0, 0, 0, // PC_TLBLI + IU1, 1, 1, 0, 0, 0, // PC_TLBSYNC + IU1, 1, 1, 0, 0, 1, // PC_TW + IU1, 1, 1, 0, 0, 1, // PC_TRAP + IU1, 1, 1, 0, 0, 1, // PC_TWI + IU1, 1, 1, 0, 0, 1, // PC_OPWORD + IU1, 1, 1, 0, 0, 0, // PC_MFROM + IU1, 1, 1, 0, 0, 1, // PC_DSA + IU1, 1, 1, 0, 0, 1, // PC_ESA + IU1, 0, 0, 0, 0, 0, // PC_DCCCI + IU1, 0, 0, 0, 0, 0, // PC_DCREAD + IU1, 0, 0, 0, 0, 0, // PC_ICBT + IU1, 0, 0, 0, 0, 0, // PC_ICCCI + IU1, 0, 0, 0, 0, 0, // PC_ICREAD + IU1, 0, 0, 0, 0, 0, // PC_RFCI + IU1, 0, 0, 0, 0, 0, // PC_TLBRE + IU1, 0, 0, 0, 0, 0, // PC_TLBSX + IU1, 0, 0, 0, 0, 0, // PC_TLBWE + IU1, 0, 0, 0, 0, 0, // PC_WRTEE + IU1, 0, 0, 0, 0, 0, // PC_WRTEEI + IU1, 0, 0, 0, 0, 0, // PC_MFDCR + IU1, 0, 0, 0, 0, 0, // PC_MTDCR + IU1, 0, 0, 0, 0, 0, // PC_DCBA + BPU, 0, 0, 0, 0, 0, // PC_DSS + BPU, 0, 0, 0, 0, 0, // PC_DSSALL + BPU, 0, 0, 0, 0, 0, // PC_DST + BPU, 0, 0, 0, 0, 0, // PC_DSTT + BPU, 0, 0, 0, 0, 0, // PC_DSTST + BPU, 0, 0, 0, 0, 0, // PC_DSTSTT + BPU, 0, 0, 0, 0, 0, // PC_LVEBX + BPU, 0, 0, 0, 0, 0, // PC_LVEHX + BPU, 0, 0, 0, 0, 0, // PC_LVEWX + BPU, 0, 0, 0, 0, 0, // PC_LVSL + BPU, 0, 0, 0, 0, 0, // PC_LVSR + BPU, 0, 0, 0, 0, 0, // PC_LVX + BPU, 0, 0, 0, 0, 0, // PC_LVXL + BPU, 0, 0, 0, 0, 0, // PC_STVEBX + BPU, 0, 0, 0, 0, 0, // PC_STVEHX + BPU, 0, 0, 0, 0, 0, // PC_STVEWX + BPU, 0, 0, 0, 0, 0, // PC_STVX + BPU, 0, 0, 0, 0, 0, // PC_STVXL + BPU, 0, 0, 0, 0, 0, // PC_MFVSCR + BPU, 0, 0, 0, 0, 0, // PC_MTVSCR + BPU, 0, 0, 0, 0, 0, // PC_VADDCUW + BPU, 0, 0, 0, 0, 0, // PC_VADDFP + BPU, 0, 0, 0, 0, 0, // PC_VADDSBS + BPU, 0, 0, 0, 0, 0, // PC_VADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VADDSWS + BPU, 0, 0, 0, 0, 0, // PC_VADDUBM + BPU, 0, 0, 0, 0, 0, // PC_VADDUBS + BPU, 0, 0, 0, 0, 0, // PC_VADDUHM + BPU, 0, 0, 0, 0, 0, // PC_VADDUHS + BPU, 0, 0, 0, 0, 0, // PC_VADDUWM + BPU, 0, 0, 0, 0, 0, // PC_VADDUWS + BPU, 0, 0, 0, 0, 0, // PC_VAND + BPU, 0, 0, 0, 0, 0, // PC_VANDC + BPU, 0, 0, 0, 0, 0, // PC_VAVGSB + BPU, 0, 0, 0, 0, 0, // PC_VAVGSH + BPU, 0, 0, 0, 0, 0, // PC_VAVGSW + BPU, 0, 0, 0, 0, 0, // PC_VAVGUB + BPU, 0, 0, 0, 0, 0, // PC_VAVGUH + BPU, 0, 0, 0, 0, 0, // PC_VAVGUW + BPU, 0, 0, 0, 0, 0, // PC_VCFSX + BPU, 0, 0, 0, 0, 0, // PC_VCFUX + BPU, 0, 0, 0, 0, 0, // PC_VCMPBFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUB + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUH + BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUW + BPU, 0, 0, 0, 0, 0, // PC_VCMPGEFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTFP + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSB + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSH + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSW + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUB + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUH + BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUW + BPU, 0, 0, 0, 0, 0, // PC_VCTSXS + BPU, 0, 0, 0, 0, 0, // PC_VCTUXS + BPU, 0, 0, 0, 0, 0, // PC_VEXPTEFP + BPU, 0, 0, 0, 0, 0, // PC_VLOGEFP + BPU, 0, 0, 0, 0, 0, // PC_VMAXFP + BPU, 0, 0, 0, 0, 0, // PC_VMAXSB + BPU, 0, 0, 0, 0, 0, // PC_VMAXSH + BPU, 0, 0, 0, 0, 0, // PC_VMAXSW + BPU, 0, 0, 0, 0, 0, // PC_VMAXUB + BPU, 0, 0, 0, 0, 0, // PC_VMAXUH + BPU, 0, 0, 0, 0, 0, // PC_VMAXUW + BPU, 0, 0, 0, 0, 0, // PC_VMINFP + BPU, 0, 0, 0, 0, 0, // PC_VMINSB + BPU, 0, 0, 0, 0, 0, // PC_VMINSH + BPU, 0, 0, 0, 0, 0, // PC_VMINSW + BPU, 0, 0, 0, 0, 0, // PC_VMINUB + BPU, 0, 0, 0, 0, 0, // PC_VMINUH + BPU, 0, 0, 0, 0, 0, // PC_VMINUW + BPU, 0, 0, 0, 0, 0, // PC_VMRGHB + BPU, 0, 0, 0, 0, 0, // PC_VMRGHH + BPU, 0, 0, 0, 0, 0, // PC_VMRGHW + BPU, 0, 0, 0, 0, 0, // PC_VMRGLB + BPU, 0, 0, 0, 0, 0, // PC_VMRGLH + BPU, 0, 0, 0, 0, 0, // PC_VMRGLW + BPU, 0, 0, 0, 0, 0, // PC_VMULESB + BPU, 0, 0, 0, 0, 0, // PC_VMULESH + BPU, 0, 0, 0, 0, 0, // PC_VMULEUB + BPU, 0, 0, 0, 0, 0, // PC_VMULEUH + BPU, 0, 0, 0, 0, 0, // PC_VMULOSB + BPU, 0, 0, 0, 0, 0, // PC_VMULOSH + BPU, 0, 0, 0, 0, 0, // PC_VMULOUB + BPU, 0, 0, 0, 0, 0, // PC_VMULOUH + BPU, 0, 0, 0, 0, 0, // PC_VNOR + BPU, 0, 0, 0, 0, 0, // PC_VOR + BPU, 0, 0, 0, 0, 0, // PC_VPKPX + BPU, 0, 0, 0, 0, 0, // PC_VPKSHSS + BPU, 0, 0, 0, 0, 0, // PC_VPKSHUS + BPU, 0, 0, 0, 0, 0, // PC_VPKSWSS + BPU, 0, 0, 0, 0, 0, // PC_VPKSWUS + BPU, 0, 0, 0, 0, 0, // PC_VPKUHUM + BPU, 0, 0, 0, 0, 0, // PC_VPKUHUS + BPU, 0, 0, 0, 0, 0, // PC_VPKUWUM + BPU, 0, 0, 0, 0, 0, // PC_VPKUWUS + BPU, 0, 0, 0, 0, 0, // PC_VREFP + BPU, 0, 0, 0, 0, 0, // PC_VRFIM + BPU, 0, 0, 0, 0, 0, // PC_VRFIN + BPU, 0, 0, 0, 0, 0, // PC_VRFIP + BPU, 0, 0, 0, 0, 0, // PC_VRFIZ + BPU, 0, 0, 0, 0, 0, // PC_VRLB + BPU, 0, 0, 0, 0, 0, // PC_VRLH + BPU, 0, 0, 0, 0, 0, // PC_VRLW + BPU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP + BPU, 0, 0, 0, 0, 0, // PC_VSL + BPU, 0, 0, 0, 0, 0, // PC_VSLB + BPU, 0, 0, 0, 0, 0, // PC_VSLH + BPU, 0, 0, 0, 0, 0, // PC_VSLO + BPU, 0, 0, 0, 0, 0, // PC_VSLW + BPU, 0, 0, 0, 0, 0, // PC_VSPLTB + BPU, 0, 0, 0, 0, 0, // PC_VSPLTH + BPU, 0, 0, 0, 0, 0, // PC_VSPLTW + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISB + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISH + BPU, 0, 0, 0, 0, 0, // PC_VSPLTISW + BPU, 0, 0, 0, 0, 0, // PC_VSR + BPU, 0, 0, 0, 0, 0, // PC_VSRAB + BPU, 0, 0, 0, 0, 0, // PC_VSRAH + BPU, 0, 0, 0, 0, 0, // PC_VSRAW + BPU, 0, 0, 0, 0, 0, // PC_VSRB + BPU, 0, 0, 0, 0, 0, // PC_VSRH + BPU, 0, 0, 0, 0, 0, // PC_VSRO + BPU, 0, 0, 0, 0, 0, // PC_VSRW + BPU, 0, 0, 0, 0, 0, // PC_VSUBCUW + BPU, 0, 0, 0, 0, 0, // PC_VSUBFP + BPU, 0, 0, 0, 0, 0, // PC_VSUBSBS + BPU, 0, 0, 0, 0, 0, // PC_VSUBSHS + BPU, 0, 0, 0, 0, 0, // PC_VSUBSWS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUBM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUBS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUHM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUHS + BPU, 0, 0, 0, 0, 0, // PC_VSUBUWM + BPU, 0, 0, 0, 0, 0, // PC_VSUBUWS + BPU, 0, 0, 0, 0, 0, // PC_VSUMSWS + BPU, 0, 0, 0, 0, 0, // PC_VSUM2SWS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4SBS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4SHS + BPU, 0, 0, 0, 0, 0, // PC_VSUM4UBS + BPU, 0, 0, 0, 0, 0, // PC_VUPKHPX + BPU, 0, 0, 0, 0, 0, // PC_VUPKHSB + BPU, 0, 0, 0, 0, 0, // PC_VUPKHSH + BPU, 0, 0, 0, 0, 0, // PC_VUPKLPX + BPU, 0, 0, 0, 0, 0, // PC_VUPKLSB + BPU, 0, 0, 0, 0, 0, // PC_VUPKLSH + BPU, 0, 0, 0, 0, 0, // PC_VXOR + BPU, 0, 0, 0, 0, 0, // PC_VMADDFP + BPU, 0, 0, 0, 0, 0, // PC_VMHADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS + BPU, 0, 0, 0, 0, 0, // PC_VMLADDUHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMMBM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHS + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUBM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHM + BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHS + BPU, 0, 0, 0, 0, 0, // PC_VNMSUBFP + BPU, 0, 0, 0, 0, 0, // PC_VPERM + BPU, 0, 0, 0, 0, 0, // PC_VSEL + BPU, 0, 0, 0, 0, 0, // PC_VSLDOI + BPU, 0, 0, 0, 0, 0, // PC_VMR + BPU, 0, 0, 0, 0, 0, // PC_VMRP + BPU, 0, 0, 0, 0, 0, // PC_SLE + BPU, 0, 0, 0, 0, 0, // PC_SLEQ + BPU, 0, 0, 0, 0, 0, // PC_SLIQ + BPU, 0, 0, 0, 0, 0, // PC_SLLIQ + BPU, 0, 0, 0, 0, 0, // PC_SLLQ + BPU, 0, 0, 0, 0, 0, // PC_SLQ + BPU, 0, 0, 0, 0, 0, // PC_SRAIQ + BPU, 0, 0, 0, 0, 0, // PC_SRAQ + BPU, 0, 0, 0, 0, 0, // PC_SRE + BPU, 0, 0, 0, 0, 0, // PC_SREA + BPU, 0, 0, 0, 0, 0, // PC_SREQ + BPU, 0, 0, 0, 0, 0, // PC_SRIQ + BPU, 0, 0, 0, 0, 0, // PC_SRLIQ + BPU, 0, 0, 0, 0, 0, // PC_SRLQ + BPU, 0, 0, 0, 0, 0, // PC_SRQ + BPU, 0, 0, 0, 0, 0, // PC_MASKG + BPU, 0, 0, 0, 0, 0, // PC_MASKIR + BPU, 0, 0, 0, 0, 0, // PC_LSCBX + BPU, 0, 0, 0, 0, 0, // PC_DIV + BPU, 0, 0, 0, 0, 0, // PC_DIVS + BPU, 0, 0, 0, 0, 0, // PC_DOZ + BPU, 0, 0, 0, 0, 0, // PC_MUL + BPU, 0, 0, 0, 0, 0, // PC_NABS + BPU, 0, 0, 0, 0, 0, // PC_ABS + BPU, 0, 0, 0, 0, 0, // PC_CLCS + BPU, 0, 0, 0, 0, 0, // PC_DOZI + BPU, 0, 0, 0, 0, 0, // PC_RLMI + BPU, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline[stage].instr = NULL; + + if (stage == IU1) + iu1_completed_instruction = instr; + else if (stage == IU2) + iu2_completed_instruction = instr; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; + + completionbuffers.free = MaxEntries; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) + completionbuffers.entries[i].instr = NULL; + + iu1_completed_instruction = NULL; + iu2_completed_instruction = NULL; +} + +static int can_issue(PCode *instr) { + int stage; + + if (completionbuffers.free == 0) + return 0; + + stage = instruction_timing[instr->op].stage; + + if (stage == IU2) { + PCode *check; + int isClear1 = !pipeline[IU1].instr; + int isClear2 = !pipeline[IU2].instr; + if (!isClear1 && !isClear2) + return 0; + if (isClear1 && isClear2) + return 1; + + if (isClear1) + check = pipeline[IU2].instr; + else + check = pipeline[IU1].instr; + + if (is_dependent(instr, check, RegClass_GPR)) + return 0; + if (is_dependent(instr, iu1_completed_instruction, RegClass_GPR)) + return 0; + if (is_dependent(instr, iu2_completed_instruction, RegClass_GPR)) + return 0; + } else { + if (pipeline[stage].instr) + return 0; + } + + if ((instr->flags & fIsWrite) && pipeline[LSU2].instr && (pipeline[LSU2].instr->flags & fIsWrite)) + return 0; + + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + assign_completion_buffer(instr); + if (stage == IU2 && !pipeline[IU1].instr) + stage = IU1; + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + + iu1_completed_instruction = NULL; + iu2_completed_instruction = NULL; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + } + } + + if (pipeline[IU1].instr && pipeline[IU1].remaining == 0) + complete_instruction(IU1); + if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0) + complete_instruction(LSU2); + if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0) + complete_instruction(FPU3); + if (pipeline[SRU].instr && pipeline[SRU].remaining == 0) + complete_instruction(SRU); + if (pipeline[BPU].instr && pipeline[BPU].remaining == 0) + complete_instruction(BPU); + if (pipeline[IU2].instr && pipeline[IU2].remaining == 0) + complete_instruction(IU2); + + if ( + pipeline[FPU1].instr && + pipeline[FPU1].remaining == 0 && + (pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS) + ) + complete_instruction(FPU1); + + if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr) + advance(FPU1, FPU2, FPU3); + if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr) + advance(FPU1, FPU1, FPU2); + + if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr) + advance(LSU1, LSU1, LSU2); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +MachineInfo machine750 = { + 2, + 1, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &default_uses_vpermute_unit +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation821.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation821.c new file mode 100644 index 0000000..bbf0509 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulation821.c @@ -0,0 +1,615 @@ +#include "compiler/Scheduler.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://www.nxp.com/docs/en/user-guide/MPC821UM.pdf + +typedef enum Stage { + BranchUnit, + Stage1, + Stage2, + LSU1, + LSU2, + CRUnit, + NumStages, + Stage7 +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline[NumStages]; + +enum { + MaxEntries = 6 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static struct { + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[3]; + + // does this instruction serialise? + char serializes; +} instruction_timing[OPCODE_MAX] = { + BranchUnit, 0, 0, 0, 0, 0, // PC_B + BranchUnit, 0, 0, 0, 0, 0, // PC_BL + BranchUnit, 0, 0, 0, 0, 0, // PC_BC + BranchUnit, 0, 0, 0, 0, 0, // PC_BCLR + BranchUnit, 0, 0, 0, 0, 0, // PC_BCCTR + BranchUnit, 0, 0, 0, 0, 0, // PC_BT + BranchUnit, 0, 0, 0, 0, 0, // PC_BTLR + BranchUnit, 0, 0, 0, 0, 0, // PC_BTCTR + BranchUnit, 0, 0, 0, 0, 0, // PC_BF + BranchUnit, 0, 0, 0, 0, 0, // PC_BFLR + BranchUnit, 0, 0, 0, 0, 0, // PC_BFCTR + BranchUnit, 0, 0, 0, 0, 0, // PC_BDNZ + BranchUnit, 0, 0, 0, 0, 0, // PC_BDNZT + BranchUnit, 0, 0, 0, 0, 0, // PC_BDNZF + BranchUnit, 0, 0, 0, 0, 0, // PC_BDZ + BranchUnit, 0, 0, 0, 0, 0, // PC_BDZT + BranchUnit, 0, 0, 0, 0, 0, // PC_BDZF + BranchUnit, 0, 0, 0, 0, 0, // PC_BLR + BranchUnit, 0, 0, 0, 0, 0, // PC_BCTR + BranchUnit, 0, 0, 0, 0, 0, // PC_BCTRL + BranchUnit, 0, 0, 0, 0, 0, // PC_BLRL + BranchUnit, 0, 0, 0, 0, 0, // PC_LBZ + LSU1, 2, 1, 1, 0, 0, // PC_LBZU + LSU1, 2, 1, 1, 0, 0, // PC_LBZX + LSU1, 2, 1, 1, 0, 0, // PC_LBZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHZ + LSU1, 2, 1, 1, 0, 0, // PC_LHZU + LSU1, 2, 1, 1, 0, 0, // PC_LHZX + LSU1, 2, 1, 1, 0, 0, // PC_LHZUX + LSU1, 2, 1, 1, 0, 0, // PC_LHA + LSU1, 2, 1, 1, 0, 0, // PC_LHAU + LSU1, 2, 1, 1, 0, 0, // PC_LHAX + LSU1, 2, 1, 1, 0, 0, // PC_LHAUX + LSU1, 2, 1, 1, 0, 0, // PC_LHBRX + LSU1, 2, 1, 1, 0, 0, // PC_LWZ + LSU1, 2, 1, 1, 0, 0, // PC_LWZU + LSU1, 2, 1, 1, 0, 0, // PC_LWZX + LSU1, 2, 1, 1, 0, 0, // PC_LWZUX + LSU1, 2, 1, 1, 0, 0, // PC_LWBRX + LSU1, 2, 1, 1, 0, 0, // PC_LMW + LSU1, 2, 1, 1, 0, 0, // PC_STB + LSU1, 2, 1, 1, 0, 0, // PC_STBU + LSU1, 2, 1, 1, 0, 0, // PC_STBX + LSU1, 2, 1, 1, 0, 0, // PC_STBUX + LSU1, 2, 1, 1, 0, 0, // PC_STH + LSU1, 2, 1, 1, 0, 0, // PC_STHU + LSU1, 2, 1, 1, 0, 0, // PC_STHX + LSU1, 2, 1, 1, 0, 0, // PC_STHUX + LSU1, 2, 1, 1, 0, 0, // PC_STHBRX + LSU1, 2, 1, 1, 0, 0, // PC_STW + LSU1, 2, 1, 1, 0, 0, // PC_STWU + LSU1, 2, 1, 1, 0, 0, // PC_STWX + LSU1, 2, 1, 1, 0, 0, // PC_STWUX + LSU1, 2, 1, 1, 0, 0, // PC_STWBRX + LSU1, 2, 1, 1, 0, 0, // PC_STMW + LSU1, 2, 1, 1, 0, 0, // PC_DCBF + LSU1, 2, 1, 1, 0, 0, // PC_DCBST + LSU1, 2, 1, 1, 0, 0, // PC_DCBT + LSU1, 2, 1, 1, 0, 0, // PC_DCBTST + LSU1, 2, 1, 1, 0, 0, // PC_DCBZ + LSU1, 2, 1, 1, 0, 0, // PC_ADD + Stage1, 1, 1, 0, 0, 0, // PC_ADDC + Stage1, 1, 1, 0, 0, 0, // PC_ADDE + Stage1, 1, 1, 0, 0, 0, // PC_ADDI + Stage1, 1, 1, 0, 0, 0, // PC_ADDIC + Stage1, 1, 1, 0, 0, 0, // PC_ADDICR + Stage1, 1, 1, 0, 0, 0, // PC_ADDIS + Stage1, 1, 1, 0, 0, 0, // PC_ADDME + Stage1, 1, 1, 0, 0, 0, // PC_ADDZE + Stage1, 1, 1, 0, 0, 0, // PC_DIVW + Stage1, 37, 37, 0, 0, 0, // PC_DIVWU + Stage1, 37, 37, 0, 0, 0, // PC_MULHW + Stage1, 5, 5, 0, 0, 0, // PC_MULHWU + Stage1, 5, 5, 0, 0, 0, // PC_MULLI + Stage1, 3, 3, 0, 0, 0, // PC_MULLW + Stage1, 5, 5, 0, 0, 0, // PC_NEG + Stage1, 1, 1, 0, 0, 0, // PC_SUBF + Stage1, 1, 1, 0, 0, 0, // PC_SUBFC + Stage1, 1, 1, 0, 0, 0, // PC_SUBFE + Stage1, 1, 1, 0, 0, 0, // PC_SUBFIC + Stage1, 1, 1, 0, 0, 0, // PC_SUBFME + Stage1, 1, 1, 0, 0, 0, // PC_SUBFZE + Stage1, 1, 1, 0, 0, 0, // PC_CMPI + Stage1, 3, 1, 0, 0, 0, // PC_CMP + Stage1, 3, 1, 0, 0, 0, // PC_CMPLI + Stage1, 3, 1, 0, 0, 0, // PC_CMPL + Stage1, 3, 1, 0, 0, 0, // PC_ANDI + Stage1, 1, 1, 0, 0, 0, // PC_ANDIS + Stage1, 1, 1, 0, 0, 0, // PC_ORI + Stage1, 1, 1, 0, 0, 0, // PC_ORIS + Stage1, 1, 1, 0, 0, 0, // PC_XORI + Stage1, 1, 1, 0, 0, 0, // PC_XORIS + Stage1, 1, 1, 0, 0, 0, // PC_AND + Stage1, 1, 1, 0, 0, 0, // PC_OR + Stage1, 1, 1, 0, 0, 0, // PC_XOR + Stage1, 1, 1, 0, 0, 0, // PC_NAND + Stage1, 1, 1, 0, 0, 0, // PC_NOR + Stage1, 1, 1, 0, 0, 0, // PC_EQV + Stage1, 1, 1, 0, 0, 0, // PC_ANDC + Stage1, 1, 1, 0, 0, 0, // PC_ORC + Stage1, 1, 1, 0, 0, 0, // PC_EXTSB + Stage1, 1, 1, 0, 0, 0, // PC_EXTSH + Stage1, 1, 1, 0, 0, 0, // PC_CNTLZW + Stage1, 1, 1, 0, 0, 0, // PC_RLWINM + Stage1, 1, 1, 0, 0, 0, // PC_RLWNM + Stage1, 1, 1, 0, 0, 0, // PC_RLWIMI + Stage1, 1, 1, 0, 0, 0, // PC_SLW + Stage1, 1, 1, 0, 0, 0, // PC_SRW + Stage1, 1, 1, 0, 0, 0, // PC_SRAWI + Stage1, 1, 1, 0, 0, 0, // PC_SRAW + Stage1, 1, 1, 0, 0, 0, // PC_CRAND + CRUnit, 1, 1, 0, 0, 0, // PC_CRANDC + CRUnit, 1, 1, 0, 0, 0, // PC_CREQV + CRUnit, 1, 1, 0, 0, 0, // PC_CRNAND + CRUnit, 1, 1, 0, 0, 0, // PC_CRNOR + CRUnit, 1, 1, 0, 0, 0, // PC_CROR + CRUnit, 1, 1, 0, 0, 0, // PC_CRORC + CRUnit, 1, 1, 0, 0, 0, // PC_CRXOR + CRUnit, 1, 1, 0, 0, 0, // PC_MCRF + CRUnit, 1, 1, 0, 0, 0, // PC_MTXER + Stage1, 1, 1, 0, 0, 0, // PC_MTCTR + BranchUnit, 2, 2, 0, 0, 0, // PC_MTLR + BranchUnit, 2, 2, 0, 0, 0, // PC_MTCRF + Stage1, 1, 1, 0, 0, 0, // PC_MTMSR + Stage1, 1, 1, 0, 0, 0, // PC_MTSPR + Stage1, 1, 1, 0, 0, 0, // PC_MFMSR + Stage1, 1, 1, 0, 0, 0, // PC_MFSPR + Stage1, 1, 1, 0, 0, 0, // PC_MFXER + Stage7, 3, 1, 1, 1, 0, // PC_MFCTR + Stage7, 3, 1, 1, 1, 0, // PC_MFLR + Stage1, 1, 1, 0, 0, 0, // PC_MFCR + Stage1, 1, 1, 0, 0, 0, // PC_MFFS + Stage1, 1, 1, 0, 0, 0, // PC_MTFSF + Stage1, 1, 1, 0, 0, 0, // PC_EIEIO + Stage1, 1, 1, 0, 0, 0, // PC_ISYNC + Stage1, 1, 1, 0, 0, 0, // PC_SYNC + Stage1, 1, 1, 0, 0, 0, // PC_RFI + Stage1, 1, 1, 0, 0, 0, // PC_LI + LSU1, 2, 1, 1, 0, 0, // PC_LIS + LSU1, 2, 1, 1, 0, 0, // PC_MR + LSU1, 2, 1, 1, 0, 0, // PC_NOP + LSU1, 2, 1, 1, 0, 0, // PC_NOT + LSU1, 2, 1, 1, 0, 0, // PC_LFS + LSU1, 2, 1, 1, 0, 0, // PC_LFSU + LSU1, 2, 1, 1, 0, 0, // PC_LFSX + LSU1, 2, 1, 1, 0, 0, // PC_LFSUX + LSU1, 2, 1, 1, 0, 0, // PC_LFD + LSU1, 2, 1, 1, 0, 0, // PC_LFDU + LSU1, 2, 1, 1, 0, 0, // PC_LFDX + LSU1, 2, 1, 1, 0, 0, // PC_LFDUX + LSU1, 2, 1, 1, 0, 0, // PC_STFS + LSU1, 2, 1, 1, 0, 0, // PC_STFSU + LSU1, 2, 1, 1, 0, 0, // PC_STFSX + LSU1, 2, 1, 1, 0, 0, // PC_STFSUX + Stage7, 3, 1, 1, 1, 0, // PC_STFD + Stage7, 3, 1, 1, 1, 0, // PC_STFDU + Stage7, 3, 1, 1, 1, 0, // PC_STFDX + Stage7, 3, 1, 1, 1, 0, // PC_STFDUX + Stage7, 3, 1, 1, 1, 0, // PC_FMR + Stage7, 3, 1, 1, 1, 0, // PC_FABS + Stage7, 3, 1, 1, 1, 0, // PC_FNEG + Stage7, 3, 1, 1, 1, 0, // PC_FNABS + Stage7, 4, 2, 1, 1, 0, // PC_FADD + Stage7, 3, 1, 1, 1, 0, // PC_FADDS + Stage7, 33, 33, 0, 0, 0, // PC_FSUB + Stage7, 18, 18, 0, 0, 0, // PC_FSUBS + Stage7, 4, 2, 1, 1, 0, // PC_FMUL + Stage7, 3, 1, 1, 1, 0, // PC_FMULS + Stage7, 4, 2, 1, 1, 0, // PC_FDIV + Stage7, 3, 1, 1, 1, 0, // PC_FDIVS + Stage7, 4, 2, 1, 1, 0, // PC_FMADD + Stage7, 3, 1, 1, 1, 0, // PC_FMADDS + Stage7, 4, 2, 1, 1, 0, // PC_FMSUB + Stage7, 3, 1, 1, 1, 0, // PC_FMSUBS + Stage7, 18, 18, 0, 0, 0, // PC_FNMADD + Stage7, 3, 1, 1, 1, 0, // PC_FNMADDS + Stage7, 3, 1, 1, 1, 0, // PC_FNMSUB + Stage7, 3, 1, 1, 1, 0, // PC_FNMSUBS + Stage7, 3, 1, 1, 1, 0, // PC_FRES + Stage7, 3, 1, 1, 1, 0, // PC_FRSQRTE + Stage7, 5, 1, 1, 1, 0, // PC_FSEL + Stage7, 5, 1, 1, 1, 0, // PC_FRSP + LSU1, 1, 0, 0, 0, 0, // PC_FCTIW + LSU1, 1, 0, 0, 0, 0, // PC_FCTIWZ + LSU1, 1, 0, 0, 0, 0, // PC_FCMPU + LSU1, 1, 0, 0, 0, 0, // PC_FCMPO + LSU1, 1, 0, 0, 0, 0, // PC_LWARX + LSU1, 1, 0, 0, 0, 0, // PC_LSWI + LSU1, 1, 0, 0, 0, 0, // PC_LSWX + Stage1, 1, 0, 0, 0, 0, // PC_STFIWX + Stage1, 1, 0, 0, 0, 0, // PC_STSWI + Stage1, 1, 0, 0, 0, 0, // PC_STSWX + Stage1, 1, 0, 0, 0, 0, // PC_STWCX + Stage1, 1, 0, 0, 0, 0, // PC_ECIWX + Stage1, 1, 0, 0, 0, 0, // PC_ECOWX + Stage1, 1, 0, 0, 0, 0, // PC_DCBI + Stage1, 1, 0, 0, 0, 0, // PC_ICBI + Stage1, 1, 0, 0, 0, 0, // PC_MCRFS + Stage1, 1, 0, 0, 0, 0, // PC_MCRXR + Stage1, 1, 0, 0, 0, 0, // PC_MFTB + Stage1, 1, 0, 0, 0, 0, // PC_MFSR + Stage1, 1, 0, 0, 0, 0, // PC_MTSR + Stage1, 1, 0, 0, 0, 0, // PC_MFSRIN + Stage1, 1, 0, 0, 0, 0, // PC_MTSRIN + Stage1, 1, 0, 0, 0, 0, // PC_MTFSB0 + Stage1, 1, 0, 0, 0, 0, // PC_MTFSB1 + Stage1, 1, 0, 0, 0, 0, // PC_MTFSFI + Stage1, 1, 0, 0, 0, 1, // PC_SC + Stage1, 1, 0, 0, 0, 1, // PC_FSQRT + Stage1, 1, 0, 0, 0, 0, // PC_FSQRTS + Stage1, 1, 0, 0, 0, 0, // PC_TLBIA + Stage1, 1, 0, 0, 0, 0, // PC_TLBIE + Stage1, 1, 0, 0, 0, 0, // PC_TLBLD + Stage1, 1, 0, 0, 0, 0, // PC_TLBLI + Stage1, 1, 0, 0, 0, 0, // PC_TLBSYNC + Stage1, 1, 0, 0, 0, 0, // PC_TW + Stage1, 1, 0, 0, 0, 1, // PC_TRAP + Stage1, 1, 0, 0, 0, 1, // PC_TWI + Stage1, 1, 0, 0, 0, 1, // PC_OPWORD + Stage1, 1, 0, 0, 0, 1, // PC_MFROM + Stage1, 1, 0, 0, 0, 0, // PC_DSA + Stage1, 1, 0, 0, 0, 0, // PC_ESA + Stage1, 1, 0, 0, 0, 0, // PC_DCCCI + Stage1, 0, 0, 0, 0, 0, // PC_DCREAD + Stage1, 0, 0, 0, 0, 0, // PC_ICBT + Stage1, 0, 0, 0, 0, 0, // PC_ICCCI + Stage1, 0, 0, 0, 0, 0, // PC_ICREAD + Stage1, 0, 0, 0, 0, 0, // PC_RFCI + Stage1, 0, 0, 0, 0, 0, // PC_TLBRE + Stage1, 0, 0, 0, 0, 0, // PC_TLBSX + Stage1, 0, 0, 0, 0, 0, // PC_TLBWE + Stage1, 0, 0, 0, 0, 0, // PC_WRTEE + Stage1, 0, 0, 0, 0, 0, // PC_WRTEEI + Stage1, 0, 0, 0, 0, 0, // PC_MFDCR + Stage1, 0, 0, 0, 0, 0, // PC_MTDCR + Stage1, 0, 0, 0, 0, 0, // PC_DCBA + Stage1, 0, 0, 0, 0, 0, // PC_DSS + BranchUnit, 0, 0, 0, 0, 0, // PC_DSSALL + BranchUnit, 0, 0, 0, 0, 0, // PC_DST + BranchUnit, 0, 0, 0, 0, 0, // PC_DSTT + BranchUnit, 0, 0, 0, 0, 0, // PC_DSTST + BranchUnit, 0, 0, 0, 0, 0, // PC_DSTSTT + BranchUnit, 0, 0, 0, 0, 0, // PC_LVEBX + BranchUnit, 0, 0, 0, 0, 0, // PC_LVEHX + BranchUnit, 0, 0, 0, 0, 0, // PC_LVEWX + BranchUnit, 0, 0, 0, 0, 0, // PC_LVSL + BranchUnit, 0, 0, 0, 0, 0, // PC_LVSR + BranchUnit, 0, 0, 0, 0, 0, // PC_LVX + BranchUnit, 0, 0, 0, 0, 0, // PC_LVXL + BranchUnit, 0, 0, 0, 0, 0, // PC_STVEBX + BranchUnit, 0, 0, 0, 0, 0, // PC_STVEHX + BranchUnit, 0, 0, 0, 0, 0, // PC_STVEWX + BranchUnit, 0, 0, 0, 0, 0, // PC_STVX + BranchUnit, 0, 0, 0, 0, 0, // PC_STVXL + BranchUnit, 0, 0, 0, 0, 0, // PC_MFVSCR + BranchUnit, 0, 0, 0, 0, 0, // PC_MTVSCR + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDCUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDSBS + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDSHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDSWS + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUBM + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUBS + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUHM + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUWM + BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUWS + BranchUnit, 0, 0, 0, 0, 0, // PC_VAND + BranchUnit, 0, 0, 0, 0, 0, // PC_VANDC + BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGSW + BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VCFSX + BranchUnit, 0, 0, 0, 0, 0, // PC_VCFUX + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPBFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGEFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTSW + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VCTSXS + BranchUnit, 0, 0, 0, 0, 0, // PC_VCTUXS + BranchUnit, 0, 0, 0, 0, 0, // PC_VEXPTEFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VLOGEFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXSW + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINSW + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMINUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGHB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGHH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGHW + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGLB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGLH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGLW + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULESB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULESH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULEUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULEUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOUB + BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOUH + BranchUnit, 0, 0, 0, 0, 0, // PC_VNOR + BranchUnit, 0, 0, 0, 0, 0, // PC_VOR + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKPX + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSHSS + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSHUS + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSWSS + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSWUS + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUHUM + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUHUS + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUWUM + BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUWUS + BranchUnit, 0, 0, 0, 0, 0, // PC_VREFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIM + BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIN + BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIP + BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIZ + BranchUnit, 0, 0, 0, 0, 0, // PC_VRLB + BranchUnit, 0, 0, 0, 0, 0, // PC_VRLH + BranchUnit, 0, 0, 0, 0, 0, // PC_VRLW + BranchUnit, 0, 0, 0, 0, 0, // PC_VRSQRTEFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VSL + BranchUnit, 0, 0, 0, 0, 0, // PC_VSLB + BranchUnit, 0, 0, 0, 0, 0, // PC_VSLH + BranchUnit, 0, 0, 0, 0, 0, // PC_VSLO + BranchUnit, 0, 0, 0, 0, 0, // PC_VSLW + BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTB + BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTH + BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTW + BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTISB + BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTISH + BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTISW + BranchUnit, 0, 0, 0, 0, 0, // PC_VSR + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRAB + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRAH + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRAW + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRB + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRH + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRO + BranchUnit, 0, 0, 0, 0, 0, // PC_VSRW + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBCUW + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBSBS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBSHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBSWS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUBM + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUBS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUHM + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUWM + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUWS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUMSWS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM2SWS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM4SBS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM4SHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM4UBS + BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKHPX + BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKHSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKHSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKLPX + BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKLSB + BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKLSH + BranchUnit, 0, 0, 0, 0, 0, // PC_VXOR + BranchUnit, 0, 0, 0, 0, 0, // PC_VMADDFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VMHADDSHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VMHRADDSHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VMLADDUHM + BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMMBM + BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMSHM + BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMSHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMUBM + BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMUHM + BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMUHS + BranchUnit, 0, 0, 0, 0, 0, // PC_VNMSUBFP + BranchUnit, 0, 0, 0, 0, 0, // PC_VPERM + BranchUnit, 0, 0, 0, 0, 0, // PC_VSEL + BranchUnit, 0, 0, 0, 0, 0, // PC_VSLDOI + BranchUnit, 0, 0, 0, 0, 0, // PC_VMR + BranchUnit, 0, 0, 0, 0, 0, // PC_VMRP + BranchUnit, 0, 0, 0, 0, 0, // PC_SLE + BranchUnit, 0, 0, 0, 0, 0, // PC_SLEQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SLIQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SLLIQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SLLQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SLQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRAIQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRAQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRE + BranchUnit, 0, 0, 0, 0, 0, // PC_SREA + BranchUnit, 0, 0, 0, 0, 0, // PC_SREQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRIQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRLIQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRLQ + BranchUnit, 0, 0, 0, 0, 0, // PC_SRQ + BranchUnit, 0, 0, 0, 0, 0, // PC_MASKG + BranchUnit, 0, 0, 0, 0, 0, // PC_MASKIR + BranchUnit, 0, 0, 0, 0, 0, // PC_LSCBX + BranchUnit, 0, 0, 0, 0, 0, // PC_DIV + BranchUnit, 0, 0, 0, 0, 0, // PC_DIVS + BranchUnit, 0, 0, 0, 0, 0, // PC_DOZ + BranchUnit, 0, 0, 0, 0, 0, // PC_MUL + BranchUnit, 0, 0, 0, 0, 0, // PC_NABS + BranchUnit, 0, 0, 0, 0, 0, // PC_ABS + BranchUnit, 0, 0, 0, 0, 0, // PC_CLCS + BranchUnit, 0, 0, 0, 0, 0, // PC_DOZI + BranchUnit, 0, 0, 0, 0, 0, // PC_RLMI + BranchUnit, 0, 0, 0, 0, 0, // PC_RRIB +}; + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline[newStage].instr = instr; + pipeline[newStage].remaining = cycles; + pipeline[oldStage].instr = NULL; +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline[stage].instr = NULL; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 2; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + for (stage = 0; stage < NumStages; stage++) + pipeline[stage].instr = NULL; + + completionbuffers.free = MaxEntries; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) + completionbuffers.entries[i].instr = NULL; +} + +static int can_issue(PCode *instr) { + int stage; + + if (completionbuffers.free == 0) + return 0; + + stage = instruction_timing[instr->op].stage; + if (pipeline[stage].instr) + return 0; + + if ((instr->flags & fIsWrite) && pipeline[LSU2].instr && (pipeline[LSU2].instr->flags & fIsWrite)) + return 0; + + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + assign_completion_buffer(instr); + pipeline[stage].instr = instr; + pipeline[stage].remaining = cycles; +} + +static void advance_clock(void) { + int stage; + + for (stage = 0; stage < NumStages; stage++) { + if (pipeline[stage].instr && pipeline[stage].remaining) + --pipeline[stage].remaining; + } + + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) { + retire_instruction(); + } + } + + if (pipeline[Stage1].instr && pipeline[Stage1].remaining == 0) + complete_instruction(Stage1); + if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0) + complete_instruction(LSU2); + if (pipeline[BranchUnit].instr && pipeline[BranchUnit].remaining == 0) + complete_instruction(BranchUnit); + + if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr) + advance(LSU1, LSU1, LSU2); +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +MachineInfo machine821 = { + 1, + 0, + 0, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &default_uses_vpermute_unit +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulationAltiVec.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulationAltiVec.c new file mode 100644 index 0000000..d261ee9 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulationAltiVec.c @@ -0,0 +1,752 @@ +#include "compiler/Scheduler.h" +#include "compiler/CError.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" + +// https://www.nxp.com/docs/en/reference-manual/MPC7450UM.pdf + +typedef enum Stage { + BPU, // Branch Prediction Unit + + IU2_1, // Multiple-Cycle Integer Unit + IU2_2, + IU2_3, + + IU1a, // Single-Cycle Integer Unit + IU1b, // Single-Cycle Integer Unit + IU1c, // Single-Cycle Integer Unit + + LSU_1, // Load/Store Unit + LSU_2, + LSU_3, + LSU_4, + + FPU_1, // Floating-Point Unit + FPU_2, + FPU_3, + FPU_4, + + VIU1, // Vector Simple Integer Unit + + VPU_1, // Vector Permute Unit + VPU_2, + + VIU2_1, // Vector Complex Integer Unit + VIU2_2, + VIU2_3, + VIU2_4, + + VFPU_1, // Vector Floating-Point Unit + VFPU_2, + VFPU_3, + VFPU_4, + + NumStages +} Stage; + +static struct { + // the instruction currently in this pipeline stage + PCode *instr; + + // how many cycles are left for this instruction to finish + int remaining; +} pipeline_altivec[NumStages]; + +enum { + Queue0, + Queue1, + Queue2, + Queue3, + Queue4, + Queue5, + Queue6, + Queue7, + NumQueues +}; + +static int fetchqueues[NumQueues]; + +enum { + MaxEntries = 16 +}; + +static struct { + // how many entries remain unused in the queue + unsigned int free; + + // how many entries are currently used in the queue + unsigned int used; + + // the index of the next instruction that will be retired + unsigned int nextToRetire; + + // the index of the next free slot that will be used when an instruction is dispatched + unsigned int nextFreeSlot; + + // circular array of entries in the completion queue + struct { + PCode *instr; + int completed; + } entries[MaxEntries]; +} completionbuffers; + +static struct { + short index; + + // the initial stage for this instruction + Stage stage; + + // the total amount of cycles required by this instruction + char latency; + + // how long it takes to finish each stage + char cycles[4]; + + // does this instruction serialise? + char serializes; + + char unused; +} instruction_timing[] = { + 0, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_B + 1, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BL + 2, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BC + 3, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCLR + 4, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCCTR + 5, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BT + 6, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTLR + 7, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTCTR + 8, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BF + 9, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFLR + 10, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFCTR + 11, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZ + 12, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZT + 13, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZF + 14, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZ + 15, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZT + 16, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZF + 17, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLR + 18, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTR + 19, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTRL + 20, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLRL + 21, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZ + 22, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZU + 23, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZX + 24, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZUX + 25, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZ + 26, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZU + 27, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZX + 28, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZUX + 29, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHA + 30, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAU + 31, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAX + 32, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAUX + 33, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHBRX + 34, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZ + 35, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZU + 36, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZX + 37, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZUX + 38, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWBRX + 39, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_LMW + 40, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STB + 41, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBU + 42, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBX + 43, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBUX + 44, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STH + 45, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHU + 46, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHX + 47, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHUX + 48, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHBRX + 49, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STW + 50, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWU + 51, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWX + 52, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWUX + 53, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWBRX + 54, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STMW + 55, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBF + 56, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBST + 57, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBT + 58, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBTST + 59, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBZ + 60, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADD + 61, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDC + 62, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDE + 63, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDI + 64, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIC + 65, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDICR + 66, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIS + 67, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDME + 68, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDZE + 69, IU2_1, 23, 23, 0, 0, 0, 0, 0, // PC_DIVW + 70, IU2_1, 23, 23, 0, 0, 0, 0, 0, // PC_DIVWU + 71, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULHW + 72, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULHWU + 73, IU2_1, 3, 1, 1, 1, 0, 0, 0, // PC_MULLI + 74, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULLW + 75, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NEG + 76, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBF + 77, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFC + 78, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFE + 79, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFIC + 80, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFME + 81, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFZE + 82, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPI + 83, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMP + 84, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPLI + 85, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPL + 86, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDI + 87, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDIS + 88, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORI + 89, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORIS + 90, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XORI + 91, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XORIS + 92, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_AND + 93, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_OR + 94, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XOR + 95, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NAND + 96, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOR + 97, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EQV + 98, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDC + 99, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORC + 100, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSB + 101, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSH + 102, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CNTLZW + 103, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWINM + 104, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWNM + 105, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWIMI + 106, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SLW + 107, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SRW + 108, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_SRAWI + 109, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_SRAW + 110, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRAND + 111, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRANDC + 112, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CREQV + 113, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRNAND + 114, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRNOR + 115, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CROR + 116, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRORC + 117, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRXOR + 118, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MCRF + 119, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTXER + 120, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTCTR + 121, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTLR + 122, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MTCRF + 123, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTMSR + 124, IU2_1, 3, 3, 0, 0, 0, 1, 0, // PC_MTSPR + 125, IU2_1, 3, 2, 1, 0, 0, 0, 0, // PC_MFMSR + 126, IU2_1, 3, 3, 0, 0, 0, 1, 0, // PC_MFSPR + 127, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFXER + 128, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFCTR + 129, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFLR + 130, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MFCR + 131, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MFFS + 132, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSF + 133, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_EIEIO + 134, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_ISYNC + 135, LSU_1, 35, 35, 0, 0, 0, 1, 0, // PC_SYNC + 136, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_RFI + 137, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_LI + 138, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_LIS + 139, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MR + 140, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOP + 141, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOT + 142, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFS + 143, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSU + 144, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSX + 145, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSUX + 146, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFD + 147, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDU + 148, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDX + 149, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDUX + 150, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFS + 151, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSU + 152, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSX + 153, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSUX + 154, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFD + 155, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDU + 156, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDX + 157, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDUX + 158, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMR + 159, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FABS + 160, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNEG + 161, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNABS + 162, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FADD + 163, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FADDS + 164, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSUB + 165, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSUBS + 166, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMUL + 167, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMULS + 168, FPU_1, 35, 35, 0, 0, 0, 0, 0, // PC_FDIV + 169, FPU_1, 21, 21, 0, 0, 0, 0, 0, // PC_FDIVS + 170, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMADD + 171, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMADDS + 172, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMSUB + 173, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMSUBS + 174, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMADD + 175, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMADDS + 176, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMSUB + 177, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMSUBS + 178, FPU_1, 14, 14, 0, 0, 0, 0, 0, // PC_FRES + 179, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FRSQRTE + 180, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSEL + 181, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FRSP + 182, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCTIW + 183, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCTIWZ + 184, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCMPU + 185, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCMPO + 186, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_LWARX + 187, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LSWI + 188, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_LSWX + 189, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STFIWX + 190, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STSWI + 191, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STSWX + 192, LSU_1, 3, 1, 1, 1, 0, 1, 0, // PC_STWCX + 193, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ECIWX + 194, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ECOWX + 195, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBI + 196, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ICBI + 197, IU2_1, 5, 5, 0, 0, 0, 1, 0, // PC_MCRFS + 198, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MCRXR + 199, IU2_1, 5, 5, 0, 0, 0, 0, 0, // PC_MFTB + 200, IU2_1, 4, 1, 3, 0, 0, 0, 0, // PC_MFSR + 201, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTSR + 202, IU2_1, 4, 1, 3, 0, 0, 0, 0, // PC_MFSRIN + 203, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTSRIN + 204, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSB0 + 205, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSB1 + 206, FPU_1, 5, 5, 0, 0, 0, 0, 0, // PC_MTFSFI + 207, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_SC + 208, FPU_1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRT + 209, FPU_1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRTS + 210, LSU_1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIA + 211, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_TLBIE + 212, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_TLBLD + 213, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_TLBLI + 214, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_TLBSYNC + 215, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_TW + 216, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_TRAP + 217, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_TWI + 218, IU1a, 1, 1, 0, 0, 0, 1, 0, // PC_OPWORD + 219, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MFROM + 220, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_DSA + 221, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ESA + 222, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_DCCCI + 223, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_DCREAD + 224, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICBT + 225, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICCCI + 226, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICREAD + 227, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_RFCI + 228, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBRE + 229, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBSX + 230, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBWE + 231, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEE + 232, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEEI + 233, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_MFDCR + 234, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_MTDCR + 235, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBA + 236, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSS + 237, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSSALL + 238, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DST + 239, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTT + 240, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTST + 241, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTSTT + 242, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEBX + 243, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEHX + 244, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEWX + 245, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVSL + 246, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVSR + 247, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVX + 248, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVXL + 249, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEBX + 250, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEHX + 251, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEWX + 252, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVX + 253, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVXL + 254, VFPU_1, 2, 2, 0, 0, 0, 1, 0, // PC_MFVSCR + 255, VFPU_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTVSCR + 256, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDCUW + 257, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VADDFP + 258, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSBS + 259, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSHS + 260, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSWS + 261, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBM + 262, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBS + 263, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHM + 264, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHS + 265, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWM + 266, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWS + 267, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAND + 268, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VANDC + 269, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSB + 270, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSH + 271, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSW + 272, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUB + 273, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUH + 274, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUW + 275, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFSX + 276, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFUX + 277, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPBFP + 278, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPEQFP + 279, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUB + 280, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUH + 281, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUW + 282, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPGEFP + 283, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPGTFP + 284, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSB + 285, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSH + 286, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSW + 287, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUB + 288, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUH + 289, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUW + 290, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTSXS + 291, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTUXS + 292, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VEXPTEFP + 293, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VLOGEFP + 294, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMAXFP + 295, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSB + 296, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSH + 297, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSW + 298, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUB + 299, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUH + 300, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUW + 301, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMINFP + 302, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSB + 303, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSH + 304, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSW + 305, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUB + 306, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUH + 307, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUW + 308, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHB + 309, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHH + 310, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHW + 311, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLB + 312, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLH + 313, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLW + 314, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULESB + 315, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULESH + 316, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULEUB + 317, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULEUH + 318, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOSB + 319, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOSH + 320, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOUB + 321, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOUH + 322, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VNOR + 323, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VOR + 324, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKPX + 325, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSHSS + 326, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSHUS + 327, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSWSS + 328, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSWUS + 329, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUHUM + 330, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUHUS + 331, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUWUM + 332, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUWUS + 333, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VREFP + 334, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIM + 335, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIN + 336, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIP + 337, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIZ + 338, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLB + 339, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLH + 340, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLW + 341, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRSQRTEFP + 342, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSL + 343, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLB + 344, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLH + 345, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSLO + 346, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLW + 347, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTB + 348, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTH + 349, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTW + 350, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISB + 351, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISH + 352, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISW + 353, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSR + 354, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAB + 355, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAH + 356, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAW + 357, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRB + 358, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRH + 359, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSRO + 360, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRW + 361, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBCUW + 362, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUBFP + 363, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSBS + 364, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSHS + 365, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSWS + 366, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBM + 367, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBS + 368, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHM + 369, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHS + 370, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWM + 371, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWS + 372, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUMSWS + 373, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM2SWS + 374, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4SBS + 375, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4SHS + 376, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4UBS + 377, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHPX + 378, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHSB + 379, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHSH + 380, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLPX + 381, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLSB + 382, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLSH + 383, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VXOR + 384, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMADDFP + 385, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMHADDSHS + 386, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMHRADDSHS + 387, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMLADDUHM + 388, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMMBM + 389, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMSHM + 390, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMSHS + 391, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUBM + 392, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUHM + 393, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUHS + 394, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VNMSUBFP + 395, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPERM + 396, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSEL + 397, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSLDOI + 398, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMR + 399, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRP + -1, IU2_1, 1, 1, 0, 0, 0, 1, 0 +}; + +enum { NumPipelineUnits = 6 }; +static struct { + Stage start, end; +} pipeline_units[8] = { + IU2_1, IU2_3, + LSU_1, LSU_4, + FPU_1, FPU_4, + VPU_1, VPU_2, + VIU2_1, VIU2_4, + VFPU_1, VFPU_4 +}; + +enum { NumFinalStages = 11 }; +static Stage finalstages[16] = { + BPU, IU2_3, IU1a, IU1b, + IU1c, LSU_4, FPU_4, VIU1, + VPU_2, VIU2_4, VFPU_4 +}; + +// forward decl +static void complete_instruction(int stage); + +static void advance(int firstStage, int oldStage, int newStage) { + PCode *instr = pipeline_altivec[oldStage].instr; + int cycles = instruction_timing[instr->op].cycles[newStage - firstStage]; + pipeline_altivec[newStage].instr = instr; + pipeline_altivec[newStage].remaining = cycles; + pipeline_altivec[oldStage].instr = NULL; + pipeline_altivec[oldStage].remaining = 0; + if (cycles == 0) + complete_instruction(newStage); +} + +static void assign_completion_buffer(PCode *instr) { + completionbuffers.used++; + completionbuffers.free--; + completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr; + completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0; + completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries; +} + +static void complete_instruction(int stage) { + PCode *instr = pipeline_altivec[stage].instr; + int buf = 0; + while (buf < MaxEntries && completionbuffers.entries[buf].instr != instr) + buf++; + + completionbuffers.entries[buf].completed = 1; + pipeline_altivec[stage].instr = NULL; +} + +static void retire_instruction(void) { + completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL; + completionbuffers.entries[completionbuffers.nextToRetire].completed = 0; + completionbuffers.used--; + completionbuffers.free++; + completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries; +} + +static int latency(PCode *instr) { + int cycles = instruction_timing[instr->op].latency; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + cycles += 1; + if (instr->op == PC_LMW || instr->op == PC_STMW) + cycles += instr->argCount - 2; + return cycles; +} + +static void initialize(void) { + int stage; + int i; + + fetchqueues[Queue0] = 1; + for (i = 1; i < NumQueues; i++) + fetchqueues[i] = 0; + + for (stage = 0; stage < NumStages; stage++) + pipeline_altivec[stage].instr = NULL; + + completionbuffers.free = MaxEntries; + completionbuffers.used = 0; + completionbuffers.nextToRetire = 0; + completionbuffers.nextFreeSlot = 0; + for (i = 0; i < MaxEntries; i++) { + completionbuffers.entries[i].instr = NULL; + completionbuffers.entries[i].completed = 0; + } +} + +static int can_issue(PCode *instr) { + int stage; + + if (completionbuffers.free == 0) + return 0; + + stage = instruction_timing[instr->op].stage; + + if (stage == IU1a) { + int isClear1 = !pipeline_altivec[IU1a].instr; + int isClear2 = !pipeline_altivec[IU1b].instr; + if (!isClear1 && !isClear2) + return 0; + } else { + if (pipeline_altivec[stage].instr) + return 0; + } + + if (fetchqueues[Queue1] <= 0) + return 0; + + if (stage == FPU_1) { + if (fetchqueues[Queue2] < 1 || fetchqueues[Queue5] >= 1) + return 0; + } else if (stage >= VIU1 && stage <= VFPU_1) { + if (fetchqueues[Queue4] < 1 || fetchqueues[Queue7] >= 2) + return 0; + } else if (stage != BPU) { + if (fetchqueues[Queue3] < 1 || fetchqueues[Queue6] >= 3) + return 0; + } + + return 1; +} + +static void issue(PCode *instr) { + int stage = instruction_timing[instr->op].stage; + int cycles = instruction_timing[instr->op].cycles[0]; + assign_completion_buffer(instr); + + CError_ASSERT(879, --fetchqueues[Queue1] >= 0); + + if (stage == FPU_1) { + fetchqueues[Queue2]--; + fetchqueues[Queue5]++; + } else if (stage >= VIU1 && stage <= VFPU_1) { + fetchqueues[Queue4]--; + fetchqueues[Queue7]++; + } else if (stage != BPU) { + fetchqueues[Queue3]--; + fetchqueues[Queue6]++; + } + + fetchqueues[Queue2] = (fetchqueues[Queue1] < fetchqueues[Queue2]) ? fetchqueues[Queue1] : fetchqueues[Queue2]; + fetchqueues[Queue3] = (fetchqueues[Queue1] < fetchqueues[Queue3]) ? fetchqueues[Queue1] : fetchqueues[Queue3]; + fetchqueues[Queue4] = (fetchqueues[Queue1] < fetchqueues[Queue4]) ? fetchqueues[Queue1] : fetchqueues[Queue4]; + + if (stage == IU1a) { + if (!pipeline_altivec[IU1a].instr) + stage = IU1a; + else if (!pipeline_altivec[IU1b].instr) + stage = IU1b; + else if (!pipeline_altivec[IU1c].instr) + stage = IU1c; + } + + pipeline_altivec[stage].instr = instr; + pipeline_altivec[stage].remaining = cycles; +} + +static void advance_clock(void) { + int num; + int i; + unsigned int unit; + + for (i = 0; i < NumStages; i++) { + if (pipeline_altivec[i].instr && pipeline_altivec[i].remaining) + --pipeline_altivec[i].remaining; + } + + for (i = 0; i < 3; i++) { + if (completionbuffers.used == 0) + break; + if (completionbuffers.entries[completionbuffers.nextToRetire].completed == 0) + break; + retire_instruction(); + } + + unit = 0; + do { + if (pipeline_altivec[finalstages[unit]].instr && pipeline_altivec[finalstages[unit]].remaining == 0) + complete_instruction(finalstages[unit]); + } while (++unit < NumFinalStages); + + unit = 0; + do { + Stage first; + Stage current; + first = pipeline_units[unit].start; + for (current = first; current < pipeline_units[unit].end; current++) { + if (pipeline_altivec[current].instr && pipeline_altivec[current].remaining == 0 && !pipeline_altivec[current + 1].instr) + advance(first, current, current + 1); + } + } while (++unit < NumPipelineUnits); + + fetchqueues[Queue5] = 0; + fetchqueues[Queue6] = 0; + fetchqueues[Queue7] = 0; + +#define CHEAP_MIN(a, b) ( ((a) < (b)) ? (a) : (b) ) + num = 2 - fetchqueues[Queue2]; + num += 6 - fetchqueues[Queue3]; + num += 4 - fetchqueues[Queue4]; + num = (num > 3) ? 3 : num; + num = (completionbuffers.free < num) ? completionbuffers.free : num; + if (fetchqueues[Queue0] < num) + num = fetchqueues[Queue0]; + + fetchqueues[Queue1] += num; + fetchqueues[Queue0] -= num; + + fetchqueues[Queue2] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(2, fetchqueues[Queue2] + num)); + fetchqueues[Queue3] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(6, fetchqueues[Queue3] + num)); + fetchqueues[Queue4] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(4, fetchqueues[Queue4] + num)); + + CError_ASSERT(991, fetchqueues[Queue1] <= (fetchqueues[Queue2] + fetchqueues[Queue3] + fetchqueues[Queue4])); + + if (fetchqueues[Queue0] <= 8) + fetchqueues[Queue0] += 4; +} + +static int serializes(PCode *instr) { + return instruction_timing[instr->op].serializes; +} + +static int uses_vpermute_unit_altivec(PCode *instr) { + return instruction_timing[instr->op].stage == VPU_1; +} + +MachineInfo machine7450 = { + 6, + 1, + 4, + &latency, + &initialize, + &can_issue, + &issue, + &advance_clock, + &serializes, + &uses_vpermute_unit_altivec +}; diff --git a/compiler_and_linker/BackEnd/PowerPC/Scheduler/Scheduler.c b/compiler_and_linker/BackEnd/PowerPC/Scheduler/Scheduler.c new file mode 100644 index 0000000..23b580f --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/Scheduler/Scheduler.c @@ -0,0 +1,547 @@ +#include "compiler/Scheduler.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/Alias.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/Registers.h" + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct DGNode { + struct DGNode *x0; + struct DGNode *x4; + struct DGSuccessor *successors; + PCode *instr; + UInt16 x10; + UInt16 x12; + UInt16 x14; + UInt16 x16; + short predCount; +} DGNode; + +typedef struct DGSuccessor { + struct DGSuccessor *next; + DGNode *node; + UInt16 x8; +} DGSuccessor; + +typedef struct DGNodeList { + struct DGNodeList *next; + DGNode *node; +} DGNodeList; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +static DGNodeList **register_uses[RegClassMax]; +static DGNodeList **register_defs[RegClassMax]; +static DGNodeList *memory_uses; +static DGNodeList *memory_defs; +static DGNodeList *side_effects; +static DGNodeList *volatile_refs; +static DGNode *defaultsuccessor; +static UInt16 criticalpath; +static MachineInfo *MI; + +static void initresources(void) { + int rclass; + int i; + + for (rclass = 0; (char) rclass < RegClassMax; rclass++) { + register_uses[(char) rclass] = oalloc(sizeof(DGNodeList *) * used_virtual_registers[(char) rclass]); + register_defs[(char) rclass] = oalloc(sizeof(DGNodeList *) * used_virtual_registers[(char) rclass]); + for (i = 0; i < used_virtual_registers[(char) rclass]; i++) { + register_uses[(char) rclass][i] = register_defs[(char) rclass][i] = NULL; + } + } + + memory_uses = memory_defs = NULL; + side_effects = NULL; + volatile_refs = NULL; + criticalpath = 0; +} + +static DGNode *makedgnode(PCode *instr) { + DGNode *node; + + node = oalloc(sizeof(DGNode)); + node->x0 = NULL; + node->x4 = NULL; + node->successors = NULL; + node->instr = instr; + node->x10 = node->x16 = MI->latency(instr); + node->x12 = 0; + node->x14 = 0; + node->predCount = 0; + return node; +} + +static DGNode *adddgnode(DGNode *head, DGNode *node) { + if (head) + head->x4 = node; + node->x0 = head; + return node; +} + +static DGNode *removedgnode(DGNode *head, DGNode *node) { + if (node->x4) + node->x4->x0 = node->x0; + else + head = node->x0; + + if (node->x0) + node->x0->x4 = node->x4; + + return head; +} + +static void addtolist(DGNodeList **list, DGNode *node) { + DGNodeList *entry = oalloc(sizeof(DGNodeList)); + entry->node = node; + entry->next = *list; + *list = entry; +} + +static DGNodeList *makedglistnode(DGNode *node) { + DGNodeList *list = oalloc(sizeof(DGNodeList)); + list->next = NULL; + list->node = node; + return list; +} + +int is_same_operand(PCodeArg *a, PCodeArg *b) { + if (a->kind != b->kind) + return 0; + + switch (a->kind) { + case PCOp_IMMEDIATE: + if (a->data.imm.value != b->data.imm.value) + return 0; + break; + case PCOp_REGISTER: + if ((char) a->arg != (char) b->arg) + return 0; + if (a->data.reg.reg != b->data.reg.reg) + return 0; + break; + case PCOp_MEMORY: + if (a->data.mem.offset != b->data.mem.offset) + return 0; + if (a->data.mem.obj != b->data.mem.obj) + return 0; + break; + case PCOp_LABEL: + if (a->data.label.label != b->data.label.label) + return 0; + break; + } + + return 1; +} + +static void addsuccessor(DGNode *a, DGNode *b, Boolean flag) { + int v6; + int r29; + DGSuccessor *succ; + + if (flag) + v6 = a->x10; + else + v6 = 0; + + if (a != b) { + r29 = (v6 > 0) ? v6 : 0; + for (succ = a->successors; succ; succ = succ->next) { + if (succ->node == b) { + if (succ->x8 < r29) { + succ->x8 = r29; + if (b->x16 + succ->x8 > a->x16) + a->x16 = b->x16 + succ->x8; + } + return; + } + } + + succ = oalloc(sizeof(DGSuccessor)); + succ->node = b; + succ->next = a->successors; + a->successors = succ; + succ->x8 = r29; + + if (flag && (succ->node->instr->flags & fIsBranch)) + succ->x8 += MI->x8; + + b->predCount++; + + if (b->x16 + succ->x8 > a->x16) + a->x16 = b->x16 + succ->x8; + } +} + +static void serializeall(DGNode *nodes, DGNode *node) { + DGNode *scan; + + for (scan = nodes; scan; scan = scan->x0) + addsuccessor(node, scan, 0); +} + +static void serializelist(DGNode *node, DGNodeList *list) { + while (list) { + if (list->node != node) + addsuccessor(node, list->node, 0); + list = list->next; + } +} + +static void serializeregister(int rclass, DGNode *node, DGNodeList **defs, DGNodeList **uses, int isWrite) { + DGNodeList *list; + + if (isWrite) { + for (list = *uses; list; list = list->next) { + if (list->node != node) + addsuccessor(node, list->node, 1); + } + for (list = *defs; list; list = list->next) { + if (list->node != node) + addsuccessor(node, list->node, ((char) rclass == RegClass_SPR) || (MI->x4 == 0)); + } + + list = makedglistnode(node); + list->next = *defs; + *defs = list; + } else { + for (list = *defs; list; list = list->next) { + if (list->node != node) + addsuccessor(node, list->node, ((char) rclass == RegClass_SPR) || (MI->x4 == 0)); + } + + list = makedglistnode(node); + list->next = *uses; + *uses = list; + } +} + +static void serialize_load(DGNode *node) { + DGNodeList *list; + + for (list = memory_defs; list; list = list->next) { + if (may_alias(node->instr, list->node->instr)) + addsuccessor(node, list->node, 1); + } + + addtolist(&memory_uses, node); +} + +static void serialize_store(DGNode *node) { + DGNodeList *list; + + for (list = memory_uses; list; list = list->next) { + if (may_alias(node->instr, list->node->instr)) + addsuccessor(node, list->node, 1); + } + for (list = memory_defs; list; list = list->next) { + if (may_alias(node->instr, list->node->instr)) + addsuccessor(node, list->node, 1); + } + + addtolist(&memory_defs, node); + if (node->instr->flags & fPCodeFlag40000) + addtolist(&memory_uses, node); +} + +static void findsuccessors(DGNode *nodes, DGNode *node) { + PCode *instr; + PCodeArg *op; + int i; + + instr = node->instr; + for (i = 0, op = instr->args; i < instr->argCount; i++, op++) { + switch (op->kind) { + case PCOp_IMMEDIATE: + case PCOp_MEMORY: + break; + case PCOp_REGISTER: + if ( + op->data.reg.reg < 0 || + op->data.reg.reg > used_virtual_registers[(char) op->arg] + ) + { + CError_FATAL(491); + } + + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) { + if (op->data.reg.reg == Register2) + break; + if (op->data.reg.reg == Register0 && !(op->data.reg.effect & (EffectRead | EffectWrite))) + break; + } + + serializeregister( + op->arg, + node, + ®ister_defs[(char) op->arg][op->data.reg.reg], + ®ister_uses[(char) op->arg][op->data.reg.reg], + op->data.reg.effect & EffectWrite + ); + + break; + } + } + + if (instr->flags & (fIsRead | fPCodeFlag20000)) + serialize_load(node); + else if (instr->flags & (fIsWrite | fPCodeFlag40000)) + serialize_store(node); + + if (instr->flags & fIsVolatile) { + serializelist(node, volatile_refs); + addtolist(&volatile_refs, node); + } + + if ( + ((instr->flags & fIsCall | fIsBranch) && (instr->flags & fLink)) || + (instr->flags & fSideEffects) || + MI->serializes(instr) + ) + { + serializeall(nodes, node); + addtolist(&side_effects, node); + } + + if (side_effects) + serializelist(node, side_effects); + + if (!node->successors && defaultsuccessor) + addsuccessor(node, defaultsuccessor, 0); + + if (node->x16 > criticalpath) + criticalpath = node->x16; +} + +static void computedeadlines(DGNode *nodes) { + while (nodes) { + nodes->x14 = criticalpath - nodes->x16; + nodes = nodes->x0; + } +} + +static int uncovering(DGNode *node) { + int counter; + DGSuccessor *succ; + + counter = 0; + for (succ = node->successors; succ; succ = succ->next) { + if (succ->node->predCount == 1) + counter++; + } + + return counter; +} + +static DGNode *selectinstruction(DGNode *nodes, UInt16 counter) { + DGNode *node; + DGNode *node2; + int a; + int b; + + node = nodes; + while (node) { + if (node->predCount == 0 && node->x12 <= counter && MI->can_issue(node->instr)) + break; + node = node->x0; + } + + if (!node) + return NULL; + + for (node2 = node->x0; node2; node2 = node2->x0) { + if ( + node2->predCount == 0 && + node2->x12 <= counter && + MI->can_issue(node2->instr) && + (node->x14 > counter || node2->x14 <= counter) + ) + { + if (node->x14 > counter && node2->x14 <= counter) { + node = node2; + continue; + } + + if ((a = uncovering(node)) > (b = uncovering(node2))) + continue; + + if (a < b) { + node = node2; + continue; + } + + if (node->x16 > node2->x16) + continue; + + if (node->x16 < node2->x16) { + node = node2; + continue; + } + + if (coloring) { + if (opcodeinfo[node->instr->op].x9 < opcodeinfo[node2->instr->op].x9) + continue; + + if (opcodeinfo[node->instr->op].x9 > opcodeinfo[node2->instr->op].x9) + node = node2; + } + } + } + + return node; +} + +static void holdoffsuccessors(DGNode *node, UInt16 counter) { + DGSuccessor *succ; + DGNode *n; + + for (succ = node->successors; succ; succ = succ->next) { + n = succ->node; + n->predCount--; + if (n->x12 < counter + succ->x8) + n->x12 = counter + succ->x8; + } +} + +static void scheduleblock(PCodeBlock *block) { + DGNode *node; + UInt16 counter; + PCode *instr; + UInt16 i; + DGNode *head; + + initresources(); + defaultsuccessor = NULL; + head = NULL; + + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + DGNode *n = makedgnode(instr); + findsuccessors(head, n); + if (instr->flags & fIsBranch) + defaultsuccessor = n; + head = adddgnode(head, n); + } + + computedeadlines(head); + block->firstPCode = block->lastPCode = NULL; + block->pcodeCount = 0; + + MI->initialize(); + counter = 0; + while (head != NULL) { + for (i = 0; i < MI->x0; i++) { + if (head == NULL) + break; + + node = selectinstruction(head, counter); + if (!node) + break; + + instr = node->instr; + if (node->successors) + holdoffsuccessors(node, counter); + + appendpcode(block, instr); + MI->issue(instr); + head = removedgnode(head, node); + } + + MI->advance_clock(); + counter++; + } + + freeoheap(); +} + +void scheduleinstructions(Boolean flag) { + PCodeBlock *block; + int cpu; + + cpu = copts.scheduling; + if (cpu == 10) { + MI = &machine7450; + } else if (copts.altivec_model != 0 || cpu == 7) { + MI = &machine7400; + } else if (cpu == 2) { + MI = &machine603; + } else if (cpu == 5) { + MI = &machine603e; + } else if (cpu == 3) { + MI = &machine604; + } else if (cpu == 6) { + MI = &machine604; + } else if (cpu == 4) { + MI = &machine750; + } else if (cpu == 1) { + MI = &machine601; + } else if (cpu == 9) { + MI = &machine821; + } else { + MI = &machine603; + } + + gather_alias_info(); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if ( + block->pcodeCount > 2 && + (flag || !(block->flags & (fIsProlog | fIsEpilogue))) && + !(block->flags & fScheduled) + ) + { + scheduleblock(block); + block->flags |= fScheduled; + } + } +} + +int is_dependent(PCode *a, PCode *b, char rclass) { + int i; + int reg; + PCodeArg *op; + + if ( + b && + b->argCount >= 1 && + b->args[0].kind == PCOp_REGISTER && + (char) b->args[0].arg == rclass && + (b->args[0].data.reg.effect & EffectWrite) + ) + { + reg = b->args[0].data.reg.reg; + for (i = 0; i < a->argCount; i++) { + op = &a->args[i]; + if ( + op->kind == PCOp_REGISTER && + (char) op->arg == rclass && + (op->data.reg.effect & (EffectRead | EffectWrite)) && + op->data.reg.reg == reg + ) + return 1; + } + } + + return 0; +} + +int uses_vpermute_unit(PCode *instr) { + int cpu; + + cpu = copts.scheduling; + if (cpu == 10) + return machine7450.uses_vpermute_unit(instr); + if (copts.altivec_model != 0 || cpu == 7) + return machine7400.uses_vpermute_unit(instr); + return 0; +} + +int default_uses_vpermute_unit(PCode *instr) { + return 0; +} |