#include "compiler/StrengthReduction.h" #include "compiler/BitVectors.h" #include "compiler/CompilerTools.h" #include "compiler/LoopDetection.h" #include "compiler/PCode.h" #include "compiler/PCodeInfo.h" #include "compiler/Registers.h" #include "compiler/UseDefChains.h" int strengthreducedloops; static PCode *findinitializer(Loop *loop, short reg) { UInt32 *vec; PCode *best; RegUseOrDef *list; vec = usedefinfo[loop->body->blockIndex].defvec8; best = NULL; for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { if ( !(bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) && bitvectorgetbit(list->id, vec) ) { if (best) return NULL; best = Defs[list->id].pcode; } } if (best) { if (best->op == PC_LI || best->op == PC_ADDI || best->op == PC_ADD) return best; } return NULL; } static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) { RegUseOrDef *list; PCode *instr; for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { instr = Defs[list->id].pcode; if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { if (instr->op != PC_ADDI) return 0; if (instr->args[1].data.reg.reg != reg) return 0; if (instr->args[2].data.imm.value != step) return 0; } } return 1; } static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) { BasicInductionVar *biv; RegUseOrDef *list; PCode *instr; InstrList *instrList; for (biv = loop->basicInductionVars; biv; biv = biv->next) { if (biv->reg == reg) return; } biv = oalloc(sizeof(BasicInductionVar)); biv->next = loop->basicInductionVars; loop->basicInductionVars = biv; biv->loop = loop; biv->inductionVars = NULL; biv->instrsC = NULL; biv->step = step; biv->reg = reg; for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { instr = Defs[list->id].pcode; if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { instrList = oalloc(sizeof(InstrList)); instrList->next = biv->instrsC; biv->instrsC = instrList; instrList->instr = instr; } } biv->initializer = findinitializer(loop, reg); } static void findbasicinductionvariables(Loop *loop) { SInt16 step; BlockList *block; PCode *instr; short reg; for (block = loop->blocks; block; block = block->next) { for (instr = block->block->firstPCode; instr; instr = instr->nextPCode) { if (instr->op == PC_ADDI) { if ( (reg = instr->args[0].data.reg.reg) >= 32 && instr->args[1].data.reg.reg == reg && isbasicinductionvariable(loop, reg, step = instr->args[2].data.imm.value) ) addbasicinductionvariable(loop, reg, step); } } } } static void findallbasicinductionvariables(Loop *loop) { while (loop) { if (loop->children) findallbasicinductionvariables(loop->children); findbasicinductionvariables(loop); loop = loop->nextSibling; } } static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) { RegUseOrDef *list; int counter; Loop *loop; Loop *scanloop; PCode *instr; instr = Uses[useID].pcode; *result2 = 0; *result3 = 0; *result4 = NULL; switch (instr->op) { case PC_MULLI: *result1 = instr->args[2].data.imm.value; break; case PC_RLWINM: if (instr->args[3].data.imm.value) return 0; if (instr->args[2].data.imm.value > 15) return 0; if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value)) return 0; if (PCODE_FLAG_SET_F(instr) & fRecordBit) return 0; *result1 = 1 << instr->args[2].data.imm.value; break; case PC_LBZX: case PC_LHZX: case PC_LHAX: case PC_LWZX: case PC_STBX: case PC_STHX: case PC_STWX: case PC_LFSX: case PC_LFDX: case PC_STFSX: case PC_STFDX: *result2 = 0; *result3 = 0; if (instr->args[1].data.reg.reg == biv->reg) { *result2 = 1; *result3 = 2; } else if (instr->args[2].data.reg.reg == biv->reg) { *result2 = 2; *result3 = 1; } counter = 0; for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) counter++; } if (counter) return 0; loop = biv->loop; for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) { counter = 0; for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks)) counter++; } if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks)) counter++; if (counter) break; loop = scanloop; } *result4 = loop; *result1 = 1; return 1; default: return 0; } counter = 0; for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) { if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) counter++; } return counter == 1; } static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) { InductionVar *iv; iv = oalloc(sizeof(InductionVar)); iv->next = biv->inductionVars; biv->inductionVars = iv; iv->basicVar = biv; iv->instr = instr; iv->instrC = NULL; iv->step = val1; iv->x18 = val2; iv->x1A = val3; iv->someloop = val4; if (instr->flags & (fIsRead | fIsWrite)) iv->x1C = -1; else iv->x1C = instr->args[0].data.reg.reg; iv->x1E = -1; } static void findnonbasicinductionvariables(Loop *loop) { BasicInductionVar *biv; RegUseOrDef *list; SInt32 result1; short result2; short result3; Loop *result4; for (biv = loop->basicInductionVars; biv; biv = biv->next) { for (list = reg_Uses[RegClass_GPR][biv->reg]; list; list = list->next) { if (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { if (isinductionvariable(biv, list->id, &result1, &result2, &result3, &result4)) addinductionvariable(biv, Uses[list->id].pcode, result1, result2, result3, result4); } } } } static void findallnonbasicinductionvariables(Loop *loop) { while (loop) { if (loop->children) findallnonbasicinductionvariables(loop->children); if (loop->basicInductionVars) findnonbasicinductionvariables(loop); loop = loop->nextSibling; } } static void initializeinductionvariable(InductionVar *iv) { BasicInductionVar *biv; // r31 PCode *instr; // r27 PCodeBlock *preheader; // r30 SInt32 value30; // r30 short reg29; // r29 short reg26; // r26 biv = iv->basicVar; preheader = biv->loop->preheader; if (iv->x1A) { reg29 = iv->instr->args[iv->x1A].data.reg.reg; reg26 = iv->instr->args[iv->x18].data.reg.reg; instr = NULL; if ( biv->initializer && biv->initializer->op == PC_LI && biv->initializer->block == preheader ) { if (biv->initializer->args[1].data.imm.value == 0) instr = makepcode(PC_MR, iv->x1E, reg29); else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value)) instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value); } if (!instr) instr = makepcode(PC_ADD, iv->x1E, reg29, reg26); if (biv->initializer && instr->op != PC_ADD) insertpcodeafter(biv->initializer, instr); else if (iv->someloop && iv->someloop->preheader->lastPCode) insertpcodebefore(iv->someloop->preheader->lastPCode, instr); else insertpcodebefore(preheader->lastPCode, instr); iv->instrC = instr; iv->x1C = reg29; return; } if (!biv->initializer || biv->initializer->op != PC_LI) { instr = copypcode(iv->instr); instr->args[0].data.reg.reg = iv->x1E; insertpcodebefore(preheader->lastPCode, instr); } else { value30 = biv->initializer->args[1].data.imm.value * iv->step; if (!FITS_IN_SHORT(value30)) { instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30)); insertpcodeafter(biv->initializer, instr); if (value30 != 0) insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30))); } else { instr = makepcode(PC_LI, iv->x1E, value30); insertpcodeafter(biv->initializer, instr); } } } static void incrementinductionvariable(InductionVar *iv) { SInt32 value; BasicInductionVar *biv; PCode *instr; InstrList *list; biv = iv->basicVar; value = iv->step * biv->step; for (list = biv->instrsC; list; list = list->next) { if (!FITS_IN_SHORT(value)) { instr = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value)); insertpcodeafter(list->instr, instr); if (value != 0) { instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value)); insertpcodeafter(list->instr->nextPCode, instr); } } else { instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value); insertpcodeafter(list->instr, instr); } } } static void copyinductionvariable(InductionVar *iv) { if (iv->instr->flags & (fIsRead | fIsWrite)) { iv->instr->op -= 2; iv->instr->args[1].data.reg.reg = iv->x1E; iv->instr->args[2].kind = PCOp_IMMEDIATE; iv->instr->args[2].data.imm.value = 0; iv->instr->args[2].data.imm.obj = NULL; } else { insertpcodeafter(iv->instr, makepcode(PC_MR, iv->x1C, iv->x1E)); deletepcode(iv->instr); } } static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) { SInt32 addend; BlockList *list; PCode *instr; PCodeArg *op; int i; if (iv->instrC && iv->x1C == reg) { if (iv->instrC->op == PC_MR) addend = 0; else if (iv->instrC->op == PC_ADDI) addend = iv->instrC->args[2].data.imm.value; else return 0; if (step2 == (addend + (step1 * iv->step * loop2->iterationCount))) { for (list = loop1->blocks; list && list->block != loop2->blocks->block; list = list->next) { for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { op = instr->args; i = instr->argCount; while (i--) { if ( op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && op->data.reg.reg == reg ) return 0; op++; } } } return 1; } } return 0; } static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) { Loop *scanloop; BasicInductionVar *biv; InductionVar *iv; PCode *instr; PCodeArg *op; int i; for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) { if ( scanloop->isKnownCountingLoop && scanloop->x4F && bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C) ) { for (biv = scanloop->basicInductionVars; biv; biv = biv->next) { for (iv = biv->inductionVars; iv; iv = iv->next) { if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) { deletepcode(iv->instrC); if (initializer) { insertpcodeafter(initializer, iv->instrC); } else if (loop->body->lastPCode) { for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) { op = instr->args; i = instr->argCount; while (i--) { if ( op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite) && op->data.reg.reg == reg ) break; op++; } } if (instr) insertpcodeafter(instr, iv->instrC); else insertpcodebefore(loop->body->firstPCode, iv->instrC); } else { appendpcode(loop->body, iv->instrC); } } } } } } } static void strengthreducenestedbiv(BasicInductionVar *biv) { Loop *loop; InductionVar *iv; loop = biv->loop; for (iv = biv->inductionVars; iv; iv = iv->next) strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, loop); strengthreducenestediv(biv->reg, biv->step, biv->initializer, loop); } static void strengthreduceinductionvariable(BasicInductionVar *biv) { int counter; InductionVar *iv; InductionVar *otherIv; short reg; counter = 0; for (iv = biv->inductionVars; iv; iv = iv->next) { if (iv->step == 1) counter++; } for (iv = biv->inductionVars; iv; iv = iv->next) { if ( (counter <= 4 || iv->step != 1) && iv->instr->block && (iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE) ) { if (iv->x1E == -1) { iv->x1E = used_virtual_registers[RegClass_GPR]++; initializeinductionvariable(iv); incrementinductionvariable(iv); if (iv->step == 1) { reg = iv->instr->args[iv->x1A].data.reg.reg; for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg) otherIv->x1E = iv->x1E; } } else { for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { if (otherIv->step == iv->step) otherIv->x1E = iv->x1E; } } } copyinductionvariable(iv); strengthreducedloops = 1; } } } #ifdef __MWERKS__ #pragma options align=mac68k #endif typedef struct BivInit { SInt32 x0; short x4; short x6; short x8; Object *xA; } BivInit; #ifdef __MWERKS__ #pragma options align=reset #endif static void calc_biv_init(BasicInductionVar *biv, BivInit *init) { PCode *instr; PCode *scan; PCodeArg *op; int i; instr = biv->initializer; init->x0 = 0; init->x4 = -1; init->x6 = -1; init->x8 = 0; init->xA = NULL; if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD)) return; if (instr->op == PC_ADDI) { if (instr->args[1].data.reg.reg == biv->reg) { init->x0 = instr->args[2].data.imm.value; for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { op = scan->args; i = scan->argCount; while (i--) { if ( op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && op->data.reg.reg == biv->reg && (op->data.reg.effect & EffectWrite) ) { if (scan->op == PC_ADD) { init->x4 = scan->args[1].data.reg.reg; init->x6 = scan->args[2].data.reg.reg; } else if (scan->op == PC_ADDI) { if (scan->args[2].kind == PCOp_IMMEDIATE) { init->x4 = scan->args[1].data.reg.reg; init->x8 = scan->args[2].data.imm.value; } else if (scan->args[2].kind == PCOp_MEMORY) { init->x4 = scan->args[1].data.reg.reg; init->x8 = scan->args[2].data.mem.offset; init->xA = scan->args[2].data.mem.obj; } } return; } op++; } } } else { if (instr->args[2].kind == PCOp_IMMEDIATE) { init->x4 = instr->args[1].data.reg.reg; init->x8 = instr->args[2].data.imm.value; } else if (instr->args[2].kind == PCOp_MEMORY) { init->x4 = instr->args[1].data.reg.reg; init->x8 = instr->args[2].data.mem.offset; init->xA = instr->args[2].data.mem.obj; } } } else if (instr->op == PC_ADD) { if (instr->args[1].data.reg.reg == biv->reg) { init->x6 = instr->args[2].data.reg.reg; for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { op = scan->args; i = scan->argCount; while (i--) { if ( op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && op->data.reg.reg == biv->reg && (op->data.reg.effect & EffectWrite) && scan->op == PC_ADDI ) { if (scan->args[2].kind == PCOp_IMMEDIATE) { init->x4 = scan->args[1].data.reg.reg; init->x8 = scan->args[2].data.imm.value; } else if (scan->args[2].kind == PCOp_MEMORY) { init->x4 = scan->args[1].data.reg.reg; init->x8 = scan->args[2].data.mem.offset; init->xA = scan->args[2].data.mem.obj; } return; } op++; } } } else { init->x4 = instr->args[1].data.reg.reg; init->x6 = instr->args[2].data.reg.reg; } } } static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) { PCode *instr1; // r31 int reg1; // r30 int reg2; // r29 PCode *instr2; // r24 PCodeBlock *nextBlock; // r24 BlockList *list; PCodeArg *op; int i; PCode *instr; instr1 = NULL; instr2 = NULL; reg1 = biv1->reg; CError_ASSERT(930, reg1 >= 0); reg2 = biv2->reg; CError_ASSERT(934, reg2 >= 0); if (!FITS_IN_SHORT(difference)) return; for (list = loop->blocks; list; list = list->next) { for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { if (instr1) { op = instr->args; i = instr->argCount; while (i--) { if ( op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && op->data.reg.reg == reg1 ) return; op++; } } if (instr->op == PC_ADDI) { if (instr->args[0].data.reg.reg == reg1) { if (instr1) return; instr1 = instr; } else if (instr->args[0].data.reg.reg == reg2) { if (instr2) return; instr2 = instr; } } } } if (loop->body->lastPCode->flags & fIsBranch) { nextBlock = NULL; for (i = 0; i < loop->body->lastPCode->argCount; i++) { if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) { nextBlock = loop->body->lastPCode->args[i].data.label.label->block; break; } } if (!nextBlock) return; } else { nextBlock = loop->body->nextBlock; } deletepcode(instr1); instr1->args[1].data.reg.reg = reg2; instr1->args[2].data.imm.value = difference; if (nextBlock->firstPCode) insertpcodebefore(nextBlock->firstPCode, instr1); else appendpcode(nextBlock, instr1); biv1->reg = -1; strengthreducedloops = 1; } static void strengthreduceinductionvariables(Loop *loop) { BasicInductionVar *biv1; BasicInductionVar *biv2; BivInit init1; BivInit init2; for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { if (biv1->inductionVars) strengthreduceinductionvariable(biv1); strengthreducenestedbiv(biv1); } for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { if (biv1->reg != -1) { calc_biv_init(biv1, &init1); if (init1.x4 != -1) { for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) { if (biv2->reg != -1 && biv2 != biv1) { calc_biv_init(biv2, &init2); if ( init2.x4 != -1 && init1.x4 == init2.x4 && init1.x6 == init2.x6 && init1.x8 == init2.x8 && init1.xA == init2.xA && biv1->step == biv2->step ) { if (init1.x0 < init2.x0) { combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0); } else { combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0); break; } } } } } } } } static void strengthreduceallinductionvariables(Loop *loop) { while (loop) { if (loop->children) strengthreduceallinductionvariables(loop->children); if (loop->basicInductionVars) strengthreduceinductionvariables(loop); loop = loop->nextSibling; } } void strengthreduceloops(void) { strengthreducedloops = 0; if (loopsinflowgraph) { computeusedefchains(0); findallbasicinductionvariables(loopsinflowgraph); findallnonbasicinductionvariables(loopsinflowgraph); strengthreduceallinductionvariables(loopsinflowgraph); freeoheap(); } }