diff options
author | Ash Wolf <ninji@wuffs.org> | 2023-01-26 11:30:47 +0000 |
---|---|---|
committer | Ash Wolf <ninji@wuffs.org> | 2023-01-26 11:30:47 +0000 |
commit | 094b96ca1df4a035b5f93c351f773306c0241f3f (patch) | |
tree | 95ce05e3ebe816c7ee7996206bb37ea17d8ca33c /compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c | |
parent | fc0c4c0df7b583b55a08317cf1ef6a71d27c0440 (diff) | |
download | MWCC-094b96ca1df4a035b5f93c351f773306c0241f3f.tar.gz MWCC-094b96ca1df4a035b5f93c351f773306c0241f3f.zip |
move lots of source files around to match their actual placement in the original treemain
Diffstat (limited to 'compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c')
-rw-r--r-- | compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c | 751 |
1 files changed, 751 insertions, 0 deletions
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c new file mode 100644 index 0000000..2b68dca --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c @@ -0,0 +1,751 @@ +#include "compiler/StrengthReduction.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/LoopDetection.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" + +int strengthreducedloops; + +static PCode *findinitializer(Loop *loop, short reg) { + UInt32 *vec; + PCode *best; + RegUseOrDef *list; + + vec = usedefinfo[loop->body->blockIndex].defvec8; + best = NULL; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + if ( + !(bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) && + bitvectorgetbit(list->id, vec) + ) + { + if (best) + return NULL; + best = Defs[list->id].pcode; + } + } + + if (best) { + if (best->op == PC_LI || best->op == PC_ADDI || best->op == PC_ADD) + return best; + } + + return NULL; +} + +static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) { + RegUseOrDef *list; + PCode *instr; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + instr = Defs[list->id].pcode; + if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { + if (instr->op != PC_ADDI) + return 0; + if (instr->args[1].data.reg.reg != reg) + return 0; + if (instr->args[2].data.imm.value != step) + return 0; + } + } + + return 1; +} + +static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) { + BasicInductionVar *biv; + RegUseOrDef *list; + PCode *instr; + InstrList *instrList; + + for (biv = loop->basicInductionVars; biv; biv = biv->next) { + if (biv->reg == reg) + return; + } + + biv = oalloc(sizeof(BasicInductionVar)); + biv->next = loop->basicInductionVars; + loop->basicInductionVars = biv; + + biv->loop = loop; + biv->inductionVars = NULL; + biv->instrsC = NULL; + biv->step = step; + biv->reg = reg; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + instr = Defs[list->id].pcode; + if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { + instrList = oalloc(sizeof(InstrList)); + instrList->next = biv->instrsC; + biv->instrsC = instrList; + instrList->instr = instr; + } + } + + biv->initializer = findinitializer(loop, reg); +} + +static void findbasicinductionvariables(Loop *loop) { + SInt16 step; + BlockList *block; + PCode *instr; + short reg; + + for (block = loop->blocks; block; block = block->next) { + for (instr = block->block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op == PC_ADDI) { + if ( + (reg = instr->args[0].data.reg.reg) >= 32 && + instr->args[1].data.reg.reg == reg && + isbasicinductionvariable(loop, reg, step = instr->args[2].data.imm.value) + ) + addbasicinductionvariable(loop, reg, step); + } + } + } +} + +static void findallbasicinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + findallbasicinductionvariables(loop->children); + findbasicinductionvariables(loop); + loop = loop->nextSibling; + } +} + +static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) { + RegUseOrDef *list; + int counter; + Loop *loop; + Loop *scanloop; + PCode *instr; + + instr = Uses[useID].pcode; + *result2 = 0; + *result3 = 0; + *result4 = NULL; + + switch (instr->op) { + case PC_MULLI: + *result1 = instr->args[2].data.imm.value; + break; + + case PC_RLWINM: + if (instr->args[3].data.imm.value) + return 0; + if (instr->args[2].data.imm.value > 15) + return 0; + if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value)) + return 0; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + return 0; + *result1 = 1 << instr->args[2].data.imm.value; + break; + + case PC_LBZX: + case PC_LHZX: + case PC_LHAX: + case PC_LWZX: + case PC_STBX: + case PC_STHX: + case PC_STWX: + case PC_LFSX: + case PC_LFDX: + case PC_STFSX: + case PC_STFDX: + *result2 = 0; + *result3 = 0; + if (instr->args[1].data.reg.reg == biv->reg) { + *result2 = 1; + *result3 = 2; + } else if (instr->args[2].data.reg.reg == biv->reg) { + *result2 = 2; + *result3 = 1; + } + + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) + counter++; + } + if (counter) + return 0; + + loop = biv->loop; + for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) { + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks)) + counter++; + } + if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks)) + counter++; + if (counter) + break; + loop = scanloop; + } + + *result4 = loop; + *result1 = 1; + return 1; + + default: + return 0; + } + + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) + counter++; + } + + return counter == 1; +} + +static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) { + InductionVar *iv; + + iv = oalloc(sizeof(InductionVar)); + iv->next = biv->inductionVars; + biv->inductionVars = iv; + + iv->basicVar = biv; + iv->instr = instr; + iv->instrC = NULL; + iv->step = val1; + iv->x18 = val2; + iv->x1A = val3; + iv->someloop = val4; + if (instr->flags & (fIsRead | fIsWrite)) + iv->x1C = -1; + else + iv->x1C = instr->args[0].data.reg.reg; + iv->x1E = -1; +} + +static void findnonbasicinductionvariables(Loop *loop) { + BasicInductionVar *biv; + RegUseOrDef *list; + SInt32 result1; + short result2; + short result3; + Loop *result4; + + for (biv = loop->basicInductionVars; biv; biv = biv->next) { + for (list = reg_Uses[RegClass_GPR][biv->reg]; list; list = list->next) { + if (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { + if (isinductionvariable(biv, list->id, &result1, &result2, &result3, &result4)) + addinductionvariable(biv, Uses[list->id].pcode, result1, result2, result3, result4); + } + } + } +} + +static void findallnonbasicinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + findallnonbasicinductionvariables(loop->children); + if (loop->basicInductionVars) + findnonbasicinductionvariables(loop); + loop = loop->nextSibling; + } +} + +static void initializeinductionvariable(InductionVar *iv) { + BasicInductionVar *biv; // r31 + PCode *instr; // r27 + PCodeBlock *preheader; // r30 + SInt32 value30; // r30 + short reg29; // r29 + short reg26; // r26 + + biv = iv->basicVar; + preheader = biv->loop->preheader; + + if (iv->x1A) { + reg29 = iv->instr->args[iv->x1A].data.reg.reg; + reg26 = iv->instr->args[iv->x18].data.reg.reg; + instr = NULL; + + if ( + biv->initializer && + biv->initializer->op == PC_LI && + biv->initializer->block == preheader + ) + { + if (biv->initializer->args[1].data.imm.value == 0) + instr = makepcode(PC_MR, iv->x1E, reg29); + else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value)) + instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value); + } + + if (!instr) + instr = makepcode(PC_ADD, iv->x1E, reg29, reg26); + + if (biv->initializer && instr->op != PC_ADD) + insertpcodeafter(biv->initializer, instr); + else if (iv->someloop && iv->someloop->preheader->lastPCode) + insertpcodebefore(iv->someloop->preheader->lastPCode, instr); + else + insertpcodebefore(preheader->lastPCode, instr); + + iv->instrC = instr; + iv->x1C = reg29; + return; + } + + if (!biv->initializer || biv->initializer->op != PC_LI) { + instr = copypcode(iv->instr); + instr->args[0].data.reg.reg = iv->x1E; + insertpcodebefore(preheader->lastPCode, instr); + } else { + value30 = biv->initializer->args[1].data.imm.value * iv->step; + if (!FITS_IN_SHORT(value30)) { + instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30)); + insertpcodeafter(biv->initializer, instr); + if (value30 != 0) + insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30))); + } else { + instr = makepcode(PC_LI, iv->x1E, value30); + insertpcodeafter(biv->initializer, instr); + } + } +} + +static void incrementinductionvariable(InductionVar *iv) { + SInt32 value; + BasicInductionVar *biv; + PCode *instr; + InstrList *list; + + biv = iv->basicVar; + value = iv->step * biv->step; + for (list = biv->instrsC; list; list = list->next) { + if (!FITS_IN_SHORT(value)) { + instr = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value)); + insertpcodeafter(list->instr, instr); + + if (value != 0) { + instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value)); + insertpcodeafter(list->instr->nextPCode, instr); + } + } else { + instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value); + insertpcodeafter(list->instr, instr); + } + } +} + +static void copyinductionvariable(InductionVar *iv) { + if (iv->instr->flags & (fIsRead | fIsWrite)) { + iv->instr->op -= 2; + iv->instr->args[1].data.reg.reg = iv->x1E; + iv->instr->args[2].kind = PCOp_IMMEDIATE; + iv->instr->args[2].data.imm.value = 0; + iv->instr->args[2].data.imm.obj = NULL; + } else { + insertpcodeafter(iv->instr, makepcode(PC_MR, iv->x1C, iv->x1E)); + deletepcode(iv->instr); + } +} + +static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) { + SInt32 addend; + BlockList *list; + PCode *instr; + PCodeArg *op; + int i; + + if (iv->instrC && iv->x1C == reg) { + if (iv->instrC->op == PC_MR) + addend = 0; + else if (iv->instrC->op == PC_ADDI) + addend = iv->instrC->args[2].data.imm.value; + else + return 0; + + if (step2 == (addend + (step1 * iv->step * loop2->iterationCount))) { + for (list = loop1->blocks; list && list->block != loop2->blocks->block; list = list->next) { + for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == reg + ) + return 0; + op++; + } + } + } + return 1; + } + } + + return 0; +} + +static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) { + Loop *scanloop; + BasicInductionVar *biv; + InductionVar *iv; + PCode *instr; + PCodeArg *op; + int i; + + for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) { + if ( + scanloop->isKnownCountingLoop && + scanloop->x4F && + bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C) + ) + { + for (biv = scanloop->basicInductionVars; biv; biv = biv->next) { + for (iv = biv->inductionVars; iv; iv = iv->next) { + if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) { + deletepcode(iv->instrC); + if (initializer) { + insertpcodeafter(initializer, iv->instrC); + } else if (loop->body->lastPCode) { + for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg + ) + break; + op++; + } + } + + if (instr) + insertpcodeafter(instr, iv->instrC); + else + insertpcodebefore(loop->body->firstPCode, iv->instrC); + } else { + appendpcode(loop->body, iv->instrC); + } + } + } + } + } + } +} + +static void strengthreducenestedbiv(BasicInductionVar *biv) { + Loop *loop; + InductionVar *iv; + + loop = biv->loop; + for (iv = biv->inductionVars; iv; iv = iv->next) + strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, loop); + + strengthreducenestediv(biv->reg, biv->step, biv->initializer, loop); +} + +static void strengthreduceinductionvariable(BasicInductionVar *biv) { + int counter; + InductionVar *iv; + InductionVar *otherIv; + short reg; + + counter = 0; + for (iv = biv->inductionVars; iv; iv = iv->next) { + if (iv->step == 1) + counter++; + } + + for (iv = biv->inductionVars; iv; iv = iv->next) { + if ( + (counter <= 4 || iv->step != 1) && + iv->instr->block && + (iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE) + ) + { + if (iv->x1E == -1) { + iv->x1E = used_virtual_registers[RegClass_GPR]++; + initializeinductionvariable(iv); + incrementinductionvariable(iv); + if (iv->step == 1) { + reg = iv->instr->args[iv->x1A].data.reg.reg; + for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { + if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg) + otherIv->x1E = iv->x1E; + } + } else { + for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { + if (otherIv->step == iv->step) + otherIv->x1E = iv->x1E; + } + } + } + + copyinductionvariable(iv); + strengthreducedloops = 1; + } + } +} + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct BivInit { + SInt32 x0; + short x4; + short x6; + short x8; + Object *xA; +} BivInit; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +static void calc_biv_init(BasicInductionVar *biv, BivInit *init) { + PCode *instr; + PCode *scan; + PCodeArg *op; + int i; + + instr = biv->initializer; + init->x0 = 0; + init->x4 = -1; + init->x6 = -1; + init->x8 = 0; + init->xA = NULL; + + if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD)) + return; + + if (instr->op == PC_ADDI) { + if (instr->args[1].data.reg.reg == biv->reg) { + init->x0 = instr->args[2].data.imm.value; + for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { + op = scan->args; + i = scan->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == biv->reg && + (op->data.reg.effect & EffectWrite) + ) + { + if (scan->op == PC_ADD) { + init->x4 = scan->args[1].data.reg.reg; + init->x6 = scan->args[2].data.reg.reg; + } else if (scan->op == PC_ADDI) { + if (scan->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.imm.value; + } else if (scan->args[2].kind == PCOp_MEMORY) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.mem.offset; + init->xA = scan->args[2].data.mem.obj; + } + } + return; + } + op++; + } + } + } else { + if (instr->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = instr->args[1].data.reg.reg; + init->x8 = instr->args[2].data.imm.value; + } else if (instr->args[2].kind == PCOp_MEMORY) { + init->x4 = instr->args[1].data.reg.reg; + init->x8 = instr->args[2].data.mem.offset; + init->xA = instr->args[2].data.mem.obj; + } + } + } else if (instr->op == PC_ADD) { + if (instr->args[1].data.reg.reg == biv->reg) { + init->x6 = instr->args[2].data.reg.reg; + for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { + op = scan->args; + i = scan->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == biv->reg && + (op->data.reg.effect & EffectWrite) && + scan->op == PC_ADDI + ) + { + if (scan->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.imm.value; + } else if (scan->args[2].kind == PCOp_MEMORY) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.mem.offset; + init->xA = scan->args[2].data.mem.obj; + } + return; + } + op++; + } + } + } else { + init->x4 = instr->args[1].data.reg.reg; + init->x6 = instr->args[2].data.reg.reg; + } + } +} + +static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) { + PCode *instr1; // r31 + int reg1; // r30 + int reg2; // r29 + PCode *instr2; // r24 + PCodeBlock *nextBlock; // r24 + BlockList *list; + PCodeArg *op; + int i; + PCode *instr; + + instr1 = NULL; + instr2 = NULL; + + reg1 = biv1->reg; + CError_ASSERT(930, reg1 >= 0); + + reg2 = biv2->reg; + CError_ASSERT(934, reg2 >= 0); + + if (!FITS_IN_SHORT(difference)) + return; + + for (list = loop->blocks; list; list = list->next) { + for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { + if (instr1) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == reg1 + ) + return; + op++; + } + } + + if (instr->op == PC_ADDI) { + if (instr->args[0].data.reg.reg == reg1) { + if (instr1) + return; + instr1 = instr; + } else if (instr->args[0].data.reg.reg == reg2) { + if (instr2) + return; + instr2 = instr; + } + } + } + } + + if (loop->body->lastPCode->flags & fIsBranch) { + nextBlock = NULL; + + for (i = 0; i < loop->body->lastPCode->argCount; i++) { + if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) { + nextBlock = loop->body->lastPCode->args[i].data.label.label->block; + break; + } + } + + if (!nextBlock) + return; + } else { + nextBlock = loop->body->nextBlock; + } + + deletepcode(instr1); + instr1->args[1].data.reg.reg = reg2; + instr1->args[2].data.imm.value = difference; + + if (nextBlock->firstPCode) + insertpcodebefore(nextBlock->firstPCode, instr1); + else + appendpcode(nextBlock, instr1); + + biv1->reg = -1; + strengthreducedloops = 1; +} + +static void strengthreduceinductionvariables(Loop *loop) { + BasicInductionVar *biv1; + BasicInductionVar *biv2; + BivInit init1; + BivInit init2; + + for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { + if (biv1->inductionVars) + strengthreduceinductionvariable(biv1); + strengthreducenestedbiv(biv1); + } + + for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { + if (biv1->reg != -1) { + calc_biv_init(biv1, &init1); + if (init1.x4 != -1) { + for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) { + if (biv2->reg != -1 && biv2 != biv1) { + calc_biv_init(biv2, &init2); + if ( + init2.x4 != -1 && + init1.x4 == init2.x4 && + init1.x6 == init2.x6 && + init1.x8 == init2.x8 && + init1.xA == init2.xA && + biv1->step == biv2->step + ) + { + if (init1.x0 < init2.x0) { + combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0); + } else { + combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0); + break; + } + } + } + } + } + } + } +} + +static void strengthreduceallinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + strengthreduceallinductionvariables(loop->children); + if (loop->basicInductionVars) + strengthreduceinductionvariables(loop); + loop = loop->nextSibling; + } +} + +void strengthreduceloops(void) { + strengthreducedloops = 0; + if (loopsinflowgraph) { + computeusedefchains(0); + findallbasicinductionvariables(loopsinflowgraph); + findallnonbasicinductionvariables(loopsinflowgraph); + strengthreduceallinductionvariables(loopsinflowgraph); + freeoheap(); + } +} |