From 25bab8b1fb2fc851ea3f1f630b3de65ca6afdc22 Mon Sep 17 00:00:00 2001 From: Ash Wolf Date: Wed, 14 Dec 2022 00:16:59 +0000 Subject: haha it's been a while since i last committed, hasn't it --- compiler_and_linker/unsorted/LoopOptimization.c | 1565 +++++++++++++++++++++++ 1 file changed, 1565 insertions(+) (limited to 'compiler_and_linker/unsorted/LoopOptimization.c') diff --git a/compiler_and_linker/unsorted/LoopOptimization.c b/compiler_and_linker/unsorted/LoopOptimization.c index e69de29..6472119 100644 --- a/compiler_and_linker/unsorted/LoopOptimization.c +++ b/compiler_and_linker/unsorted/LoopOptimization.c @@ -0,0 +1,1565 @@ +#include "compiler/LoopOptimization.h" +#include "compiler/CFunc.h" +#include "compiler/CParser.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/LoopDetection.h" +#include "compiler/PCode.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +int optimizedloops; +int optimizedloop_full_unroll; +int optimizedloop_trans_regs; +static UInt32 *liveonexit; +static UInt32 *inductionvariables; +static int last_virtual_GPR; + +static int ispowerof2(SInt32 value) { + int bit = getbit(value); + return (bit > 0 && bit < 31) ? bit : 0; +} + +static void insertupdateinstructions(Loop *loop) { + // nothing +} + +static void computeliveonexit(Loop *loop) { + UInt32 *usevec; + UInt32 *defvec; + BlockList *blocklist; + RegUseOrDef *list; + int gpr; + + bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0); + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24)) + bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses); + } + + bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0); + if (loop->preheader) + bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs); + + bitvectorinitialize(liveonexit, last_virtual_GPR, 0); + + for (gpr = 32; gpr < last_virtual_GPR; gpr++) { + for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) { + if (bitvectorgetbit(list->id, defvec)) { + if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(gpr, liveonexit); + break; + } + } + } + + for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) { + if (bitvectorgetbit(list->id, usevec)) { + if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(gpr, liveonexit); + break; + } + } + } + } +} + +static void eliminateinductionvariables(Loop *loop) { + BlockList *blocklist; + PCode *instr; + PCode *nextInstr; + PCodeArg *op; + int i; + + bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF); + + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) { + if ( + instr->op != PC_ADDI || + instr->args[0].data.reg.reg < 32 || + instr->args[0].data.reg.reg >= last_virtual_GPR || + instr->args[1].data.reg.reg != instr->args[0].data.reg.reg + ) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg >= n_real_registers[RegClass_GPR] && + op->data.reg.reg < last_virtual_GPR + ) + bitvectorclearbit(op->data.reg.reg, inductionvariables); + op++; + } + } + } + } + + if (loop->parent) { + for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg >= n_real_registers[RegClass_GPR] && + op->data.reg.reg < last_virtual_GPR + ) + bitvectorsetbit(op->data.reg.reg, liveonexit); + op++; + } + } + } + + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + if ( + instr->op == PC_ADDI && + instr->args[0].data.reg.reg >= 32 && + instr->args[0].data.reg.reg < last_virtual_GPR && + instr->args[1].data.reg.reg == instr->args[0].data.reg.reg && + bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) && + !bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit) + ) { + deletepcode(instr); + optimizedloops = 1; + } + } + } +} + +static void skiplooptest(Loop *loop) { + PCodeBlock *preheader; + PCodeLabel *label; + PCLink **ptr; + PCLink *link; + PCode *lastInstr; + PCode *instr; + + preheader = loop->preheader; + lastInstr = loop->body->lastPCode; +#line 340 + CError_ASSERT(lastInstr->args[2].kind == PCOp_LABEL); + + label = lastInstr->args[2].data.label.label; + preheader->lastPCode->args[0].data.label.label = label; + preheader->successors->block = label->block; + + ptr = &loop->body->predecessors; + while ((link = *ptr)) { + if (link->block == preheader) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + link->nextLink = label->block->predecessors; + label->block->predecessors = link; + + while (1) { + instr = loop->body->firstPCode; +#line 369 + CError_ASSERT(instr); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + for (instr = instr->nextPCode; instr && !(instr->flags & fPCodeFlag1); instr = instr->nextPCode) + insertpcodebefore(loop->preheader->lastPCode, copypcode(instr)); +} + +static void unrollloop(Loop *loop) { + PCodeBlock *newBlock; + int i; + int factor; + PCode *instr; + PCodeBlock *block; + PCode *firstInstr; + PCode *nextInstr; + + for (factor = copts.ppc_unroll_factor_limit; factor > 1; factor--) { + if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.ppc_unroll_instructions_limit) + break; + } + + if (factor == 1) + return; + if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4) + return; + + newBlock = oalloc(sizeof(PCodeBlock)); + newBlock->firstPCode = newBlock->lastPCode = NULL; + for (i = 0; i < factor - 1; i++) { + firstInstr = loop->body->firstPCode; +#line 448 + CError_ASSERT(firstInstr); + if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) { +#line 450 + CError_FATAL(); + } + + for (instr = firstInstr->nextPCode; instr && !(instr->flags & fPCodeFlag1); instr = instr->nextPCode) + appendpcode(newBlock, copypcode(instr)); + + for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) + appendpcode(newBlock, copypcode(instr)); + } + } + } + + block = loop->body->predecessors->block; + for (instr = newBlock->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + appendpcode(block, instr); + } + loop->iterationCount /= factor; +} + +void pccomputepredecessors1(PCodeBlock *block) { + PCLink *succ; + PCLink *pred; + + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!succ->block) { +#line 496 + CError_FATAL(); + } else { + for (pred = succ->block->predecessors; pred; pred = pred->nextLink) { + if (pred->block == block) + break; + } + + if (!pred) { + pred = lalloc(sizeof(PCLink)); + pred->block = block; + pred->nextLink = succ->block->predecessors; + succ->block->predecessors = pred; + } + } + } +} + +static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) { + PCodeBlock *newBlock; + PCodeBlock *nextBlock; + PCodeLabel *label; + PCLink *prev; + PCLink *link; + + label = makepclabel(); + newBlock = lalloc(sizeof(PCodeBlock)); + + nextBlock = block->nextBlock; + newBlock->nextBlock = nextBlock; + block->nextBlock = newBlock; + + newBlock->prevBlock = block; + nextBlock->prevBlock = newBlock; + + newBlock->labels = NULL; + newBlock->predecessors = newBlock->successors = NULL; + newBlock->firstPCode = newBlock->lastPCode = NULL; + newBlock->pcodeCount = 0; + newBlock->loopWeight = loopWeight; + newBlock->flags = 0; + newBlock->blockIndex = pcblockcount++; + pclabel(newBlock, label); + + prev = NULL; + for (link = block->successors; link; link = link->nextLink) { + if (link->block == nextBlock) { + if (!prev) + block->successors = link->nextLink; + else + prev->nextLink = link->nextLink; + link->nextLink = NULL; + newBlock->successors = link; + break; + } + prev = link; + } + + prev = NULL; + for (link = nextBlock->predecessors; link; link = link->nextLink) { + if (link->block == block) { + if (!prev) + nextBlock->predecessors = link->nextLink; + else + prev->nextLink = link->nextLink; + link->nextLink = NULL; + newBlock->predecessors = link; + break; + } + prev = link; + } + + link = lalloc(sizeof(PCLink)); + link->block = newBlock; + link->nextLink = block->successors; + block->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = newBlock; + link->nextLink = nextBlock->predecessors; + nextBlock->predecessors = link; + + return newBlock; +} + +static void unrollloopconditional(Loop *loop) { + PCodeBlock *lastBlock; + PCodeLabel *label24; + PCode *instr; + PCode *instrCopy; + int outputBlockCount; + int inputBlockCount; + PCodeBlock **blocks; + PCodeBlock **blocks2; + PCodeBlock **blocks3; + PCode *firstInstr; + int factor; + PCodeBlock *block; + PCLink *link; + PCLink *prev; + int i; + int j; + int k; + int instructionCount; + + label24 = NULL; + outputBlockCount = 0; + instructionCount = 0; + + for (factor = copts.ppc_unroll_factor_limit; factor > 1; factor--) { + if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.ppc_unroll_instructions_limit) + break; + } + + if (factor == 1) + return; + + inputBlockCount = 0; + for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) { + inputBlockCount++; + if (!bitvectorgetbit(block->blockIndex, loop->memberblocks)) + instructionCount += block->pcodeCount; + } + + if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8) + return; + + blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *)); + blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *)); + blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *))); + memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *)); + memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *)); + memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *))); + + block = loop->preheader->nextBlock; + for (i = 0; i < inputBlockCount; i++) { + blocks[i] = block; + block = block->nextBlock; + } + + lastBlock = blocks[inputBlockCount - 1]; + for (i = 0; i < factor - 1; i++) { + for (j = 0; j < inputBlockCount; j++) { + blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight); + blocks3[outputBlockCount++] = blocks2[j]; + lastBlock = blocks2[j]; + } + if (label24) { + pclabel(blocks2[0], label24); + label24 = NULL; + } + + for (j = 0; j < inputBlockCount; j++) { + for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & fPCodeFlag1) { + PCodeArg *op; + int opID; + instrCopy = copypcode(instr); + op = NULL; + for (opID = 0; opID < instr->argCount; opID++) { + if (instr->args[opID].kind == PCOp_LABEL) { + op = &instr->args[opID]; + break; + } + } + + if (op) { + if (op->data.label.label->block == loop->body) { + if (!label24) + label24 = makepclabel(); + instrCopy->args[opID].data.label.label = label24; + } else { + for (k = 0; k < inputBlockCount; k++) { + if (op->data.label.label->block == blocks[k]) { + instrCopy->args[opID].data.label.label = blocks2[k]->labels; + break; + } + } + } + } + + appendpcode(blocks2[j], instrCopy); + if (op) + pcbranch(blocks2[j], instrCopy->args[opID].data.label.label); + } else { + appendpcode(blocks2[j], copypcode(instr)); + } + } + } + + firstInstr = loop->body->firstPCode; +#line 762 + CError_ASSERT(firstInstr != NULL); + if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) { +#line 764 + CError_FATAL(); + } + + for (instr = firstInstr->nextPCode; instr && !(instr->flags & fPCodeFlag1); instr = instr->nextPCode) + appendpcode(blocks2[inputBlockCount - 1], copypcode(instr)); + + for (j = 0; j < inputBlockCount; j++) { + for (link = blocks[j]->successors; link; link = link->nextLink) { + if (link->block == blocks[j]->nextBlock) + break; + } + + if (!link) { + for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks2[j]->nextBlock) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks2[j]->successors = link->nextLink; + } else { + prev = link; + } + } + + for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks2[j]) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks2[j]->nextBlock->predecessors = link->nextLink; + } else { + prev = link; + } + } + } + } + } + + if (label24) + pclabel(loop->body, label24); + + for (i = 0; i < inputBlockCount; i++) { + for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & fPCodeFlag1) { + PCodeArg *op; + int opID; + op = NULL; + for (opID = 0; opID < instr->argCount; opID++) { + if (instr->args[opID].kind == PCOp_LABEL) { + op = &instr->args[opID]; + break; + } + } + + if (op && op->data.label.label->block == loop->body) { + instr->args[opID].data.label.label = blocks3[0]->labels; + + for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) { + if (link->block == loop->body) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks[i]->successors = link->nextLink; + } else { + prev = link; + } + } + + for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks[i]) { + if (prev) + prev->nextLink = link->nextLink; + else + loop->body->predecessors = link->nextLink; + } else { + prev = link; + } + } + + link = blocks[i]->successors; + while (link && link->block != blocks3[0]) + link = link->nextLink; + + if (!link) { + pcbranch(blocks[i], op->data.label.label); + pccomputepredecessors1(blocks[i]); + } + } + } + } + } + + for (i = 0; i < outputBlockCount; i++) + pccomputepredecessors1(blocks3[i]); + + loop->iterationCount /= factor; +} + +static void unrollunknownBDNZ(Loop *loop) { + int factor; // r29 + PCodeBlock *preheader; // r17 + PCodeBlock *blockA; // r23 + PCodeBlock *postheader; // r22 + PCodeBlock *newblock1; // r26 + PCodeBlock *newblock2; // r31 + PCodeBlock *newblock3; // r19 + PCodeBlock *newblock4; // r20 + PCodeBlock *newblock5; // r24 + PCode *mtctr; // r21 + int mtctr_reg; // r27 + PCode *instr28; // r28 + PCode *instr6; // r6 + PCodeBlock *block; + PCode *instr; + PCodeArg *op; + int i; + int val; + short mtctr_shifted_reg; // r16 + short reg25; // r25 + PCLink *link; + + if (loop->bodySize < 4) + return; + + factor = 128; + while (factor > copts.ppc_unroll_factor_limit) + factor >>= 1; + while (factor > 1 && (loop->bodySize - 2) * factor > copts.ppc_unroll_instructions_limit) + factor >>= 1; + + if (factor < 2) + return; + + preheader = loop->preheader; + blockA = loop->body->nextBlock; + postheader = preheader->nextBlock; + + newblock1 = insertnewpcblock(preheader, loop->loopWeight); + newblock2 = insertnewpcblock(newblock1, loop->loopWeight); + newblock3 = insertnewpcblock(newblock2, loop->loopWeight); + newblock4 = insertnewpcblock(newblock3, loop->loopWeight); + newblock5 = insertnewpcblock(newblock4, loop->loopWeight); + addblocktoloop(loop, newblock1); + addblocktoloop(loop, newblock2); + addblocktoloop(loop, newblock3); + addblocktoloop(loop, newblock4); + addblocktoloop(loop, newblock5); + + for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) { + if (instr->op == PC_MTCTR) { + mtctr = instr; + mtctr_reg = instr->args[0].data.reg.reg; + } + } + + if (!mtctr) + return; + + instr28 = NULL; + for (block = postheader; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) { + appendpcode(newblock2, copypcode(instr)); + if (instr == loop->pc18) + instr28 = newblock2->lastPCode; + } + } + } + + if (!instr28) { + appendpcode(newblock2, copypcode(loop->pc18)); + instr28 = newblock2->lastPCode; + } + + instr6 = NULL; + for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) { + if (instr != instr28) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == loop->pc18->args[0].data.reg.reg && + (op->data.reg.effect & (EffectRead | EffectWrite)) + ) { + instr6 = instr; + break; + } + op++; + } + } + if (instr6) + break; + } + + if (!instr6) { + deletepcode(instr28); + deletepcode(loop->pc18); + if (loop->footer) + blockA = loop->footer; + else + blockA = insertnewpcblock(loop->body, loop->loopWeight); + } else { + instr28 = NULL; + } + + for (i = 1; i < factor; i++) { + for (block = postheader; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) + appendpcode(newblock2, copypcode(instr)); + } + } + } + + mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++; + appendpcode(newblock1, makepcode( + PC_RLWINM, + mtctr_shifted_reg, + mtctr_reg, + 32 - ispowerof2(factor), + ispowerof2(factor), + 31)); + + appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0)); + + if (instr28) { + reg25 = used_virtual_registers[RegClass_GPR]++; + if (loop->step == 1) { + instr = makepcode(PC_MR, reg25, mtctr_reg); + } else if (loop->step == -1) { + instr = makepcode(PC_NEG, reg25, mtctr_reg); + } else { + val = ispowerof2(abs(loop->step)); + if (val > 0) { + instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val); + if (loop->step < 0) { + appendpcode(newblock1, instr); + instr = makepcode(PC_NEG, reg25, reg25); + } + } else { + instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step); + } + } + appendpcode(newblock1, instr); + } + + appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg)); + appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels)); + pcbranch(newblock1, newblock5->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = newblock5->predecessors; + newblock5->predecessors = link; + + appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels)); + pcbranch(newblock3, newblock2->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock3; + link->nextLink = newblock2->predecessors; + newblock2->predecessors = link; + + appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1)); + appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels)); + pcbranch(newblock4, blockA->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock4; + link->nextLink = blockA->predecessors; + blockA->predecessors = link; + + deletepcode(mtctr); + appendpcode(newblock5, mtctr); + + if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) { + instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25); + if (blockA->firstPCode) + insertpcodebefore(blockA->firstPCode, instr); + else + appendpcode(blockA, instr); + } +} + +static void deleteloop(Loop *loop) { + PCodeBlock *body; + PCodeBlock *preheader; + PCodeBlock *nextblock; + PCodeLabel *label; + PCode *instr; + PCLink **ptr; + PCLink *link; + PCodeBlock *block; + + body = loop->body; + preheader = loop->preheader; + nextblock = body->nextBlock; + label = makepclabel(); + + while (1) { + instr = body->firstPCode; +#line 1294 + CError_ASSERT(instr != NULL); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + pclabel(nextblock, label); + preheader->lastPCode->args[0].data.label.label = label; + preheader->successors->block = nextblock; + + ptr = &nextblock->predecessors; + while ((link = *ptr)) { + if (link->block == body) { + link->block = preheader; + break; + } + ptr = &link->nextLink; + } + + block = pcbasicblocks; + while ((nextblock = block->nextBlock)) { + if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks)) + block->nextBlock = nextblock->nextBlock; + else + block = nextblock; + } +} + +static void deleteloopheader(Loop *loop) { + PCLink *link; + PCLink **ptr; + + ptr = &loop->body->successors; + while ((link = *ptr)) { + if (link->block == loop->body->lastPCode->args[2].data.label.label->block) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors; + while ((link = *ptr)) { + if (link->block == loop->body) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + optimizedloop_full_unroll = 1; +} + +static void rewriteloopwithBDNZ(Loop *loop) { + PCode *instr; + + if (!FITS_IN_SHORT(loop->iterationCount)) { + instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount)); + insertpcodebefore(loop->preheader->lastPCode, instr); + + if (loop->iterationCount != 0) + insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount))); + } else { + instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount); + insertpcodebefore(loop->preheader->lastPCode, instr); + } + + insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg)); + + instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + appendpcode(loop->body, instr); + + for (loop = loop->parent; loop; loop = loop->parent) + loop->x4E = 1; +} + +static void rewriteunknownloopwithBDNZ(Loop *loop) { + SInt32 value1; // r24 + SInt32 value2; // r30 + Opcode branchOpcode; // r19 + SInt32 absStep; // r28 + int branchCondition; // r20 + int counterReg; // r27 + int reg1; // r26 + int reg2; // r22 + unsigned char mode; // r23 + PCodeBlock *afterLoop; // r25 + PCode *addiInstr; + PCode *instr; + PCLink *link; + PCLink **ptr; + + absStep = abs(loop->step); + afterLoop = NULL; + + for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) { + deletepcode(addiInstr); + if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode) + insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr); + else + appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr); + + afterLoop = insertnewpcblock(loop->body, loop->loopWeight); + appendpcode(afterLoop, copypcode(addiInstr)); + loop->footer = afterLoop; + } + + if (loop->unknownCondition == ELESS) { + branchOpcode = PC_BF; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 + loop->upper; + mode = 1; + } else { + branchCondition = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 2; + } + } else if (loop->unknownCondition == ELESSEQU) { + branchOpcode = PC_BT; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep + loop->upper; + mode = 1; + } else { + branchCondition = 1; + value1 = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep; + mode = 2; + } + } else if (loop->unknownCondition == EGREATER) { + branchOpcode = PC_BF; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 - loop->upper; + mode = 0; + } else { + branchCondition = 1; + value1 = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 3; + } + } else if (loop->unknownCondition == EGREATEREQU) { + branchOpcode = PC_BT; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - loop->upper; + mode = 0; + } else { + branchCondition = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep; + mode = 3; + } + } else if (loop->unknownCondition == ENOTEQU) { + branchOpcode = PC_BT; + branchCondition = 2; + if (loop->step > 0) { + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 + loop->upper; + mode = 1; + } else { + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 2; + } + } else { + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 - loop->upper; + mode = 0; + } else { + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 3; + } + } + } + + while (1) { + instr = loop->body->firstPCode; +#line 1857 + CError_ASSERT(instr); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + // build a value for mtctr + counterReg = used_virtual_registers[RegClass_GPR]++; + + if (mode == 1) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_NEG, counterReg, reg1)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBFIC, counterReg, reg1, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } + } else if (mode == 0) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_MR, counterReg, reg1)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg1, 0, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + } + } else if (mode == 2) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, reg2)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg2, 0, value2)); + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } + } else { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg2, reg1)); + } else { + if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg1, 0, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg2, counterReg)); + } + } + + if (absStep > 1) { + unsigned char bits; + if ((bits = ispowerof2(absStep))) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31)); + } else { + int reg = used_virtual_registers[RegClass_GPR]++; + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_LI, reg, absStep)); + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_DIVWU, counterReg, counterReg, reg)); + } + } + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_MTCTR, counterReg)); + + if (mode < 2) { + if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { + if (FITS_IN_USHORT(value1)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPLI, 0, reg1, value1)); + } else { + PCode *tmp; + tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); + insertpcodebefore(loop->preheader->lastPCode, tmp); + + if (loop->iterationCount != 0) + insertpcodeafter(tmp, + makepcode(PC_ADDI, + tmp->args[0].data.reg.reg, + tmp->args[0].data.reg.reg, + 0, LOW_PART(value1))); + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg)); + } + } else { + if (FITS_IN_SHORT(value1)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPI, 0, reg1, value1)); + } else { + PCode *tmp; + tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); + insertpcodebefore(loop->preheader->lastPCode, tmp); + + if (loop->iterationCount != 0) + insertpcodeafter(tmp, + makepcode(PC_ADDI, + tmp->args[0].data.reg.reg, + tmp->args[0].data.reg.reg, + 0, LOW_PART(value1))); + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg)); + } + } + } else { + if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPL, 0, reg1, reg2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMP, 0, reg1, reg2)); + } + } + + if (!afterLoop) + afterLoop = loop->body->nextBlock; + + instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels); + deletepcode(loop->preheader->lastPCode); + appendpcode(loop->preheader, instr); + + instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + appendpcode(loop->body, instr); + + loop->preheader->successors = NULL; + ptr = &loop->body->predecessors; + while ((link = *ptr)) { + if (link->block == loop->preheader) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader->nextBlock; + link->nextLink = loop->preheader->successors; + loop->preheader->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader; + link->nextLink = loop->preheader->nextBlock->predecessors; + loop->preheader->nextBlock->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = afterLoop; + link->nextLink = loop->preheader->successors; + loop->preheader->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader; + link->nextLink = afterLoop->predecessors; + afterLoop->predecessors = link; + + for (loop = loop->parent; loop; loop = loop->parent) + loop->x4E = 1; +} + +static void optimizeloop(Loop *loop) { + computeliveonexit(loop); + + if (loop->x53 || loop->x54) + insertupdateinstructions(loop); + + if (loop->isKnownCountingLoop) { + if (loop->iterationCount > 0) { + skiplooptest(loop); + if (!copts.optimize_for_size && !loop->x4D && !loop->x57) { + if (loop->x4F) + unrollloop(loop); + else if (!loop->x4E) + unrollloopconditional(loop); + } + } + + if (loop->iterationCount != 0) { + if (loop->iterationCount == 1) + deleteloopheader(loop); + else if (!loop->x4E && !loop->x4D) + rewriteloopwithBDNZ(loop); + } + + optimizedloops = 1; + } else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) { + rewriteunknownloopwithBDNZ(loop); + if (copts.ppc_unroll_speculative && !copts.optimize_for_size) { + if (loop->x4F && !loop->x57 && !loop->x52) + unrollunknownBDNZ(loop); + } + optimizedloops = 1; + } + + eliminateinductionvariables(loop); +} + +typedef struct LocalArray { + struct LocalArray *next; + Object *object; + unsigned int invalid:1; + unsigned int isFloat:1; + unsigned int isSigned:1; + SInt32 elementSize; + SInt32 arraySize; + SInt32 elementCount; + int totalUses; + int elements[1]; +} LocalArray; + +static LocalArray *scanforlocalarrays(void) { + SInt32 elementCount; + LocalArray *head; + LocalArray *array; + ObjectList *list; + int i; + SInt32 arraySize; + SInt32 elementSize; + + head = NULL; + + for (list = locals; list; list = list->next) { + if ( + list->object && + !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && + list->object->type && + IS_TYPE_ARRAY(list->object->type) && + TPTR_TARGET(list->object->type) && + TPTR_TARGET(list->object->type)->type < TYPESTRUCT + ) { + arraySize = list->object->type->size; + elementSize = TPTR_TARGET(list->object->type)->size; + elementCount = arraySize / elementSize; + if (elementCount > 0 && elementCount <= 8) { + array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray)); + array->next = head; + head = array; + + array->object = list->object; + array->elementSize = elementSize; + array->arraySize = arraySize; + array->elementCount = elementCount; + array->totalUses = 0; + array->isSigned = 1; + array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type)); + array->invalid = 0; + array->isSigned = 1; + if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type))) + array->isSigned = 0; + + for (i = 0; i < elementCount; i++) { + array->elements[i] = 0; + } + } + } + } + + return head; +} + +static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) { + while (arrays) { + if (arrays->object == object) + return arrays; + arrays = arrays->next; + } + return NULL; +} + +void changearraytoregisters(void) { + LocalArray *arrays; + PCodeBlock *block; + PCode *instr; + int i; + PCodeArg *op; + LocalArray **ptr; + LocalArray *array; + int intCounter; + int floatCounter; + int reg; + int reg2; + PCode *newInstr; + + arrays = NULL; + if ((arrays = scanforlocalarrays())) { + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fPCodeFlag1) && instr->argCount) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_MEMORY && + (unsigned char) op->arg == 1 && + (array = lookup_array_object(arrays, op->data.mem.obj)) && + !array->invalid + ) { + if ( + (instr->flags & (fPCodeFlag2 | fPCodeFlag4)) && + (op->data.mem.offset % array->elementSize) == 0 && + op->data.mem.offset < array->arraySize + ) { + switch (instr->op) { + case PC_LBZ: + case PC_STB: + if (array->elementSize != 1) + array->invalid = 1; + break; + case PC_LHZ: + case PC_LHA: + case PC_STH: + if (array->elementSize != 2) + array->invalid = 1; + break; + case PC_LWZ: + case PC_STW: + if (array->elementSize != 4) + array->invalid = 1; + break; + case PC_LFD: + case PC_STFD: + if (array->elementSize != 8) + array->invalid = 1; + break; + default: + array->invalid = 1; + break; + } + + if (!array->invalid) + array->elements[op->data.mem.offset / array->elementSize]++; + } else { + array->invalid = 1; + } + } + op++; + } + } + } + } + + intCounter = 0; + floatCounter = 0; + ptr = &arrays; + array = *ptr; + while (array) { + if (array->invalid) { + *ptr = array = array->next; + continue; + } + + if (array->isFloat) + floatCounter += array->elementCount; + if (!array->isFloat) + intCounter += array->elementCount; + + for (i = 0; i < array->elementCount; i++) + array->totalUses += array->elements[i]; + + array = array->next; + } + + if (arrays) { + while (intCounter > 8) { + LocalArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (!array->isFloat) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + intCounter -= best->elementCount; + } + + while (floatCounter > 8) { + LocalArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (array->isFloat) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + floatCounter -= best->elementCount; + } + +#line 2394 + CError_ASSERT(intCounter <= 8 && floatCounter <= 8); + + if (!arrays) + return; + + optimizedloop_trans_regs = 1; + + for (array = arrays; array; array = array->next) { + for (i = 0; i < array->elementCount; i++) { + if (array->isFloat) + array->elements[i] = used_virtual_registers[RegClass_FPR]++; + else + array->elements[i] = used_virtual_registers[RegClass_GPR]++; + } + } + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if ( + !(instr->flags & fPCodeFlag1) && + instr->argCount && + (instr->flags & (fPCodeFlag2 | fPCodeFlag4)) && + instr->args[2].kind == PCOp_MEMORY && + (unsigned char) instr->args[2].arg == 1 && + (array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) && + !(array->invalid) + ) + { + reg2 = instr->args[0].data.reg.reg; + reg = array->elements[instr->args[2].data.mem.offset / array->elementSize]; + newInstr = NULL; + switch (instr->op) { + case PC_LBZ: + if (array->isSigned) + newInstr = makepcode(PC_MR, reg2, reg); + else + newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31); + break; + case PC_STB: + if (array->isSigned) + newInstr = makepcode(PC_EXTSB, reg, reg2); + else + newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31); + break; + case PC_LHZ: + newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31); + break; + case PC_LHA: + newInstr = makepcode(PC_EXTSH, reg2, reg); + break; + case PC_STH: + if (array->isSigned) + newInstr = makepcode(PC_EXTSH, reg, reg2); + else + newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31); + break; + case PC_LWZ: + newInstr = makepcode(PC_MR, reg2, reg); + break; + case PC_STW: + newInstr = makepcode(PC_MR, reg, reg2); + break; + case PC_LFD: + newInstr = makepcode(PC_FMR, reg2, reg); + break; + case PC_STFD: + newInstr = makepcode(PC_FMR, reg, reg2); + break; + default: +#line 2494 + CError_FATAL(); + break; + } + + if (newInstr) { + insertpcodebefore(instr, newInstr); + deletepcode(instr); + instr = newInstr; + } + } + } + } + } + } + + freeoheap(); +} + +static void optimizenestedloops(Loop *loop) { + while (loop) { + if (loop->children) + optimizenestedloops(loop->children); + optimizeloop(loop); + loop = loop->nextSibling; + } +} + +void optimizeloops(void) { + optimizedloops = 0; + optimizedloop_full_unroll = 0; + optimizedloop_trans_regs = 0; + if (loopsinflowgraph) { + computeusedefchains(0); + last_virtual_GPR = used_virtual_registers[RegClass_GPR]; + liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5)); + inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5)); + optimizenestedloops(loopsinflowgraph); + freeoheap(); + } +} + -- cgit v1.2.3