diff options
Diffstat (limited to 'compiler_and_linker/unsorted/LoopOptimization.c')
-rw-r--r-- | compiler_and_linker/unsorted/LoopOptimization.c | 1553 |
1 files changed, 0 insertions, 1553 deletions
diff --git a/compiler_and_linker/unsorted/LoopOptimization.c b/compiler_and_linker/unsorted/LoopOptimization.c deleted file mode 100644 index b2aef1e..0000000 --- a/compiler_and_linker/unsorted/LoopOptimization.c +++ /dev/null @@ -1,1553 +0,0 @@ -#include "compiler/LoopOptimization.h" -#include "compiler/CFunc.h" -#include "compiler/CParser.h" -#include "compiler/BitVectors.h" -#include "compiler/CompilerTools.h" -#include "compiler/LoopDetection.h" -#include "compiler/PCode.h" -#include "compiler/Registers.h" -#include "compiler/UseDefChains.h" -#include "compiler/objects.h" -#include "compiler/types.h" - -int optimizedloops; -int optimizedloop_full_unroll; -int optimizedloop_trans_regs; -static UInt32 *liveonexit; -static UInt32 *inductionvariables; -static int last_virtual_GPR; - -static int ispowerof2(SInt32 value) { - int bit = getbit(value); - return (bit > 0 && bit < 31) ? bit : 0; -} - -static void insertupdateinstructions(Loop *loop) { - // nothing -} - -static void computeliveonexit(Loop *loop) { - UInt32 *usevec; - UInt32 *defvec; - BlockList *blocklist; - RegUseOrDef *list; - int gpr; - - bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0); - for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { - if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24)) - bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses); - } - - bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0); - if (loop->preheader) - bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs); - - bitvectorinitialize(liveonexit, last_virtual_GPR, 0); - - for (gpr = 32; gpr < last_virtual_GPR; gpr++) { - for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) { - if (bitvectorgetbit(list->id, defvec)) { - if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) { - bitvectorsetbit(gpr, liveonexit); - break; - } - } - } - - for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) { - if (bitvectorgetbit(list->id, usevec)) { - if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { - bitvectorsetbit(gpr, liveonexit); - break; - } - } - } - } -} - -static void eliminateinductionvariables(Loop *loop) { - BlockList *blocklist; - PCode *instr; - PCode *nextInstr; - PCodeArg *op; - int i; - - bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF); - - for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { - for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) { - if ( - instr->op != PC_ADDI || - instr->args[0].data.reg.reg < 32 || - instr->args[0].data.reg.reg >= last_virtual_GPR || - instr->args[1].data.reg.reg != instr->args[0].data.reg.reg - ) { - op = instr->args; - i = instr->argCount; - while (i--) { - if ( - op->kind == PCOp_REGISTER && - op->arg == RegClass_GPR && - op->data.reg.reg >= n_real_registers[RegClass_GPR] && - op->data.reg.reg < last_virtual_GPR - ) - bitvectorclearbit(op->data.reg.reg, inductionvariables); - op++; - } - } - } - } - - if (loop->parent) { - for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) { - nextInstr = instr->nextPCode; - op = instr->args; - i = instr->argCount; - while (i--) { - if ( - op->kind == PCOp_REGISTER && - op->arg == RegClass_GPR && - op->data.reg.reg >= n_real_registers[RegClass_GPR] && - op->data.reg.reg < last_virtual_GPR - ) - bitvectorsetbit(op->data.reg.reg, liveonexit); - op++; - } - } - } - - for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { - for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) { - nextInstr = instr->nextPCode; - if ( - instr->op == PC_ADDI && - instr->args[0].data.reg.reg >= 32 && - instr->args[0].data.reg.reg < last_virtual_GPR && - instr->args[1].data.reg.reg == instr->args[0].data.reg.reg && - bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) && - !bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit) - ) { - deletepcode(instr); - optimizedloops = 1; - } - } - } -} - -static void skiplooptest(Loop *loop) { - PCodeBlock *preheader; - PCodeLabel *label; - PCLink **ptr; - PCLink *link; - PCode *lastInstr; - PCode *instr; - - preheader = loop->preheader; - lastInstr = loop->body->lastPCode; - CError_ASSERT(340, lastInstr->args[2].kind == PCOp_LABEL); - - label = lastInstr->args[2].data.label.label; - preheader->lastPCode->args[0].data.label.label = label; - preheader->successors->block = label->block; - - ptr = &loop->body->predecessors; - while ((link = *ptr)) { - if (link->block == preheader) { - *ptr = link->nextLink; - break; - } - ptr = &link->nextLink; - } - - link->nextLink = label->block->predecessors; - label->block->predecessors = link; - - while (1) { - instr = loop->body->firstPCode; - CError_ASSERT(369, instr); - - if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) - break; - - deletepcode(instr); - insertpcodebefore(loop->preheader->lastPCode, instr); - loop->bodySize--; - } - - for (instr = instr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) - insertpcodebefore(loop->preheader->lastPCode, copypcode(instr)); -} - -static void unrollloop(Loop *loop) { - PCodeBlock *newBlock; - int i; - int factor; - PCode *instr; - PCodeBlock *block; - PCode *firstInstr; - PCode *nextInstr; - - for (factor = copts.unroll_factor_limit; factor > 1; factor--) { - if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit) - break; - } - - if (factor == 1) - return; - if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4) - return; - - newBlock = oalloc(sizeof(PCodeBlock)); - newBlock->firstPCode = newBlock->lastPCode = NULL; - for (i = 0; i < factor - 1; i++) { - firstInstr = loop->body->firstPCode; - CError_ASSERT(448, firstInstr); - if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) - CError_FATAL(450); - - for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) - appendpcode(newBlock, copypcode(instr)); - - for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) { - for (instr = block->firstPCode; instr; instr = instr->nextPCode) { - if (instr->op != PC_B) - appendpcode(newBlock, copypcode(instr)); - } - } - } - - block = loop->body->predecessors->block; - for (instr = newBlock->firstPCode; instr; instr = nextInstr) { - nextInstr = instr->nextPCode; - appendpcode(block, instr); - } - loop->iterationCount /= factor; -} - -void pccomputepredecessors1(PCodeBlock *block) { - PCLink *succ; - PCLink *pred; - - for (succ = block->successors; succ; succ = succ->nextLink) { - if (!succ->block) { - CError_FATAL(496); - } else { - for (pred = succ->block->predecessors; pred; pred = pred->nextLink) { - if (pred->block == block) - break; - } - - if (!pred) { - pred = lalloc(sizeof(PCLink)); - pred->block = block; - pred->nextLink = succ->block->predecessors; - succ->block->predecessors = pred; - } - } - } -} - -static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) { - PCodeBlock *newBlock; - PCodeBlock *nextBlock; - PCodeLabel *label; - PCLink *prev; - PCLink *link; - - label = makepclabel(); - newBlock = lalloc(sizeof(PCodeBlock)); - - nextBlock = block->nextBlock; - newBlock->nextBlock = nextBlock; - block->nextBlock = newBlock; - - newBlock->prevBlock = block; - nextBlock->prevBlock = newBlock; - - newBlock->labels = NULL; - newBlock->predecessors = newBlock->successors = NULL; - newBlock->firstPCode = newBlock->lastPCode = NULL; - newBlock->pcodeCount = 0; - newBlock->loopWeight = loopWeight; - newBlock->flags = 0; - newBlock->blockIndex = pcblockcount++; - pclabel(newBlock, label); - - prev = NULL; - for (link = block->successors; link; link = link->nextLink) { - if (link->block == nextBlock) { - if (!prev) - block->successors = link->nextLink; - else - prev->nextLink = link->nextLink; - link->nextLink = NULL; - newBlock->successors = link; - break; - } - prev = link; - } - - prev = NULL; - for (link = nextBlock->predecessors; link; link = link->nextLink) { - if (link->block == block) { - if (!prev) - nextBlock->predecessors = link->nextLink; - else - prev->nextLink = link->nextLink; - link->nextLink = NULL; - newBlock->predecessors = link; - break; - } - prev = link; - } - - link = lalloc(sizeof(PCLink)); - link->block = newBlock; - link->nextLink = block->successors; - block->successors = link; - - link = lalloc(sizeof(PCLink)); - link->block = newBlock; - link->nextLink = nextBlock->predecessors; - nextBlock->predecessors = link; - - return newBlock; -} - -static void unrollloopconditional(Loop *loop) { - PCodeBlock *lastBlock; - PCodeLabel *label24; - PCode *instr; - PCode *instrCopy; - int outputBlockCount; - int inputBlockCount; - PCodeBlock **blocks; - PCodeBlock **blocks2; - PCodeBlock **blocks3; - PCode *firstInstr; - int factor; - PCodeBlock *block; - PCLink *link; - PCLink *prev; - int i; - int j; - int k; - int instructionCount; - - label24 = NULL; - outputBlockCount = 0; - instructionCount = 0; - - for (factor = copts.unroll_factor_limit; factor > 1; factor--) { - if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit) - break; - } - - if (factor == 1) - return; - - inputBlockCount = 0; - for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) { - inputBlockCount++; - if (!bitvectorgetbit(block->blockIndex, loop->memberblocks)) - instructionCount += block->pcodeCount; - } - - if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8) - return; - - blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *)); - blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *)); - blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *))); - memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *)); - memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *)); - memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *))); - - block = loop->preheader->nextBlock; - for (i = 0; i < inputBlockCount; i++) { - blocks[i] = block; - block = block->nextBlock; - } - - lastBlock = blocks[inputBlockCount - 1]; - for (i = 0; i < factor - 1; i++) { - for (j = 0; j < inputBlockCount; j++) { - blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight); - blocks3[outputBlockCount++] = blocks2[j]; - lastBlock = blocks2[j]; - } - if (label24) { - pclabel(blocks2[0], label24); - label24 = NULL; - } - - for (j = 0; j < inputBlockCount; j++) { - for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) { - if (instr->flags & fIsBranch) { - PCodeArg *op; - int opID; - instrCopy = copypcode(instr); - op = NULL; - for (opID = 0; opID < instr->argCount; opID++) { - if (instr->args[opID].kind == PCOp_LABEL) { - op = &instr->args[opID]; - break; - } - } - - if (op) { - if (op->data.label.label->block == loop->body) { - if (!label24) - label24 = makepclabel(); - instrCopy->args[opID].data.label.label = label24; - } else { - for (k = 0; k < inputBlockCount; k++) { - if (op->data.label.label->block == blocks[k]) { - instrCopy->args[opID].data.label.label = blocks2[k]->labels; - break; - } - } - } - } - - appendpcode(blocks2[j], instrCopy); - if (op) - pcbranch(blocks2[j], instrCopy->args[opID].data.label.label); - } else { - appendpcode(blocks2[j], copypcode(instr)); - } - } - } - - firstInstr = loop->body->firstPCode; - CError_ASSERT(762, firstInstr != NULL); - if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) - CError_FATAL(764); - - for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) - appendpcode(blocks2[inputBlockCount - 1], copypcode(instr)); - - for (j = 0; j < inputBlockCount; j++) { - for (link = blocks[j]->successors; link; link = link->nextLink) { - if (link->block == blocks[j]->nextBlock) - break; - } - - if (!link) { - for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) { - if (link->block == blocks2[j]->nextBlock) { - if (prev) - prev->nextLink = link->nextLink; - else - blocks2[j]->successors = link->nextLink; - } else { - prev = link; - } - } - - for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) { - if (link->block == blocks2[j]) { - if (prev) - prev->nextLink = link->nextLink; - else - blocks2[j]->nextBlock->predecessors = link->nextLink; - } else { - prev = link; - } - } - } - } - } - - if (label24) - pclabel(loop->body, label24); - - for (i = 0; i < inputBlockCount; i++) { - for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) { - if (instr->flags & fIsBranch) { - PCodeArg *op; - int opID; - op = NULL; - for (opID = 0; opID < instr->argCount; opID++) { - if (instr->args[opID].kind == PCOp_LABEL) { - op = &instr->args[opID]; - break; - } - } - - if (op && op->data.label.label->block == loop->body) { - instr->args[opID].data.label.label = blocks3[0]->labels; - - for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) { - if (link->block == loop->body) { - if (prev) - prev->nextLink = link->nextLink; - else - blocks[i]->successors = link->nextLink; - } else { - prev = link; - } - } - - for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) { - if (link->block == blocks[i]) { - if (prev) - prev->nextLink = link->nextLink; - else - loop->body->predecessors = link->nextLink; - } else { - prev = link; - } - } - - link = blocks[i]->successors; - while (link && link->block != blocks3[0]) - link = link->nextLink; - - if (!link) { - pcbranch(blocks[i], op->data.label.label); - pccomputepredecessors1(blocks[i]); - } - } - } - } - } - - for (i = 0; i < outputBlockCount; i++) - pccomputepredecessors1(blocks3[i]); - - loop->iterationCount /= factor; -} - -static void unrollunknownBDNZ(Loop *loop) { - int factor; // r29 - PCodeBlock *preheader; // r17 - PCodeBlock *blockA; // r23 - PCodeBlock *postheader; // r22 - PCodeBlock *newblock1; // r26 - PCodeBlock *newblock2; // r31 - PCodeBlock *newblock3; // r19 - PCodeBlock *newblock4; // r20 - PCodeBlock *newblock5; // r24 - PCode *mtctr; // r21 - int mtctr_reg; // r27 - PCode *instr28; // r28 - PCode *instr6; // r6 - PCodeBlock *block; - PCode *instr; - PCodeArg *op; - int i; - int val; - short mtctr_shifted_reg; // r16 - short reg25; // r25 - PCLink *link; - - if (loop->bodySize < 4) - return; - - factor = 128; - while (factor > copts.unroll_factor_limit) - factor >>= 1; - while (factor > 1 && (loop->bodySize - 2) * factor > copts.unroll_instr_limit) - factor >>= 1; - - if (factor < 2) - return; - - preheader = loop->preheader; - blockA = loop->body->nextBlock; - postheader = preheader->nextBlock; - - newblock1 = insertnewpcblock(preheader, loop->loopWeight); - newblock2 = insertnewpcblock(newblock1, loop->loopWeight); - newblock3 = insertnewpcblock(newblock2, loop->loopWeight); - newblock4 = insertnewpcblock(newblock3, loop->loopWeight); - newblock5 = insertnewpcblock(newblock4, loop->loopWeight); - addblocktoloop(loop, newblock1); - addblocktoloop(loop, newblock2); - addblocktoloop(loop, newblock3); - addblocktoloop(loop, newblock4); - addblocktoloop(loop, newblock5); - - for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) { - if (instr->op == PC_MTCTR) { - mtctr = instr; - mtctr_reg = instr->args[0].data.reg.reg; - } - } - - if (!mtctr) - return; - - instr28 = NULL; - for (block = postheader; block != loop->body; block = block->successors->block) { - for (instr = block->firstPCode; instr; instr = instr->nextPCode) { - if (instr->op != PC_B) { - appendpcode(newblock2, copypcode(instr)); - if (instr == loop->pc18) - instr28 = newblock2->lastPCode; - } - } - } - - if (!instr28) { - appendpcode(newblock2, copypcode(loop->pc18)); - instr28 = newblock2->lastPCode; - } - - instr6 = NULL; - for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) { - if (instr != instr28) { - op = instr->args; - i = instr->argCount; - while (i--) { - if ( - op->kind == PCOp_REGISTER && - op->arg == RegClass_GPR && - op->data.reg.reg == loop->pc18->args[0].data.reg.reg && - (op->data.reg.effect & (EffectRead | EffectWrite)) - ) { - instr6 = instr; - break; - } - op++; - } - } - if (instr6) - break; - } - - if (!instr6) { - deletepcode(instr28); - deletepcode(loop->pc18); - if (loop->footer) - blockA = loop->footer; - else - blockA = insertnewpcblock(loop->body, loop->loopWeight); - } else { - instr28 = NULL; - } - - for (i = 1; i < factor; i++) { - for (block = postheader; block != loop->body; block = block->successors->block) { - for (instr = block->firstPCode; instr; instr = instr->nextPCode) { - if (instr->op != PC_B) - appendpcode(newblock2, copypcode(instr)); - } - } - } - - mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++; - appendpcode(newblock1, makepcode( - PC_RLWINM, - mtctr_shifted_reg, - mtctr_reg, - 32 - ispowerof2(factor), - ispowerof2(factor), - 31)); - - appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0)); - - if (instr28) { - reg25 = used_virtual_registers[RegClass_GPR]++; - if (loop->step == 1) { - instr = makepcode(PC_MR, reg25, mtctr_reg); - } else if (loop->step == -1) { - instr = makepcode(PC_NEG, reg25, mtctr_reg); - } else { - val = ispowerof2(abs(loop->step)); - if (val > 0) { - instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val); - if (loop->step < 0) { - appendpcode(newblock1, instr); - instr = makepcode(PC_NEG, reg25, reg25); - } - } else { - instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step); - } - } - appendpcode(newblock1, instr); - } - - appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg)); - appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels)); - pcbranch(newblock1, newblock5->labels); - - link = lalloc(sizeof(PCLink)); - link->block = newblock1; - link->nextLink = newblock5->predecessors; - newblock5->predecessors = link; - - appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels)); - pcbranch(newblock3, newblock2->labels); - - link = lalloc(sizeof(PCLink)); - link->block = newblock3; - link->nextLink = newblock2->predecessors; - newblock2->predecessors = link; - - appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1)); - appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels)); - pcbranch(newblock4, blockA->labels); - - link = lalloc(sizeof(PCLink)); - link->block = newblock4; - link->nextLink = blockA->predecessors; - blockA->predecessors = link; - - deletepcode(mtctr); - appendpcode(newblock5, mtctr); - - if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) { - instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25); - if (blockA->firstPCode) - insertpcodebefore(blockA->firstPCode, instr); - else - appendpcode(blockA, instr); - } -} - -static void deleteloop(Loop *loop) { - PCodeBlock *body; - PCodeBlock *preheader; - PCodeBlock *nextblock; - PCodeLabel *label; - PCode *instr; - PCLink **ptr; - PCLink *link; - PCodeBlock *block; - - body = loop->body; - preheader = loop->preheader; - nextblock = body->nextBlock; - label = makepclabel(); - - while (1) { - instr = body->firstPCode; - CError_ASSERT(1294, instr != NULL); - - if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) - break; - - deletepcode(instr); - insertpcodebefore(loop->preheader->lastPCode, instr); - loop->bodySize--; - } - - pclabel(nextblock, label); - preheader->lastPCode->args[0].data.label.label = label; - preheader->successors->block = nextblock; - - ptr = &nextblock->predecessors; - while ((link = *ptr)) { - if (link->block == body) { - link->block = preheader; - break; - } - ptr = &link->nextLink; - } - - block = pcbasicblocks; - while ((nextblock = block->nextBlock)) { - if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks)) - block->nextBlock = nextblock->nextBlock; - else - block = nextblock; - } -} - -static void deleteloopheader(Loop *loop) { - PCLink *link; - PCLink **ptr; - - ptr = &loop->body->successors; - while ((link = *ptr)) { - if (link->block == loop->body->lastPCode->args[2].data.label.label->block) { - *ptr = link->nextLink; - break; - } - ptr = &link->nextLink; - } - - ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors; - while ((link = *ptr)) { - if (link->block == loop->body) { - *ptr = link->nextLink; - break; - } - ptr = &link->nextLink; - } - - deletepcode(loop->body->firstPCode); - deletepcode(loop->body->lastPCode); - optimizedloop_full_unroll = 1; -} - -static void rewriteloopwithBDNZ(Loop *loop) { - PCode *instr; - - if (!FITS_IN_SHORT(loop->iterationCount)) { - instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount)); - insertpcodebefore(loop->preheader->lastPCode, instr); - - if (loop->iterationCount != 0) - insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount))); - } else { - instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount); - insertpcodebefore(loop->preheader->lastPCode, instr); - } - - insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg)); - - instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); - deletepcode(loop->body->firstPCode); - deletepcode(loop->body->lastPCode); - appendpcode(loop->body, instr); - - for (loop = loop->parent; loop; loop = loop->parent) - loop->x4E = 1; -} - -static void rewriteunknownloopwithBDNZ(Loop *loop) { - SInt32 value1; // r24 - SInt32 value2; // r30 - Opcode branchOpcode; // r19 - SInt32 absStep; // r28 - int branchCondition; // r20 - int counterReg; // r27 - int reg1; // r26 - int reg2; // r22 - unsigned char mode; // r23 - PCodeBlock *afterLoop; // r25 - PCode *addiInstr; - PCode *instr; - PCLink *link; - PCLink **ptr; - - absStep = abs(loop->step); - afterLoop = NULL; - - for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) { - deletepcode(addiInstr); - if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode) - insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr); - else - appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr); - - afterLoop = insertnewpcblock(loop->body, loop->loopWeight); - appendpcode(afterLoop, copypcode(addiInstr)); - loop->footer = afterLoop; - } - - if (loop->unknownCondition == ELESS) { - branchOpcode = PC_BF; - if (loop->lowerType == LOOP_BOUND_CONSTANT) { - branchCondition = 1; // GT - value1 = loop->lower; - reg1 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1 - loop->lower; - mode = 0; - } else if (loop->upperType == LOOP_BOUND_CONSTANT) { - branchCondition = 0; // LT - value1 = loop->upper; - reg1 = addiInstr->args[1].data.reg.reg; - value2 = absStep - 1 + loop->upper; - mode = 1; - } else { - branchCondition = 0; // LT - reg1 = addiInstr->args[1].data.reg.reg; - reg2 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1; - mode = 2; - } - } else if (loop->unknownCondition == ELESSEQU) { - branchOpcode = PC_BT; - if (loop->lowerType == LOOP_BOUND_CONSTANT) { - branchCondition = 0; // LT - value1 = loop->lower; - reg1 = addiInstr->args[2].data.reg.reg; - value2 = absStep - loop->lower; - mode = 0; - } else if (loop->upperType == LOOP_BOUND_CONSTANT) { - branchCondition = 1; // GT - value1 = loop->upper; - reg1 = addiInstr->args[1].data.reg.reg; - value2 = absStep + loop->upper; - mode = 1; - } else { - branchCondition = 1; // GT - value1 = 0; - reg1 = addiInstr->args[1].data.reg.reg; - reg2 = addiInstr->args[2].data.reg.reg; - value2 = absStep; - mode = 2; - } - } else if (loop->unknownCondition == EGREATER) { - branchOpcode = PC_BF; - if (loop->lowerType == LOOP_BOUND_CONSTANT) { - branchCondition = 0; // LT - value1 = loop->lower; - reg1 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1 + loop->lower; - mode = 1; - } else if (loop->upperType == LOOP_BOUND_CONSTANT) { - branchCondition = 1; // GT - value1 = loop->upper; - reg1 = addiInstr->args[1].data.reg.reg; - value2 = absStep - 1 - loop->upper; - mode = 0; - } else { - branchCondition = 1; // GT - value1 = 0; - reg1 = addiInstr->args[1].data.reg.reg; - reg2 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1; - mode = 3; - } - } else if (loop->unknownCondition == EGREATEREQU) { - branchOpcode = PC_BT; - if (loop->lowerType == LOOP_BOUND_CONSTANT) { - branchCondition = 1; // GT - value1 = loop->lower; - reg1 = addiInstr->args[2].data.reg.reg; - value2 = absStep + loop->lower; - mode = 1; - } else if (loop->upperType == LOOP_BOUND_CONSTANT) { - branchCondition = 0; // LT - value1 = loop->upper; - reg1 = addiInstr->args[1].data.reg.reg; - value2 = absStep - loop->upper; - mode = 0; - } else { - branchCondition = 0; // LT - reg1 = addiInstr->args[1].data.reg.reg; - reg2 = addiInstr->args[2].data.reg.reg; - value2 = absStep; - mode = 3; - } - } else if (loop->unknownCondition == ENOTEQU) { - branchOpcode = PC_BT; - branchCondition = 2; // EQ - if (loop->step > 0) { - if (loop->lowerType == LOOP_BOUND_CONSTANT) { - value1 = loop->lower; - reg1 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1 - loop->lower; - mode = 0; - } else if (loop->upperType == LOOP_BOUND_CONSTANT) { - value1 = loop->upper; - reg1 = addiInstr->args[1].data.reg.reg; - value2 = absStep - 1 + loop->upper; - mode = 1; - } else { - reg1 = addiInstr->args[1].data.reg.reg; - reg2 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1; - mode = 2; - } - } else { - if (loop->lowerType == LOOP_BOUND_CONSTANT) { - value1 = loop->lower; - reg1 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1 + loop->lower; - mode = 1; - } else if (loop->upperType == LOOP_BOUND_CONSTANT) { - value1 = loop->upper; - reg1 = addiInstr->args[1].data.reg.reg; - value2 = absStep - 1 - loop->upper; - mode = 0; - } else { - reg1 = addiInstr->args[1].data.reg.reg; - reg2 = addiInstr->args[2].data.reg.reg; - value2 = absStep - 1; - mode = 3; - } - } - } - - while (1) { - instr = loop->body->firstPCode; - CError_ASSERT(1857, instr); - - if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) - break; - - deletepcode(instr); - insertpcodebefore(loop->preheader->lastPCode, instr); - loop->bodySize--; - } - - // build a value for mtctr - counterReg = used_virtual_registers[RegClass_GPR]++; - - if (mode == 1) { - if (value2 == 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_NEG, counterReg, reg1)); - } else if (FITS_IN_SHORT(value2)) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBFIC, counterReg, reg1, value2)); - } else { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2))); - if (value2 != 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); - } - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBF, counterReg, reg1, counterReg)); - } - } else if (mode == 0) { - if (value2 == 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_MR, counterReg, reg1)); - } else if (FITS_IN_SHORT(value2)) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, reg1, 0, value2)); - } else { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); - if (value2 != 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); - } - } - } else if (mode == 2) { - if (value2 == 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBF, counterReg, reg1, reg2)); - } else if (FITS_IN_SHORT(value2)) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, reg2, 0, value2)); - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBF, counterReg, reg1, counterReg)); - } else { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2))); - if (value2 != 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); - } - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBF, counterReg, reg1, counterReg)); - } - } else { - if (value2 == 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBF, counterReg, reg2, reg1)); - } else { - if (FITS_IN_SHORT(value2)) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, reg1, 0, value2)); - } else { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); - if (value2 != 0) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); - } - } - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_SUBF, counterReg, reg2, counterReg)); - } - } - - if (absStep > 1) { - unsigned char bits; - if ((bits = ispowerof2(absStep))) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31)); - } else { - int reg = used_virtual_registers[RegClass_GPR]++; - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_LI, reg, absStep)); - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_DIVWU, counterReg, counterReg, reg)); - } - } - - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_MTCTR, counterReg)); - - if (mode < 2) { - if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { - if (FITS_IN_USHORT(value1)) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_CMPLI, 0, reg1, value1)); - } else { - PCode *tmp; - tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); - insertpcodebefore(loop->preheader->lastPCode, tmp); - - if (loop->iterationCount != 0) - insertpcodeafter(tmp, - makepcode(PC_ADDI, - tmp->args[0].data.reg.reg, - tmp->args[0].data.reg.reg, - 0, LOW_PART(value1))); - - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg)); - } - } else { - if (FITS_IN_SHORT(value1)) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_CMPI, 0, reg1, value1)); - } else { - PCode *tmp; - tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); - insertpcodebefore(loop->preheader->lastPCode, tmp); - - if (loop->iterationCount != 0) - insertpcodeafter(tmp, - makepcode(PC_ADDI, - tmp->args[0].data.reg.reg, - tmp->args[0].data.reg.reg, - 0, LOW_PART(value1))); - - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg)); - } - } - } else { - if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_CMPL, 0, reg1, reg2)); - } else { - insertpcodebefore(loop->preheader->lastPCode, - makepcode(PC_CMP, 0, reg1, reg2)); - } - } - - if (!afterLoop) - afterLoop = loop->body->nextBlock; - - instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels); - deletepcode(loop->preheader->lastPCode); - appendpcode(loop->preheader, instr); - - instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); - deletepcode(loop->body->firstPCode); - deletepcode(loop->body->lastPCode); - appendpcode(loop->body, instr); - - loop->preheader->successors = NULL; - ptr = &loop->body->predecessors; - while ((link = *ptr)) { - if (link->block == loop->preheader) { - *ptr = link->nextLink; - break; - } - ptr = &link->nextLink; - } - - link = lalloc(sizeof(PCLink)); - link->block = loop->preheader->nextBlock; - link->nextLink = loop->preheader->successors; - loop->preheader->successors = link; - - link = lalloc(sizeof(PCLink)); - link->block = loop->preheader; - link->nextLink = loop->preheader->nextBlock->predecessors; - loop->preheader->nextBlock->predecessors = link; - - link = lalloc(sizeof(PCLink)); - link->block = afterLoop; - link->nextLink = loop->preheader->successors; - loop->preheader->successors = link; - - link = lalloc(sizeof(PCLink)); - link->block = loop->preheader; - link->nextLink = afterLoop->predecessors; - afterLoop->predecessors = link; - - for (loop = loop->parent; loop; loop = loop->parent) - loop->x4E = 1; -} - -static void optimizeloop(Loop *loop) { - computeliveonexit(loop); - - if (loop->x53 || loop->x54) - insertupdateinstructions(loop); - - if (loop->isKnownCountingLoop) { - if (loop->iterationCount > 0) { - skiplooptest(loop); - if (!copts.optimizesize && !loop->x4D && !loop->x57) { - if (loop->x4F) - unrollloop(loop); - else if (!loop->x4E) - unrollloopconditional(loop); - } - } - - if (loop->iterationCount != 0) { - if (loop->iterationCount == 1) - deleteloopheader(loop); - else if (!loop->x4E && !loop->x4D) - rewriteloopwithBDNZ(loop); - } - - optimizedloops = 1; - } else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) { - rewriteunknownloopwithBDNZ(loop); - if (copts.unroll_speculative && !copts.optimizesize) { - if (loop->x4F && !loop->x57 && !loop->x52) - unrollunknownBDNZ(loop); - } - optimizedloops = 1; - } - - eliminateinductionvariables(loop); -} - -typedef struct LocalArray { - struct LocalArray *next; - Object *object; - unsigned int invalid:1; - unsigned int isFloat:1; - unsigned int isSigned:1; - SInt32 elementSize; - SInt32 arraySize; - SInt32 elementCount; - int totalUses; - int elements[1]; -} LocalArray; - -static LocalArray *scanforlocalarrays(void) { - SInt32 elementCount; - LocalArray *head; - LocalArray *array; - ObjectList *list; - int i; - SInt32 arraySize; - SInt32 elementSize; - - head = NULL; - - for (list = locals; list; list = list->next) { - if ( - list->object && - !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && - list->object->type && - IS_TYPE_ARRAY(list->object->type) && - TPTR_TARGET(list->object->type) && - TPTR_TARGET(list->object->type)->type < TYPESTRUCT - ) { - arraySize = list->object->type->size; - elementSize = TPTR_TARGET(list->object->type)->size; - elementCount = arraySize / elementSize; - if (elementCount > 0 && elementCount <= 8) { - array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray)); - array->next = head; - head = array; - - array->object = list->object; - array->elementSize = elementSize; - array->arraySize = arraySize; - array->elementCount = elementCount; - array->totalUses = 0; - array->isSigned = 1; - array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type)); - array->invalid = 0; - array->isSigned = 1; - if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type))) - array->isSigned = 0; - - for (i = 0; i < elementCount; i++) { - array->elements[i] = 0; - } - } - } - } - - return head; -} - -static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) { - while (arrays) { - if (arrays->object == object) - return arrays; - arrays = arrays->next; - } - return NULL; -} - -void changearraytoregisters(void) { - LocalArray *arrays; - PCodeBlock *block; - PCode *instr; - int i; - PCodeArg *op; - LocalArray **ptr; - LocalArray *array; - int intCounter; - int floatCounter; - int reg; - int reg2; - PCode *newInstr; - - arrays = NULL; - if ((arrays = scanforlocalarrays())) { - for (block = pcbasicblocks; block; block = block->nextBlock) { - for (instr = block->firstPCode; instr; instr = instr->nextPCode) { - if (!(instr->flags & fIsBranch) && instr->argCount) { - op = instr->args; - i = instr->argCount; - while (i--) { - if ( - op->kind == PCOp_MEMORY && - (PCOpMemoryArg) op->arg == PCOpMemory1 && - (array = lookup_array_object(arrays, op->data.mem.obj)) && - !array->invalid - ) { - if ( - (instr->flags & (fIsRead | fIsWrite)) && - (op->data.mem.offset % array->elementSize) == 0 && - op->data.mem.offset < array->arraySize - ) { - switch (instr->op) { - case PC_LBZ: - case PC_STB: - if (array->elementSize != 1) - array->invalid = 1; - break; - case PC_LHZ: - case PC_LHA: - case PC_STH: - if (array->elementSize != 2) - array->invalid = 1; - break; - case PC_LWZ: - case PC_STW: - if (array->elementSize != 4) - array->invalid = 1; - break; - case PC_LFD: - case PC_STFD: - if (array->elementSize != 8) - array->invalid = 1; - break; - default: - array->invalid = 1; - break; - } - - if (!array->invalid) - array->elements[op->data.mem.offset / array->elementSize]++; - } else { - array->invalid = 1; - } - } - op++; - } - } - } - } - - intCounter = 0; - floatCounter = 0; - ptr = &arrays; - array = *ptr; - while (array) { - if (array->invalid) { - *ptr = array = array->next; - continue; - } - - if (array->isFloat) - floatCounter += array->elementCount; - if (!array->isFloat) - intCounter += array->elementCount; - - for (i = 0; i < array->elementCount; i++) - array->totalUses += array->elements[i]; - - array = array->next; - } - - if (arrays) { - while (intCounter > 8) { - LocalArray *best; - int score; - score = 0; - best = NULL; - for (array = arrays; array; array = array->next) { - if (!array->isFloat) { - if (best) { - if (array->totalUses < score) { - score = array->totalUses; - best = array; - } - } else { - best = array; - score = array->totalUses; - } - } - } - - if (!best) - break; - - if (best == arrays) { - arrays = best->next; - } else { - for (array = arrays; array; array = array->next) { - if (array->next == best) { - array->next = best->next; - break; - } - } - } - - intCounter -= best->elementCount; - } - - while (floatCounter > 8) { - LocalArray *best; - int score; - score = 0; - best = NULL; - for (array = arrays; array; array = array->next) { - if (array->isFloat) { - if (best) { - if (array->totalUses < score) { - score = array->totalUses; - best = array; - } - } else { - best = array; - score = array->totalUses; - } - } - } - - if (!best) - break; - - if (best == arrays) { - arrays = best->next; - } else { - for (array = arrays; array; array = array->next) { - if (array->next == best) { - array->next = best->next; - break; - } - } - } - - floatCounter -= best->elementCount; - } - - CError_ASSERT(2394, intCounter <= 8 && floatCounter <= 8); - - if (!arrays) - return; - - optimizedloop_trans_regs = 1; - - for (array = arrays; array; array = array->next) { - for (i = 0; i < array->elementCount; i++) { - if (array->isFloat) - array->elements[i] = used_virtual_registers[RegClass_FPR]++; - else - array->elements[i] = used_virtual_registers[RegClass_GPR]++; - } - } - - for (block = pcbasicblocks; block; block = block->nextBlock) { - for (instr = block->firstPCode; instr; instr = instr->nextPCode) { - if ( - !(instr->flags & fIsBranch) && - instr->argCount && - (instr->flags & (fIsRead | fIsWrite)) && - instr->args[2].kind == PCOp_MEMORY && - (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && - (array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) && - !(array->invalid) - ) - { - reg2 = instr->args[0].data.reg.reg; - reg = array->elements[instr->args[2].data.mem.offset / array->elementSize]; - newInstr = NULL; - switch (instr->op) { - case PC_LBZ: - if (array->isSigned) - newInstr = makepcode(PC_MR, reg2, reg); - else - newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31); - break; - case PC_STB: - if (array->isSigned) - newInstr = makepcode(PC_EXTSB, reg, reg2); - else - newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31); - break; - case PC_LHZ: - newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31); - break; - case PC_LHA: - newInstr = makepcode(PC_EXTSH, reg2, reg); - break; - case PC_STH: - if (array->isSigned) - newInstr = makepcode(PC_EXTSH, reg, reg2); - else - newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31); - break; - case PC_LWZ: - newInstr = makepcode(PC_MR, reg2, reg); - break; - case PC_STW: - newInstr = makepcode(PC_MR, reg, reg2); - break; - case PC_LFD: - newInstr = makepcode(PC_FMR, reg2, reg); - break; - case PC_STFD: - newInstr = makepcode(PC_FMR, reg, reg2); - break; - default: - CError_FATAL(2494); - break; - } - - if (newInstr) { - insertpcodebefore(instr, newInstr); - deletepcode(instr); - instr = newInstr; - } - } - } - } - } - } - - freeoheap(); -} - -static void optimizenestedloops(Loop *loop) { - while (loop) { - if (loop->children) - optimizenestedloops(loop->children); - optimizeloop(loop); - loop = loop->nextSibling; - } -} - -void optimizeloops(void) { - optimizedloops = 0; - optimizedloop_full_unroll = 0; - optimizedloop_trans_regs = 0; - if (loopsinflowgraph) { - computeusedefchains(0); - last_virtual_GPR = used_virtual_registers[RegClass_GPR]; - liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5)); - inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5)); - optimizenestedloops(loopsinflowgraph); - freeoheap(); - } -} - |