summaryrefslogtreecommitdiff
path: root/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c
diff options
context:
space:
mode:
authorAsh Wolf <ninji@wuffs.org>2023-01-26 11:30:47 +0000
committerAsh Wolf <ninji@wuffs.org>2023-01-26 11:30:47 +0000
commit094b96ca1df4a035b5f93c351f773306c0241f3f (patch)
tree95ce05e3ebe816c7ee7996206bb37ea17d8ca33c /compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c
parentfc0c4c0df7b583b55a08317cf1ef6a71d27c0440 (diff)
downloadMWCC-main.tar.gz
MWCC-main.zip
move lots of source files around to match their actual placement in the original treemain
Diffstat (limited to 'compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c')
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c1553
1 files changed, 1553 insertions, 0 deletions
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c
new file mode 100644
index 0000000..b2aef1e
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c
@@ -0,0 +1,1553 @@
+#include "compiler/LoopOptimization.h"
+#include "compiler/CFunc.h"
+#include "compiler/CParser.h"
+#include "compiler/BitVectors.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/LoopDetection.h"
+#include "compiler/PCode.h"
+#include "compiler/Registers.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/objects.h"
+#include "compiler/types.h"
+
+int optimizedloops;
+int optimizedloop_full_unroll;
+int optimizedloop_trans_regs;
+static UInt32 *liveonexit;
+static UInt32 *inductionvariables;
+static int last_virtual_GPR;
+
+static int ispowerof2(SInt32 value) {
+ int bit = getbit(value);
+ return (bit > 0 && bit < 31) ? bit : 0;
+}
+
+static void insertupdateinstructions(Loop *loop) {
+ // nothing
+}
+
+static void computeliveonexit(Loop *loop) {
+ UInt32 *usevec;
+ UInt32 *defvec;
+ BlockList *blocklist;
+ RegUseOrDef *list;
+ int gpr;
+
+ bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0);
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24))
+ bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses);
+ }
+
+ bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0);
+ if (loop->preheader)
+ bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs);
+
+ bitvectorinitialize(liveonexit, last_virtual_GPR, 0);
+
+ for (gpr = 32; gpr < last_virtual_GPR; gpr++) {
+ for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, defvec)) {
+ if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) {
+ bitvectorsetbit(gpr, liveonexit);
+ break;
+ }
+ }
+ }
+
+ for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, usevec)) {
+ if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) {
+ bitvectorsetbit(gpr, liveonexit);
+ break;
+ }
+ }
+ }
+ }
+}
+
+static void eliminateinductionvariables(Loop *loop) {
+ BlockList *blocklist;
+ PCode *instr;
+ PCode *nextInstr;
+ PCodeArg *op;
+ int i;
+
+ bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF);
+
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) {
+ if (
+ instr->op != PC_ADDI ||
+ instr->args[0].data.reg.reg < 32 ||
+ instr->args[0].data.reg.reg >= last_virtual_GPR ||
+ instr->args[1].data.reg.reg != instr->args[0].data.reg.reg
+ ) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
+ op->data.reg.reg < last_virtual_GPR
+ )
+ bitvectorclearbit(op->data.reg.reg, inductionvariables);
+ op++;
+ }
+ }
+ }
+ }
+
+ if (loop->parent) {
+ for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
+ op->data.reg.reg < last_virtual_GPR
+ )
+ bitvectorsetbit(op->data.reg.reg, liveonexit);
+ op++;
+ }
+ }
+ }
+
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ if (
+ instr->op == PC_ADDI &&
+ instr->args[0].data.reg.reg >= 32 &&
+ instr->args[0].data.reg.reg < last_virtual_GPR &&
+ instr->args[1].data.reg.reg == instr->args[0].data.reg.reg &&
+ bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) &&
+ !bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit)
+ ) {
+ deletepcode(instr);
+ optimizedloops = 1;
+ }
+ }
+ }
+}
+
+static void skiplooptest(Loop *loop) {
+ PCodeBlock *preheader;
+ PCodeLabel *label;
+ PCLink **ptr;
+ PCLink *link;
+ PCode *lastInstr;
+ PCode *instr;
+
+ preheader = loop->preheader;
+ lastInstr = loop->body->lastPCode;
+ CError_ASSERT(340, lastInstr->args[2].kind == PCOp_LABEL);
+
+ label = lastInstr->args[2].data.label.label;
+ preheader->lastPCode->args[0].data.label.label = label;
+ preheader->successors->block = label->block;
+
+ ptr = &loop->body->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == preheader) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ link->nextLink = label->block->predecessors;
+ label->block->predecessors = link;
+
+ while (1) {
+ instr = loop->body->firstPCode;
+ CError_ASSERT(369, instr);
+
+ if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
+ break;
+
+ deletepcode(instr);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ loop->bodySize--;
+ }
+
+ for (instr = instr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
+ insertpcodebefore(loop->preheader->lastPCode, copypcode(instr));
+}
+
+static void unrollloop(Loop *loop) {
+ PCodeBlock *newBlock;
+ int i;
+ int factor;
+ PCode *instr;
+ PCodeBlock *block;
+ PCode *firstInstr;
+ PCode *nextInstr;
+
+ for (factor = copts.unroll_factor_limit; factor > 1; factor--) {
+ if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit)
+ break;
+ }
+
+ if (factor == 1)
+ return;
+ if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4)
+ return;
+
+ newBlock = oalloc(sizeof(PCodeBlock));
+ newBlock->firstPCode = newBlock->lastPCode = NULL;
+ for (i = 0; i < factor - 1; i++) {
+ firstInstr = loop->body->firstPCode;
+ CError_ASSERT(448, firstInstr);
+ if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI)
+ CError_FATAL(450);
+
+ for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
+ appendpcode(newBlock, copypcode(instr));
+
+ for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op != PC_B)
+ appendpcode(newBlock, copypcode(instr));
+ }
+ }
+ }
+
+ block = loop->body->predecessors->block;
+ for (instr = newBlock->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ appendpcode(block, instr);
+ }
+ loop->iterationCount /= factor;
+}
+
+void pccomputepredecessors1(PCodeBlock *block) {
+ PCLink *succ;
+ PCLink *pred;
+
+ for (succ = block->successors; succ; succ = succ->nextLink) {
+ if (!succ->block) {
+ CError_FATAL(496);
+ } else {
+ for (pred = succ->block->predecessors; pred; pred = pred->nextLink) {
+ if (pred->block == block)
+ break;
+ }
+
+ if (!pred) {
+ pred = lalloc(sizeof(PCLink));
+ pred->block = block;
+ pred->nextLink = succ->block->predecessors;
+ succ->block->predecessors = pred;
+ }
+ }
+ }
+}
+
+static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) {
+ PCodeBlock *newBlock;
+ PCodeBlock *nextBlock;
+ PCodeLabel *label;
+ PCLink *prev;
+ PCLink *link;
+
+ label = makepclabel();
+ newBlock = lalloc(sizeof(PCodeBlock));
+
+ nextBlock = block->nextBlock;
+ newBlock->nextBlock = nextBlock;
+ block->nextBlock = newBlock;
+
+ newBlock->prevBlock = block;
+ nextBlock->prevBlock = newBlock;
+
+ newBlock->labels = NULL;
+ newBlock->predecessors = newBlock->successors = NULL;
+ newBlock->firstPCode = newBlock->lastPCode = NULL;
+ newBlock->pcodeCount = 0;
+ newBlock->loopWeight = loopWeight;
+ newBlock->flags = 0;
+ newBlock->blockIndex = pcblockcount++;
+ pclabel(newBlock, label);
+
+ prev = NULL;
+ for (link = block->successors; link; link = link->nextLink) {
+ if (link->block == nextBlock) {
+ if (!prev)
+ block->successors = link->nextLink;
+ else
+ prev->nextLink = link->nextLink;
+ link->nextLink = NULL;
+ newBlock->successors = link;
+ break;
+ }
+ prev = link;
+ }
+
+ prev = NULL;
+ for (link = nextBlock->predecessors; link; link = link->nextLink) {
+ if (link->block == block) {
+ if (!prev)
+ nextBlock->predecessors = link->nextLink;
+ else
+ prev->nextLink = link->nextLink;
+ link->nextLink = NULL;
+ newBlock->predecessors = link;
+ break;
+ }
+ prev = link;
+ }
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newBlock;
+ link->nextLink = block->successors;
+ block->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newBlock;
+ link->nextLink = nextBlock->predecessors;
+ nextBlock->predecessors = link;
+
+ return newBlock;
+}
+
+static void unrollloopconditional(Loop *loop) {
+ PCodeBlock *lastBlock;
+ PCodeLabel *label24;
+ PCode *instr;
+ PCode *instrCopy;
+ int outputBlockCount;
+ int inputBlockCount;
+ PCodeBlock **blocks;
+ PCodeBlock **blocks2;
+ PCodeBlock **blocks3;
+ PCode *firstInstr;
+ int factor;
+ PCodeBlock *block;
+ PCLink *link;
+ PCLink *prev;
+ int i;
+ int j;
+ int k;
+ int instructionCount;
+
+ label24 = NULL;
+ outputBlockCount = 0;
+ instructionCount = 0;
+
+ for (factor = copts.unroll_factor_limit; factor > 1; factor--) {
+ if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit)
+ break;
+ }
+
+ if (factor == 1)
+ return;
+
+ inputBlockCount = 0;
+ for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) {
+ inputBlockCount++;
+ if (!bitvectorgetbit(block->blockIndex, loop->memberblocks))
+ instructionCount += block->pcodeCount;
+ }
+
+ if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8)
+ return;
+
+ blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *));
+ blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *));
+ blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *)));
+ memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *));
+ memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *));
+ memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *)));
+
+ block = loop->preheader->nextBlock;
+ for (i = 0; i < inputBlockCount; i++) {
+ blocks[i] = block;
+ block = block->nextBlock;
+ }
+
+ lastBlock = blocks[inputBlockCount - 1];
+ for (i = 0; i < factor - 1; i++) {
+ for (j = 0; j < inputBlockCount; j++) {
+ blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight);
+ blocks3[outputBlockCount++] = blocks2[j];
+ lastBlock = blocks2[j];
+ }
+ if (label24) {
+ pclabel(blocks2[0], label24);
+ label24 = NULL;
+ }
+
+ for (j = 0; j < inputBlockCount; j++) {
+ for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->flags & fIsBranch) {
+ PCodeArg *op;
+ int opID;
+ instrCopy = copypcode(instr);
+ op = NULL;
+ for (opID = 0; opID < instr->argCount; opID++) {
+ if (instr->args[opID].kind == PCOp_LABEL) {
+ op = &instr->args[opID];
+ break;
+ }
+ }
+
+ if (op) {
+ if (op->data.label.label->block == loop->body) {
+ if (!label24)
+ label24 = makepclabel();
+ instrCopy->args[opID].data.label.label = label24;
+ } else {
+ for (k = 0; k < inputBlockCount; k++) {
+ if (op->data.label.label->block == blocks[k]) {
+ instrCopy->args[opID].data.label.label = blocks2[k]->labels;
+ break;
+ }
+ }
+ }
+ }
+
+ appendpcode(blocks2[j], instrCopy);
+ if (op)
+ pcbranch(blocks2[j], instrCopy->args[opID].data.label.label);
+ } else {
+ appendpcode(blocks2[j], copypcode(instr));
+ }
+ }
+ }
+
+ firstInstr = loop->body->firstPCode;
+ CError_ASSERT(762, firstInstr != NULL);
+ if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI)
+ CError_FATAL(764);
+
+ for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
+ appendpcode(blocks2[inputBlockCount - 1], copypcode(instr));
+
+ for (j = 0; j < inputBlockCount; j++) {
+ for (link = blocks[j]->successors; link; link = link->nextLink) {
+ if (link->block == blocks[j]->nextBlock)
+ break;
+ }
+
+ if (!link) {
+ for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == blocks2[j]->nextBlock) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ blocks2[j]->successors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+
+ for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == blocks2[j]) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ blocks2[j]->nextBlock->predecessors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+ }
+ }
+ }
+
+ if (label24)
+ pclabel(loop->body, label24);
+
+ for (i = 0; i < inputBlockCount; i++) {
+ for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->flags & fIsBranch) {
+ PCodeArg *op;
+ int opID;
+ op = NULL;
+ for (opID = 0; opID < instr->argCount; opID++) {
+ if (instr->args[opID].kind == PCOp_LABEL) {
+ op = &instr->args[opID];
+ break;
+ }
+ }
+
+ if (op && op->data.label.label->block == loop->body) {
+ instr->args[opID].data.label.label = blocks3[0]->labels;
+
+ for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == loop->body) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ blocks[i]->successors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+
+ for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == blocks[i]) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ loop->body->predecessors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+
+ link = blocks[i]->successors;
+ while (link && link->block != blocks3[0])
+ link = link->nextLink;
+
+ if (!link) {
+ pcbranch(blocks[i], op->data.label.label);
+ pccomputepredecessors1(blocks[i]);
+ }
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < outputBlockCount; i++)
+ pccomputepredecessors1(blocks3[i]);
+
+ loop->iterationCount /= factor;
+}
+
+static void unrollunknownBDNZ(Loop *loop) {
+ int factor; // r29
+ PCodeBlock *preheader; // r17
+ PCodeBlock *blockA; // r23
+ PCodeBlock *postheader; // r22
+ PCodeBlock *newblock1; // r26
+ PCodeBlock *newblock2; // r31
+ PCodeBlock *newblock3; // r19
+ PCodeBlock *newblock4; // r20
+ PCodeBlock *newblock5; // r24
+ PCode *mtctr; // r21
+ int mtctr_reg; // r27
+ PCode *instr28; // r28
+ PCode *instr6; // r6
+ PCodeBlock *block;
+ PCode *instr;
+ PCodeArg *op;
+ int i;
+ int val;
+ short mtctr_shifted_reg; // r16
+ short reg25; // r25
+ PCLink *link;
+
+ if (loop->bodySize < 4)
+ return;
+
+ factor = 128;
+ while (factor > copts.unroll_factor_limit)
+ factor >>= 1;
+ while (factor > 1 && (loop->bodySize - 2) * factor > copts.unroll_instr_limit)
+ factor >>= 1;
+
+ if (factor < 2)
+ return;
+
+ preheader = loop->preheader;
+ blockA = loop->body->nextBlock;
+ postheader = preheader->nextBlock;
+
+ newblock1 = insertnewpcblock(preheader, loop->loopWeight);
+ newblock2 = insertnewpcblock(newblock1, loop->loopWeight);
+ newblock3 = insertnewpcblock(newblock2, loop->loopWeight);
+ newblock4 = insertnewpcblock(newblock3, loop->loopWeight);
+ newblock5 = insertnewpcblock(newblock4, loop->loopWeight);
+ addblocktoloop(loop, newblock1);
+ addblocktoloop(loop, newblock2);
+ addblocktoloop(loop, newblock3);
+ addblocktoloop(loop, newblock4);
+ addblocktoloop(loop, newblock5);
+
+ for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) {
+ if (instr->op == PC_MTCTR) {
+ mtctr = instr;
+ mtctr_reg = instr->args[0].data.reg.reg;
+ }
+ }
+
+ if (!mtctr)
+ return;
+
+ instr28 = NULL;
+ for (block = postheader; block != loop->body; block = block->successors->block) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op != PC_B) {
+ appendpcode(newblock2, copypcode(instr));
+ if (instr == loop->pc18)
+ instr28 = newblock2->lastPCode;
+ }
+ }
+ }
+
+ if (!instr28) {
+ appendpcode(newblock2, copypcode(loop->pc18));
+ instr28 = newblock2->lastPCode;
+ }
+
+ instr6 = NULL;
+ for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr != instr28) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg == loop->pc18->args[0].data.reg.reg &&
+ (op->data.reg.effect & (EffectRead | EffectWrite))
+ ) {
+ instr6 = instr;
+ break;
+ }
+ op++;
+ }
+ }
+ if (instr6)
+ break;
+ }
+
+ if (!instr6) {
+ deletepcode(instr28);
+ deletepcode(loop->pc18);
+ if (loop->footer)
+ blockA = loop->footer;
+ else
+ blockA = insertnewpcblock(loop->body, loop->loopWeight);
+ } else {
+ instr28 = NULL;
+ }
+
+ for (i = 1; i < factor; i++) {
+ for (block = postheader; block != loop->body; block = block->successors->block) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op != PC_B)
+ appendpcode(newblock2, copypcode(instr));
+ }
+ }
+ }
+
+ mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++;
+ appendpcode(newblock1, makepcode(
+ PC_RLWINM,
+ mtctr_shifted_reg,
+ mtctr_reg,
+ 32 - ispowerof2(factor),
+ ispowerof2(factor),
+ 31));
+
+ appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0));
+
+ if (instr28) {
+ reg25 = used_virtual_registers[RegClass_GPR]++;
+ if (loop->step == 1) {
+ instr = makepcode(PC_MR, reg25, mtctr_reg);
+ } else if (loop->step == -1) {
+ instr = makepcode(PC_NEG, reg25, mtctr_reg);
+ } else {
+ val = ispowerof2(abs(loop->step));
+ if (val > 0) {
+ instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val);
+ if (loop->step < 0) {
+ appendpcode(newblock1, instr);
+ instr = makepcode(PC_NEG, reg25, reg25);
+ }
+ } else {
+ instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step);
+ }
+ }
+ appendpcode(newblock1, instr);
+ }
+
+ appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg));
+ appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels));
+ pcbranch(newblock1, newblock5->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock1;
+ link->nextLink = newblock5->predecessors;
+ newblock5->predecessors = link;
+
+ appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels));
+ pcbranch(newblock3, newblock2->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock3;
+ link->nextLink = newblock2->predecessors;
+ newblock2->predecessors = link;
+
+ appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1));
+ appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels));
+ pcbranch(newblock4, blockA->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock4;
+ link->nextLink = blockA->predecessors;
+ blockA->predecessors = link;
+
+ deletepcode(mtctr);
+ appendpcode(newblock5, mtctr);
+
+ if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) {
+ instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25);
+ if (blockA->firstPCode)
+ insertpcodebefore(blockA->firstPCode, instr);
+ else
+ appendpcode(blockA, instr);
+ }
+}
+
+static void deleteloop(Loop *loop) {
+ PCodeBlock *body;
+ PCodeBlock *preheader;
+ PCodeBlock *nextblock;
+ PCodeLabel *label;
+ PCode *instr;
+ PCLink **ptr;
+ PCLink *link;
+ PCodeBlock *block;
+
+ body = loop->body;
+ preheader = loop->preheader;
+ nextblock = body->nextBlock;
+ label = makepclabel();
+
+ while (1) {
+ instr = body->firstPCode;
+ CError_ASSERT(1294, instr != NULL);
+
+ if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
+ break;
+
+ deletepcode(instr);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ loop->bodySize--;
+ }
+
+ pclabel(nextblock, label);
+ preheader->lastPCode->args[0].data.label.label = label;
+ preheader->successors->block = nextblock;
+
+ ptr = &nextblock->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == body) {
+ link->block = preheader;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ block = pcbasicblocks;
+ while ((nextblock = block->nextBlock)) {
+ if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks))
+ block->nextBlock = nextblock->nextBlock;
+ else
+ block = nextblock;
+ }
+}
+
+static void deleteloopheader(Loop *loop) {
+ PCLink *link;
+ PCLink **ptr;
+
+ ptr = &loop->body->successors;
+ while ((link = *ptr)) {
+ if (link->block == loop->body->lastPCode->args[2].data.label.label->block) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == loop->body) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ deletepcode(loop->body->firstPCode);
+ deletepcode(loop->body->lastPCode);
+ optimizedloop_full_unroll = 1;
+}
+
+static void rewriteloopwithBDNZ(Loop *loop) {
+ PCode *instr;
+
+ if (!FITS_IN_SHORT(loop->iterationCount)) {
+ instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount));
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+
+ if (loop->iterationCount != 0)
+ insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount)));
+ } else {
+ instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ }
+
+ insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg));
+
+ instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
+ deletepcode(loop->body->firstPCode);
+ deletepcode(loop->body->lastPCode);
+ appendpcode(loop->body, instr);
+
+ for (loop = loop->parent; loop; loop = loop->parent)
+ loop->x4E = 1;
+}
+
+static void rewriteunknownloopwithBDNZ(Loop *loop) {
+ SInt32 value1; // r24
+ SInt32 value2; // r30
+ Opcode branchOpcode; // r19
+ SInt32 absStep; // r28
+ int branchCondition; // r20
+ int counterReg; // r27
+ int reg1; // r26
+ int reg2; // r22
+ unsigned char mode; // r23
+ PCodeBlock *afterLoop; // r25
+ PCode *addiInstr;
+ PCode *instr;
+ PCLink *link;
+ PCLink **ptr;
+
+ absStep = abs(loop->step);
+ afterLoop = NULL;
+
+ for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) {
+ deletepcode(addiInstr);
+ if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode)
+ insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr);
+ else
+ appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr);
+
+ afterLoop = insertnewpcblock(loop->body, loop->loopWeight);
+ appendpcode(afterLoop, copypcode(addiInstr));
+ loop->footer = afterLoop;
+ }
+
+ if (loop->unknownCondition == ELESS) {
+ branchOpcode = PC_BF;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 - loop->lower;
+ mode = 0;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 + loop->upper;
+ mode = 1;
+ } else {
+ branchCondition = 0; // LT
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 2;
+ }
+ } else if (loop->unknownCondition == ELESSEQU) {
+ branchOpcode = PC_BT;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - loop->lower;
+ mode = 0;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep + loop->upper;
+ mode = 1;
+ } else {
+ branchCondition = 1; // GT
+ value1 = 0;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep;
+ mode = 2;
+ }
+ } else if (loop->unknownCondition == EGREATER) {
+ branchOpcode = PC_BF;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 + loop->lower;
+ mode = 1;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 - loop->upper;
+ mode = 0;
+ } else {
+ branchCondition = 1; // GT
+ value1 = 0;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 3;
+ }
+ } else if (loop->unknownCondition == EGREATEREQU) {
+ branchOpcode = PC_BT;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep + loop->lower;
+ mode = 1;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - loop->upper;
+ mode = 0;
+ } else {
+ branchCondition = 0; // LT
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep;
+ mode = 3;
+ }
+ } else if (loop->unknownCondition == ENOTEQU) {
+ branchOpcode = PC_BT;
+ branchCondition = 2; // EQ
+ if (loop->step > 0) {
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 - loop->lower;
+ mode = 0;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 + loop->upper;
+ mode = 1;
+ } else {
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 2;
+ }
+ } else {
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 + loop->lower;
+ mode = 1;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 - loop->upper;
+ mode = 0;
+ } else {
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 3;
+ }
+ }
+ }
+
+ while (1) {
+ instr = loop->body->firstPCode;
+ CError_ASSERT(1857, instr);
+
+ if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
+ break;
+
+ deletepcode(instr);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ loop->bodySize--;
+ }
+
+ // build a value for mtctr
+ counterReg = used_virtual_registers[RegClass_GPR]++;
+
+ if (mode == 1) {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_NEG, counterReg, reg1));
+ } else if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBFIC, counterReg, reg1, value2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, counterReg));
+ }
+ } else if (mode == 0) {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_MR, counterReg, reg1));
+ } else if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, reg1, 0, value2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ }
+ } else if (mode == 2) {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, reg2));
+ } else if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, reg2, 0, value2));
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, counterReg));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, counterReg));
+ }
+ } else {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg2, reg1));
+ } else {
+ if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, reg1, 0, value2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ }
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg2, counterReg));
+ }
+ }
+
+ if (absStep > 1) {
+ unsigned char bits;
+ if ((bits = ispowerof2(absStep))) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31));
+ } else {
+ int reg = used_virtual_registers[RegClass_GPR]++;
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_LI, reg, absStep));
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_DIVWU, counterReg, counterReg, reg));
+ }
+ }
+
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_MTCTR, counterReg));
+
+ if (mode < 2) {
+ if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
+ if (FITS_IN_USHORT(value1)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPLI, 0, reg1, value1));
+ } else {
+ PCode *tmp;
+ tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
+ insertpcodebefore(loop->preheader->lastPCode, tmp);
+
+ if (loop->iterationCount != 0)
+ insertpcodeafter(tmp,
+ makepcode(PC_ADDI,
+ tmp->args[0].data.reg.reg,
+ tmp->args[0].data.reg.reg,
+ 0, LOW_PART(value1)));
+
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg));
+ }
+ } else {
+ if (FITS_IN_SHORT(value1)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPI, 0, reg1, value1));
+ } else {
+ PCode *tmp;
+ tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
+ insertpcodebefore(loop->preheader->lastPCode, tmp);
+
+ if (loop->iterationCount != 0)
+ insertpcodeafter(tmp,
+ makepcode(PC_ADDI,
+ tmp->args[0].data.reg.reg,
+ tmp->args[0].data.reg.reg,
+ 0, LOW_PART(value1)));
+
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg));
+ }
+ }
+ } else {
+ if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPL, 0, reg1, reg2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMP, 0, reg1, reg2));
+ }
+ }
+
+ if (!afterLoop)
+ afterLoop = loop->body->nextBlock;
+
+ instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels);
+ deletepcode(loop->preheader->lastPCode);
+ appendpcode(loop->preheader, instr);
+
+ instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
+ deletepcode(loop->body->firstPCode);
+ deletepcode(loop->body->lastPCode);
+ appendpcode(loop->body, instr);
+
+ loop->preheader->successors = NULL;
+ ptr = &loop->body->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == loop->preheader) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ link = lalloc(sizeof(PCLink));
+ link->block = loop->preheader->nextBlock;
+ link->nextLink = loop->preheader->successors;
+ loop->preheader->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = loop->preheader;
+ link->nextLink = loop->preheader->nextBlock->predecessors;
+ loop->preheader->nextBlock->predecessors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = afterLoop;
+ link->nextLink = loop->preheader->successors;
+ loop->preheader->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = loop->preheader;
+ link->nextLink = afterLoop->predecessors;
+ afterLoop->predecessors = link;
+
+ for (loop = loop->parent; loop; loop = loop->parent)
+ loop->x4E = 1;
+}
+
+static void optimizeloop(Loop *loop) {
+ computeliveonexit(loop);
+
+ if (loop->x53 || loop->x54)
+ insertupdateinstructions(loop);
+
+ if (loop->isKnownCountingLoop) {
+ if (loop->iterationCount > 0) {
+ skiplooptest(loop);
+ if (!copts.optimizesize && !loop->x4D && !loop->x57) {
+ if (loop->x4F)
+ unrollloop(loop);
+ else if (!loop->x4E)
+ unrollloopconditional(loop);
+ }
+ }
+
+ if (loop->iterationCount != 0) {
+ if (loop->iterationCount == 1)
+ deleteloopheader(loop);
+ else if (!loop->x4E && !loop->x4D)
+ rewriteloopwithBDNZ(loop);
+ }
+
+ optimizedloops = 1;
+ } else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) {
+ rewriteunknownloopwithBDNZ(loop);
+ if (copts.unroll_speculative && !copts.optimizesize) {
+ if (loop->x4F && !loop->x57 && !loop->x52)
+ unrollunknownBDNZ(loop);
+ }
+ optimizedloops = 1;
+ }
+
+ eliminateinductionvariables(loop);
+}
+
+typedef struct LocalArray {
+ struct LocalArray *next;
+ Object *object;
+ unsigned int invalid:1;
+ unsigned int isFloat:1;
+ unsigned int isSigned:1;
+ SInt32 elementSize;
+ SInt32 arraySize;
+ SInt32 elementCount;
+ int totalUses;
+ int elements[1];
+} LocalArray;
+
+static LocalArray *scanforlocalarrays(void) {
+ SInt32 elementCount;
+ LocalArray *head;
+ LocalArray *array;
+ ObjectList *list;
+ int i;
+ SInt32 arraySize;
+ SInt32 elementSize;
+
+ head = NULL;
+
+ for (list = locals; list; list = list->next) {
+ if (
+ list->object &&
+ !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
+ list->object->type &&
+ IS_TYPE_ARRAY(list->object->type) &&
+ TPTR_TARGET(list->object->type) &&
+ TPTR_TARGET(list->object->type)->type < TYPESTRUCT
+ ) {
+ arraySize = list->object->type->size;
+ elementSize = TPTR_TARGET(list->object->type)->size;
+ elementCount = arraySize / elementSize;
+ if (elementCount > 0 && elementCount <= 8) {
+ array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray));
+ array->next = head;
+ head = array;
+
+ array->object = list->object;
+ array->elementSize = elementSize;
+ array->arraySize = arraySize;
+ array->elementCount = elementCount;
+ array->totalUses = 0;
+ array->isSigned = 1;
+ array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type));
+ array->invalid = 0;
+ array->isSigned = 1;
+ if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type)))
+ array->isSigned = 0;
+
+ for (i = 0; i < elementCount; i++) {
+ array->elements[i] = 0;
+ }
+ }
+ }
+ }
+
+ return head;
+}
+
+static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) {
+ while (arrays) {
+ if (arrays->object == object)
+ return arrays;
+ arrays = arrays->next;
+ }
+ return NULL;
+}
+
+void changearraytoregisters(void) {
+ LocalArray *arrays;
+ PCodeBlock *block;
+ PCode *instr;
+ int i;
+ PCodeArg *op;
+ LocalArray **ptr;
+ LocalArray *array;
+ int intCounter;
+ int floatCounter;
+ int reg;
+ int reg2;
+ PCode *newInstr;
+
+ arrays = NULL;
+ if ((arrays = scanforlocalarrays())) {
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (!(instr->flags & fIsBranch) && instr->argCount) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) op->arg == PCOpMemory1 &&
+ (array = lookup_array_object(arrays, op->data.mem.obj)) &&
+ !array->invalid
+ ) {
+ if (
+ (instr->flags & (fIsRead | fIsWrite)) &&
+ (op->data.mem.offset % array->elementSize) == 0 &&
+ op->data.mem.offset < array->arraySize
+ ) {
+ switch (instr->op) {
+ case PC_LBZ:
+ case PC_STB:
+ if (array->elementSize != 1)
+ array->invalid = 1;
+ break;
+ case PC_LHZ:
+ case PC_LHA:
+ case PC_STH:
+ if (array->elementSize != 2)
+ array->invalid = 1;
+ break;
+ case PC_LWZ:
+ case PC_STW:
+ if (array->elementSize != 4)
+ array->invalid = 1;
+ break;
+ case PC_LFD:
+ case PC_STFD:
+ if (array->elementSize != 8)
+ array->invalid = 1;
+ break;
+ default:
+ array->invalid = 1;
+ break;
+ }
+
+ if (!array->invalid)
+ array->elements[op->data.mem.offset / array->elementSize]++;
+ } else {
+ array->invalid = 1;
+ }
+ }
+ op++;
+ }
+ }
+ }
+ }
+
+ intCounter = 0;
+ floatCounter = 0;
+ ptr = &arrays;
+ array = *ptr;
+ while (array) {
+ if (array->invalid) {
+ *ptr = array = array->next;
+ continue;
+ }
+
+ if (array->isFloat)
+ floatCounter += array->elementCount;
+ if (!array->isFloat)
+ intCounter += array->elementCount;
+
+ for (i = 0; i < array->elementCount; i++)
+ array->totalUses += array->elements[i];
+
+ array = array->next;
+ }
+
+ if (arrays) {
+ while (intCounter > 8) {
+ LocalArray *best;
+ int score;
+ score = 0;
+ best = NULL;
+ for (array = arrays; array; array = array->next) {
+ if (!array->isFloat) {
+ if (best) {
+ if (array->totalUses < score) {
+ score = array->totalUses;
+ best = array;
+ }
+ } else {
+ best = array;
+ score = array->totalUses;
+ }
+ }
+ }
+
+ if (!best)
+ break;
+
+ if (best == arrays) {
+ arrays = best->next;
+ } else {
+ for (array = arrays; array; array = array->next) {
+ if (array->next == best) {
+ array->next = best->next;
+ break;
+ }
+ }
+ }
+
+ intCounter -= best->elementCount;
+ }
+
+ while (floatCounter > 8) {
+ LocalArray *best;
+ int score;
+ score = 0;
+ best = NULL;
+ for (array = arrays; array; array = array->next) {
+ if (array->isFloat) {
+ if (best) {
+ if (array->totalUses < score) {
+ score = array->totalUses;
+ best = array;
+ }
+ } else {
+ best = array;
+ score = array->totalUses;
+ }
+ }
+ }
+
+ if (!best)
+ break;
+
+ if (best == arrays) {
+ arrays = best->next;
+ } else {
+ for (array = arrays; array; array = array->next) {
+ if (array->next == best) {
+ array->next = best->next;
+ break;
+ }
+ }
+ }
+
+ floatCounter -= best->elementCount;
+ }
+
+ CError_ASSERT(2394, intCounter <= 8 && floatCounter <= 8);
+
+ if (!arrays)
+ return;
+
+ optimizedloop_trans_regs = 1;
+
+ for (array = arrays; array; array = array->next) {
+ for (i = 0; i < array->elementCount; i++) {
+ if (array->isFloat)
+ array->elements[i] = used_virtual_registers[RegClass_FPR]++;
+ else
+ array->elements[i] = used_virtual_registers[RegClass_GPR]++;
+ }
+ }
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (
+ !(instr->flags & fIsBranch) &&
+ instr->argCount &&
+ (instr->flags & (fIsRead | fIsWrite)) &&
+ instr->args[2].kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
+ (array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) &&
+ !(array->invalid)
+ )
+ {
+ reg2 = instr->args[0].data.reg.reg;
+ reg = array->elements[instr->args[2].data.mem.offset / array->elementSize];
+ newInstr = NULL;
+ switch (instr->op) {
+ case PC_LBZ:
+ if (array->isSigned)
+ newInstr = makepcode(PC_MR, reg2, reg);
+ else
+ newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31);
+ break;
+ case PC_STB:
+ if (array->isSigned)
+ newInstr = makepcode(PC_EXTSB, reg, reg2);
+ else
+ newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31);
+ break;
+ case PC_LHZ:
+ newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31);
+ break;
+ case PC_LHA:
+ newInstr = makepcode(PC_EXTSH, reg2, reg);
+ break;
+ case PC_STH:
+ if (array->isSigned)
+ newInstr = makepcode(PC_EXTSH, reg, reg2);
+ else
+ newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31);
+ break;
+ case PC_LWZ:
+ newInstr = makepcode(PC_MR, reg2, reg);
+ break;
+ case PC_STW:
+ newInstr = makepcode(PC_MR, reg, reg2);
+ break;
+ case PC_LFD:
+ newInstr = makepcode(PC_FMR, reg2, reg);
+ break;
+ case PC_STFD:
+ newInstr = makepcode(PC_FMR, reg, reg2);
+ break;
+ default:
+ CError_FATAL(2494);
+ break;
+ }
+
+ if (newInstr) {
+ insertpcodebefore(instr, newInstr);
+ deletepcode(instr);
+ instr = newInstr;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ freeoheap();
+}
+
+static void optimizenestedloops(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ optimizenestedloops(loop->children);
+ optimizeloop(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void optimizeloops(void) {
+ optimizedloops = 0;
+ optimizedloop_full_unroll = 0;
+ optimizedloop_trans_regs = 0;
+ if (loopsinflowgraph) {
+ computeusedefchains(0);
+ last_virtual_GPR = used_virtual_registers[RegClass_GPR];
+ liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5));
+ inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5));
+ optimizenestedloops(loopsinflowgraph);
+ freeoheap();
+ }
+}
+