summaryrefslogtreecommitdiff
path: root/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c
diff options
context:
space:
mode:
author: Ash Wolf <ninji@wuffs.org>, 2023-01-26 11:30:47 +0000
committer: Ash Wolf <ninji@wuffs.org>, 2023-01-26 11:30:47 +0000
commit: 094b96ca1df4a035b5f93c351f773306c0241f3f (patch)
tree: 95ce05e3ebe816c7ee7996206bb37ea17d8ca33c /compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c
parent: fc0c4c0df7b583b55a08317cf1ef6a71d27c0440 (diff)
downloadMWCC-main.tar.gz
MWCC-main.zip
move lots of source files around to match their actual placement in the original tree (branch: main)
Diffstat (limited to 'compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c')
-rw-r--r-- compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c 2305
1 files changed, 2305 insertions, 0 deletions
diff --git a/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c b/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c
new file mode 100644
index 0000000..cf3f1bf
--- /dev/null
+++ b/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c
@@ -0,0 +1,2305 @@
+#include "IroUnrollLoop.h"
+#include "compiler/CError.h"
+#include "IroFlowgraph.h"
+#include "IroLinearForm.h"
+#include "IroUtil.h"
+#include "compiler/LoopDetection.h"
+#include "IroLoop.h"
+#include "IroDump.h"
+#include "IroVars.h"
+#include "compiler/CFunc.h"
+#include "compiler/CMachine.h"
+/* One loop discovered by IRO_FindLoops_Unroll; entries are chained from LoopList_First.
+ * The definition is wrapped in mac68k alignment pragmas when __MWERKS__ is defined.
+ */
+#ifdef __MWERKS__
+#pragma options align=mac68k
+#endif
+typedef struct LoopList {
+ UInt8 flags; // bit 0: set on creation, cleared when another loop's node set is a subset of this one
+ BitVector *bv; // private copy of the InLoop bit vector for this loop
+ struct LoopList *next; // next entry; new entries are pushed at the head of the list
+ IRONode *fnode; // flowgraph node that heads the loop
+ int xE; // set to 0 on creation; not read in the code visible here
+} LoopList;
+#ifdef __MWERKS__
+#pragma options align=reset
+#endif
+
+// Forward declarations of file-local (static) functions that are used before their definitions.
+static void IRO_FindLoops_Unroll(void);
+static void LoopUnroll(int count, IRONode *fnode);
+static int IsLoopUnrollable(IROLoop *loop);
+static int IsDifferenceOfTermsConstant(IROAddrRecord *lowerRec, IROAddrRecord *upperRec, int isUnsigned, CInt64 *pval);
+static IROLinear *BuildOrigIterationCount_DoWhile(IROList *list, IROLoop *loop);
+static IROLinear *BuildPreAlignTemp(IROLoopInd *ind, UInt32 unrollFactor, IROList *list);
+static IROLinear *BuildNewFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop);
+static IROLinear *BuildUnrolledFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop);
+/* Public entry point of the loop unroller.
+ * Sets VectorPhaseCalledFromUnroll to 1, runs IRO_FindLoops_Unroll (which finds the
+ * loops and unrolls them), then calls IRO_CheckForUserBreak.
+ */
+void IRO_LoopUnroller(void) {
+ VectorPhaseCalledFromUnroll = 1;
+ IRO_FindLoops_Unroll();
+ IRO_CheckForUserBreak();
+}
+
+/*
+ * Finds the loops in the flowgraph and unrolls those that contain no other loop.
+ *
+ * Pass 1: for every flowgraph node, each predecessor whose `dom` vector has the
+ *         node's bit set is treated as the source of a back edge (presumably `dom`
+ *         is the dominator set - confirm). The node then heads a loop whose members
+ *         are collected in InLoop: the header, that predecessor, and whatever
+ *         AddPreds adds. Each loop is pushed onto LoopList_First with its own copy
+ *         of InLoop.
+ * Pass 2: for every ordered pair of distinct loops, when Bv_IsSubset(list->bv,
+ *         list2->bv) holds, bit 0 of list2->flags is cleared (assuming
+ *         Bv_IsSubset(a, b) tests "a is a subset of b", this drops enclosing loops).
+ * Pass 3: every loop whose bit 0 is still set is passed to LoopUnroll with
+ *         copts.unrollfactor, followed by IRO_UpdateFlagsOnInts.
+ */
+static void IRO_FindLoops_Unroll(void) {
+    IRONode *fnode;
+    IRONode *pred;
+    UInt16 i;
+    UInt16 flag;
+    LoopList *list;
+    LoopList *list2;
+
+    LoopList_First = NULL;
+
+    for (fnode = IRO_FirstNode; fnode; fnode = fnode->nextnode) {
+        flag = 0;
+        for (i = 0; i < fnode->numpred; i++) {
+            pred = IRO_NodeTable[fnode->pred[i]];
+            if (Bv_IsBitSet(fnode->index, pred->dom)) {
+                if (!flag) {
+                    // first back edge for this header: start a fresh member set
+                    Bv_AllocVector(&InLoop, IRO_NumNodes + 1);
+                    Bv_Clear(InLoop);
+                    Bv_SetBit(fnode->index, InLoop);
+                }
+                flag = 1;
+                Bv_SetBit(pred->index, InLoop);
+                if (pred != fnode)
+                    AddPreds(pred);
+            }
+        }
+
+        if (flag) {
+            // Push at the head. LoopList_First is NULL for the first entry, so one
+            // code path covers both the empty and the non-empty list.
+            list = oalloc(sizeof(LoopList));
+            list->next = LoopList_First;
+            LoopList_First = list;
+
+            Bv_AllocVector(&list->bv, IRO_NumNodes + 1);
+            // Assign rather than OR: the field has not been written since oalloc,
+            // so OR-ing into it would read a value that was never stored.
+            list->flags = 1;
+            Bv_Copy(InLoop, list->bv);
+            list->fnode = fnode;
+            list->xE = 0;
+        }
+    }
+
+    Bv_AllocVector(&LoopTemp, IRO_NumNodes + 1);
+    for (list = LoopList_First; list; list = list->next) {
+        for (list2 = LoopList_First; list2; list2 = list2->next) {
+            if (list2 != list) {
+                IRO_Dump(" header = %d \n", list2->fnode->index);
+                IRO_Dump(" l1 bit vector=\n");
+                IRO_DumpBits("", list2->bv);
+                IRO_Dump(" l bit vector=\n");
+                IRO_DumpBits("", list->bv);
+                if (Bv_IsSubset(list->bv, list2->bv))
+                    list2->flags &= ~1;
+            }
+        }
+    }
+
+    for (list = LoopList_First; list; list = list->next) {
+        if (list->flags & 1) {
+            IRONode *listfnode;
+            // LoopUnroll and its helpers read the loop membership from the global InLoop
+            Bv_Copy(list->bv, InLoop);
+            listfnode = list->fnode;
+            IRO_Dump("IRO_FindLoops_Unroll:Found loop with header %d\n", listfnode->index);
+            IRO_DumpBits("Loop includes: ", InLoop);
+            LoopUnroll(copts.unrollfactor, listfnode);
+            IRO_UpdateFlagsOnInts();
+        }
+    }
+}
+
+/*
+ * ORs together the values (b << i) for every i in [0, a).
+ *
+ * Returns 0 as soon as one shifted copy shares a set bit with the copies
+ * accumulated so far; *result is left untouched in that case. Otherwise stores
+ * the accumulated value in *result and returns 1.
+ */
+static int CheckConstant(CInt64 a, CInt64 b, CInt64 *result) {
+    CInt64 merged = cint64_zero;
+    CInt64 step = cint64_zero;
+    CInt64 shifted;
+    CInt64 overlap;
+
+    while (CInt64_Less(step, a)) {
+        shifted = CInt64_Shl(b, step);
+        overlap = CInt64_And(shifted, merged);
+        if (CInt64_NotEqual(overlap, cint64_zero))
+            return 0;
+        merged = CInt64_Or(shifted, merged);
+        step = CInt64_Add(step, cint64_one);
+    }
+
+    *result = merged;
+    return 1;
+}
+/* Operands captured by PatternMatchLoop from the |=, ^= or &= statement it matches;
+ * read back by UnrollWhileLoopBody when it builds the replacement statements.
+ */
+typedef struct LoopPattern {
+ IROLinear *nd0; // right operand of the address EADD; must be an ESHL by a constant
+ IROLinear *nd4; // left operand of the address EADD (checked with IRO_IsVariable)
+ Type *type; // rtype of the address EADD node
+ IROLinear *ndC; // ESHL node with a constant left operand, found on the assignment's right-hand side
+ IROLinear *nd10; // right operand of the matched assignment
+ CInt64 val14; // value produced by CheckConstant, bitwise inverted when the statement is EANDASS
+ CInt64 val1C; // constant shift count taken from nd0's right operand
+} LoopPattern;
+/* Builds the replacement statements for a loop accepted by PatternMatchLoop.
+ * Runs eight passes (pass = 0..7). Each pass walks the flowgraph nodes from fnode3 up
+ * to, but not including, header. For every linear node that lies outside the
+ * loop->index20..index24 range, is neither a label nor a nop and is not IROLF_Reffed,
+ * it builds a new assignment from `pattern` and hands the result to IRO_Paste
+ * together with fnode2->last.
+ * unrollFactor is not read in this function.
+ */
+static void UnrollWhileLoopBody(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, LoopPattern *pattern, UInt32 unrollFactor) {
+ IRONode *scan;
+ int pass;
+ IROLinear *firstnode;
+ IROLinear *lastnd;
+ IROLinear *nd;
+ IROLinear *nd1;
+ IROLinear *nd2;
+ IROLinear *nd3;
+ IROLinear *nd4;
+ IROLinear *nd5;
+ IROLinear *nd6;
+ IROLinear *nd8;
+ IROLinear *nd7;
+ ENode *expr;
+ IROList list;
+ CInt64 zero;
+ CInt64 shiftval;
+ // NOTE(review): zero is initialised below but not read again in this function
+ CInt64_SetLong(&zero, 0);
+
+ pass = 0;
+
+ do {
+ firstnode = NULL; // NOTE(review): firstnode is assigned but never read afterwards
+ for (scan = fnode3; scan && scan != header; scan = scan->nextnode) {
+ IRO_InitList(&list);
+ lastnd = scan->last;
+ nd = scan->first;
+ while (1) {
+ if (nd->stmt)
+ nd->stmt->flags |= StmtFlag_10;
+
+ if (
+ (nd->index < loop->index20 || nd->index > loop->index24) &&
+ nd->type != IROLinearLabel &&
+ nd->type != IROLinearNop &&
+ !(nd->flags & IROLF_Reffed)
+ )
+ {
+ CError_ASSERT(345, nd->nodetype == EORASS || nd->nodetype == EANDASS || nd->nodetype == EXORASS);
+ // nd1: copy of pattern->nd0 (the ESHL expression from the matched address)
+ IRO_DuplicateExpr(pattern->nd0, &list);
+ nd1 = list.tail;
+ // shiftval = 1 << pattern->val1C
+ shiftval = cint64_one;
+ shiftval = CInt64_Shl(shiftval, pattern->val1C);
+ // nd2: integer constant pass * shiftval
+ nd2 = IRO_NewLinear(IROLinearOperand);
+ nd2->index = ++IRO_NumLinear;
+ nd2->rtype = pattern->nd0->rtype;
+ expr = IRO_NewENode(EINTCONST);
+ expr->rtype = pattern->nd0->rtype;
+ CInt64_SetLong(&expr->data.intval, pass * CInt64_GetULong(&shiftval));
+ nd2->u.node = expr;
+ IRO_AddToList(nd2, &list);
+
+ IRO_DuplicateExpr(pattern->nd4, &list);
+ // nd3: (copy of pattern->nd4) + nd2
+ nd3 = IRO_NewLinear(IROLinearOp2Arg);
+ nd3->index = ++IRO_NumLinear;
+ nd3->nodetype = EADD;
+ nd3->rtype = pattern->type;
+ nd3->u.diadic.left = list.tail;
+ nd3->u.diadic.right = nd2;
+ IRO_AddToList(nd3, &list);
+ // nd4: nd3 + nd1
+ nd4 = IRO_NewLinear(IROLinearOp2Arg);
+ nd4->index = ++IRO_NumLinear;
+ nd4->nodetype = EADD;
+ nd4->rtype = pattern->type;
+ nd4->u.diadic.left = nd3;
+ nd4->u.diadic.right = nd1;
+ IRO_AddToList(nd4, &list);
+ // nd5: EINDIRECT through nd4, typed like the original assignment
+ nd5 = IRO_NewLinear(IROLinearOp1Arg);
+ nd5->index = ++IRO_NumLinear;
+ nd5->nodetype = EINDIRECT;
+ nd5->rtype = nd->rtype;
+ nd5->u.monadic = nd4;
+ IRO_AddToList(nd5, &list);
+ // nd6: copy of the original assignment node whose left operand is nd5
+ nd6 = IRO_NewLinear(IROLinearOp2Arg);
+ *nd6 = *nd;
+ nd6->index = ++IRO_NumLinear;
+ nd6->u.diadic.left = list.tail;
+ nd6->next = NULL;
+ // nd7: integer constant holding pattern->val14
+ nd7 = IRO_NewLinear(IROLinearOperand);
+ nd7->index = ++IRO_NumLinear;
+ nd7->rtype = pattern->ndC->rtype;
+ expr = IRO_NewENode(EINTCONST);
+ expr->rtype = pattern->ndC->rtype;
+ nd7->u.node = expr;
+ nd7->next = NULL;
+ expr->data.intval = pattern->val14;
+ // an AND-assign of 0 is turned into a plain assignment
+ if (
+ IS_LINEAR_DIADIC(nd, EANDASS) &&
+ CInt64_Equal(pattern->val14, cint64_zero)
+ )
+ {
+ nd6->nodetype = EASS;
+ } else if (
+ IS_LINEAR_DIADIC(nd, EORASS) &&
+ !CTool_EndianReadWord32(&pattern->val14.hi)
+ )
+ {
+ UInt32 tmp = CInt64_GetULong(&pattern->val14);
+ if (
+ (nd->rtype->size == 1 && tmp == 0xFF) ||
+ (nd->rtype->size == 2 && tmp == 0xFFFF) ||
+ (nd->rtype->size == 4 && tmp == 0xFFFFFFFF)
+ )
+ {
+ nd6->nodetype = EASS; // an OR-assign of an all-ones value for this operand size is turned into a plain assignment
+ }
+ }
+
+ IRO_AddToList(nd7, &list);
+ // when the matched right operand was an ETYPCON, wrap nd7 in a copy of that node
+ if (IS_LINEAR_MONADIC(pattern->nd10, ETYPCON)) {
+ nd8 = IRO_NewLinear(IROLinearOp1Arg);
+ *nd8 = *pattern->nd10;
+ nd8->index = ++IRO_NumLinear;
+ nd8->u.monadic = nd7;
+ nd8->next = NULL;
+ IRO_AddToList(nd8, &list);
+ } else {
+ nd8 = nd7;
+ }
+ nd6->u.diadic.right = nd8;
+ IRO_AddToList(nd6, &list);
+
+ if (!firstnode)
+ firstnode = list.head;
+ }
+
+ if (nd == lastnd)
+ break;
+ nd = nd->next;
+ }
+ // paste whatever was built for this flowgraph node
+ if (list.head && list.tail)
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ }
+ } while (++pass < 8);
+}
+
+/*
+ * Matches one linear node against the statement shape handled by UnrollWhileLoop:
+ *
+ *     *(var + (X << c1))  |=, ^= or &=  [typcon] [~] (c2 << Y)
+ *
+ * where X is (ind_var >> k), Y is (ind_var & ((1 << k) - 1)) and ind_var is the
+ * object of `ind`. Fields of `pattern` are filled in as the match progresses, so a
+ * failed match can leave some of them written. *result2 is incremented once the node
+ * is known to be one of the three assignment kinds. *result1 is incremented when
+ * CheckConstant accepts the constant. *unrollFactor is set to (1 << k).
+ *
+ * Returns 1 when the node has the required shape, 0 otherwise.
+ */
+static int PatternMatchLinearNode(IROLinear *stmt, IROLoopInd *ind, UInt32 *unrollFactor, SInt32 *result1, SInt32 *result2, LoopPattern *pattern) {
+    IROLinear *addr;
+    IROLinear *shrExpr;
+    IROLinear *andExpr;
+    IROLinear *shrCount;
+    IROLinear *andMask;
+    Object *shrObj;
+    Object *andObj;
+    CInt64 mask;
+    CInt64 shiftedConst;
+
+    if (!(IS_LINEAR_DIADIC_3(stmt, EORASS, EXORASS, EANDASS)))
+        return 0;
+    (*result2)++;
+
+    // destination: *(variable + (expr << constant))
+    if (!(IS_LINEAR_MONADIC(stmt->u.diadic.left, EINDIRECT)))
+        return 0;
+    addr = stmt->u.diadic.left->u.monadic;
+    if (!(IS_LINEAR_DIADIC(addr, EADD)))
+        return 0;
+
+    pattern->nd4 = addr->u.diadic.left;
+    pattern->type = addr->rtype;
+    if (!(IRO_IsVariable(addr->u.diadic.left)))
+        return 0;
+
+    pattern->nd0 = addr->u.diadic.right;
+    if (!(IS_LINEAR_DIADIC(pattern->nd0, ESHL) && IRO_IsConstant(pattern->nd0->u.diadic.right)))
+        return 0;
+    pattern->val1C = pattern->nd0->u.diadic.right->u.node->data.intval;
+    shrExpr = pattern->nd0->u.diadic.left;
+
+    // source: locate the (constant << expr) node, optionally under ETYPCON and/or EBINNOT
+    pattern->nd10 = stmt->u.diadic.right;
+    if (IS_LINEAR_MONADIC(pattern->nd10, ETYPCON)) {
+        if (IS_LINEAR_DIADIC(stmt, EANDASS)) {
+            if (!(IS_LINEAR_MONADIC(pattern->nd10->u.monadic, EBINNOT)))
+                return 0;
+            pattern->ndC = pattern->nd10->u.monadic->u.monadic;
+        } else {
+            pattern->ndC = pattern->nd10->u.monadic;
+        }
+        if (!(IS_LINEAR_DIADIC(pattern->ndC, ESHL) && IRO_IsConstant(pattern->ndC->u.diadic.left)))
+            return 0;
+    } else if (IS_LINEAR_DIADIC(pattern->nd10, ESHL) && IS_LINEAR_DIADIC_2(stmt, EORASS, EXORASS)) {
+        pattern->ndC = pattern->nd10;
+        if (!(IRO_IsConstant(pattern->ndC->u.diadic.left)))
+            return 0;
+    } else if (IS_LINEAR_MONADIC(pattern->nd10, EBINNOT) && IS_LINEAR_DIADIC(stmt, EANDASS)) {
+        pattern->ndC = pattern->nd10->u.monadic;
+        if (!(IS_LINEAR_DIADIC(pattern->ndC, ESHL) && IRO_IsConstant(pattern->ndC->u.diadic.left)))
+            return 0;
+    } else {
+        return 0;
+    }
+
+    // every accepted branch above left ndC pointing at an ESHL with a constant left operand
+    shiftedConst = pattern->ndC->u.diadic.left->u.node->data.intval;
+    andExpr = pattern->ndC->u.diadic.right;
+
+    // index expressions: (ind_var >> k) for the address, (ind_var & mask) for the shift
+    if (!(IS_LINEAR_DIADIC(andExpr, EAND) && IS_LINEAR_DIADIC(shrExpr, ESHR)))
+        return 0;
+
+    shrObj = IRO_IsVariable(shrExpr->u.diadic.left);
+    andObj = IRO_IsVariable(andExpr->u.diadic.left);
+    if (!(shrObj == andObj && shrObj == ind->var->object))
+        return 0;
+
+    shrCount = shrExpr->u.diadic.right;
+    andMask = andExpr->u.diadic.right;
+    if (!(IRO_IsConstant(shrCount) && IRO_IsConstant(andMask)))
+        return 0;
+
+    // the AND mask must be exactly (1 << k) - 1 and must fit in the low 32 bits
+    mask = cint64_one;
+    mask = CInt64_Shl(mask, shrCount->u.node->data.intval);
+    mask = CInt64_Sub(mask, cint64_one);
+    if (!(CInt64_Equal(mask, andMask->u.node->data.intval)))
+        return 0;
+    if (CTool_EndianReadWord32(&mask.hi) != 0)
+        return 0;
+
+    *unrollFactor = CInt64_GetULong(&mask) + 1;
+    // A CheckConstant failure is not a mismatch: the node still counts as
+    // well-formed, it just does not bump *result1.
+    if (CheckConstant(CInt64_Add(mask, cint64_one), shiftedConst, &pattern->val14)) {
+        (*result1)++;
+        if (IS_LINEAR_DIADIC(stmt, EANDASS))
+            pattern->val14 = CInt64_Not(pattern->val14);
+    }
+
+    return 1;
+}
+
+/*
+ * Checks every relevant linear node of flowgraph node `fnode` against the statement
+ * pattern (see PatternMatchLinearNode).
+ *
+ * Nodes inside the loop->index20..index24 range, nodes flagged IROLF_Reffed, nops and
+ * labels are skipped. *result1 and *result2 are reset to 0 on entry, so after the
+ * call they describe this flowgraph node only.
+ *
+ * Returns 0 as soon as one relevant node does not match, 1 otherwise (including when
+ * fnode has no linear nodes).
+ */
+static int PatternMatchLoop(IRONode *fnode, IROLoop *loop, IROLoopInd *ind, UInt32 *unrollFactor, SInt32 *result1, SInt32 *result2, LoopPattern *pattern) {
+    IROLinear *scan;
+
+    *result1 = 0;
+    *result2 = 0;
+
+    scan = fnode->first;
+    if (!scan)
+        return 1;
+
+    for (;; scan = scan->next) {
+        if (
+            (scan->index < loop->index20 || scan->index > loop->index24) &&
+            !(scan->flags & IROLF_Reffed) &&
+            scan->type != IROLinearNop &&
+            scan->type != IROLinearLabel
+        )
+        {
+            if (!PatternMatchLinearNode(scan, ind, unrollFactor, result1, result2, pattern))
+                return 0;
+        }
+
+        if (scan == fnode->last)
+            break;
+    }
+
+    return 1;
+}
+
+/*
+ * Unrolls a "while(n--)" style loop whose body is the single statement shape
+ * recognised by PatternMatchLoop.
+ *
+ * The loop is rejected (return 0) when:
+ *   - loop->flags has LoopFlags_800 or LoopFlags_1000 set, or lacks
+ *     LP_HAS_MULTIPLE_INDUCTIONS;
+ *   - no induction on the FirstInd list has both LoopInd_HasMod and LoopInd_HasDiv;
+ *   - that induction is not of unsigned type, or is not updated by exactly 1 through
+ *     "+= const", "= x + const", pre-increment or post-increment;
+ *   - PatternMatchLoop rejects any non-header flowgraph node in InLoop, or reports
+ *     more than one matching statement for the last node it examined.
+ *
+ * On success new linear code is handed to IRO_Paste together with fnode2->last, then
+ * carved into new flowgraph nodes (newfnode1..newfnode6) chained after fnode2. Two
+ * more nodes (newfnode7, newfnode8) are linked in after LoopNode; they hold a reset
+ * of the original loop counter to 0 and the early-exit label. Returns 1.
+ *
+ * unrollFactor is overwritten by PatternMatchLoop with the factor derived from the
+ * matched statement; UnrollWhileLoopBody then emits 8 passes, hence the
+ * 8 * unrollFactor induction step.
+ */
+static UInt32 UnrollWhileLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, UInt32 unrollFactor) {
+    IROLoopInd *ind;
+    IRONode *scan;
+    CLabel *lastlabel;
+    IROLinear *lastlabelnode;
+    IROLinear *earlyLoopExitTest;
+    CLabel *earlyLoopExitTestLabel;
+    IROLinear *origIterationCount;
+    IROLinear *unrolledFinalValue;
+    IROLinear *preAlignTemp;
+    IROLinear *newFinalValue;
+    IROLinear *savedHead60;
+    IROLinear *unrolledBodyEntryTest;
+    CLabel *label;
+    IROLinear *savedHead2;
+    IROLinear *loophead25;
+    IROLinear *loopend;
+    IROLinear *loopscan;
+    IROLinear *indvar;
+    IROLinear *less;
+    IROLinear *loopExitTest;
+    IROLinear *saveTail;
+    CLabel *label2;
+    IROLinear *gotond;
+    CLabel *label3;
+    IROLinear *savedHead3;
+    IROLinear *updIndInc;
+    IROLinear *label2nd;
+    IROLinear *less2;
+    IROLinear *saveTail2;
+    IROLinear *less3;
+    IROLinear *wtf;
+    IROLinear *constnd;
+    IROLinear *ass;
+    IROLinear *nd18;
+    IRONode *fn19;
+    IRONode *newfnode1;
+    IRONode *newfnode2;
+    IRONode *newfnode3;
+    IRONode *newfnode4;
+    IRONode *newfnode5;
+    IRONode *newfnode6;
+    IRONode *newfnode7;
+    IRONode *newfnode8;
+    IROLinear *lastnd;
+    ENode *expr;
+    // Start at 0: PatternMatchLoop is the only writer and it is never called when no
+    // non-header flowgraph node is in InLoop, yet both counters are read afterwards.
+    SInt32 result1 = 0;
+    SInt32 result2 = 0;
+    LoopPattern pattern;
+    IROList list;
+
+    IRO_Dump("while(n--) loop \n");
+
+    if (loop->flags & LoopFlags_800) {
+        IRO_Dump("loop not unrolled because induction used in loop \n");
+        return 0;
+    }
+    if (loop->flags & LoopFlags_1000) {
+        IRO_Dump("loop not unrolled because loop has multiple exits \n");
+        return 0;
+    }
+
+    if (!(loop->flags & LP_HAS_MULTIPLE_INDUCTIONS))
+        return 0;
+
+    // pick the first induction that is used with both MOD and DIV
+    for (ind = FirstInd; ind; ind = ind->next) {
+        if ((ind->flags & LoopInd_HasMod) && (ind->flags & LoopInd_HasDiv))
+            break;
+    }
+
+    if (!ind) {
+        IRO_Dump("Could not find loop with and induction with MOD and DIV operation\n");
+        return 0;
+    }
+
+    if (!IRO_IsUnsignedType(ind->nd->rtype))
+        return 0;
+
+    // the induction must be stepped by exactly 1
+    if (ind->nd->type == IROLinearOp2Arg) {
+        if (ind->nd->nodetype == EADDASS && IRO_IsConstant(ind->nd->u.diadic.right)) {
+            if (ind->addConst != 1)
+                return 0;
+        } else if (ind->nd->nodetype == EASS) {
+            if (
+                ind->nd->u.diadic.right->type != IROLinearOp2Arg ||
+                ind->nd->u.diadic.right->nodetype != EADD ||
+                !IRO_IsConstant(ind->nd->u.diadic.right->u.diadic.right)
+            )
+                return 0;
+
+            if (ind->addConst != 1)
+                return 0;
+        } else {
+            return 0;
+        }
+    } else if (ind->nd->type == IROLinearOp1Arg && ind->nd->nodetype != EPREINC && ind->nd->nodetype != EPOSTINC) {
+        return 0;
+    }
+
+    // index20..index24 delimit the linear nodes of the induction update itself
+    loop->induction = ind;
+    loop->index24 = ind->nd->index;
+    loop->index20 = IRO_FindStart(ind->nd)->index;
+
+    // every non-header flowgraph node of the loop must match the statement pattern
+    scan = IRO_FirstNode;
+    memset(&pattern, 0, sizeof(pattern));
+    while (scan) {
+        if (Bv_IsBitSet(scan->index, InLoop) && scan != header) {
+            if (!PatternMatchLoop(scan, loop, ind, &unrollFactor, &result1, &result2, &pattern))
+                return 0;
+        }
+        scan = scan->nextnode;
+    }
+
+    // NOTE(review): PatternMatchLoop resets both counters on every call, so this
+    // only reflects the last flowgraph node that was examined.
+    if (result1 > 1 || result2 > 1)
+        return 0;
+
+    lastlabel = fnode2->last->u.label.label;
+    lastlabelnode = IRO_FindLabelNode(fnode2->last->u.label.label, fnode2->last);
+
+    // copy of the loop test, used by the early-exit branch
+    IRO_InitList(&list);
+    IRO_DuplicateExprRange(lastlabelnode->next, LoopNode->last->u.label.x4->u.diadic.left, &list);
+    IRO_DuplicateExpr(LoopNode->last->u.label.x4, &list);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+    lastlabelnode = list.tail;
+
+    IRO_InitList(&list);
+    earlyLoopExitTest = BuildEarlyLoopExitTest(LoopNode->last->type, &list);
+    earlyLoopExitTestLabel = IRO_NewLabel();
+    earlyLoopExitTest->u.label.label = earlyLoopExitTestLabel;
+    earlyLoopExitTest->u.label.x4 = lastlabelnode;
+    earlyLoopExitTest->u.label.x4->flags |= IROLF_Reffed;
+    earlyLoopExitTest->rtype = LoopNode->last->rtype;
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    // iteration count and the three bounds derived from it
+    IRO_InitList(&list);
+    origIterationCount = BuildOrigIterationCount_DoWhile(&list, loop);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+    savedHead60 = list.head;
+
+    IRO_InitList(&list);
+    preAlignTemp = BuildPreAlignTemp(ind, unrollFactor, &list);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    IRO_InitList(&list);
+    unrolledFinalValue = BuildUnrolledFinalvalue_DoWhile(origIterationCount, unrollFactor, &list, loop);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    IRO_InitList(&list);
+    newFinalValue = BuildNewFinalvalue_DoWhile(origIterationCount, unrollFactor, &list, loop);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    IRO_InitList(&list);
+    BuildUnrolledBodyEntryTest(&list, origIterationCount, unrollFactor, lastlabel);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+    unrolledBodyEntryTest = list.tail;
+
+    // first new loop: plain copies of the body, repeated while ind < preAlignTemp
+    IRO_InitList(&list);
+    label = BuildLabel(&list);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    savedHead2 = list.head;
+    loophead25 = NULL;
+    for (scan = fnode3; scan && scan != header; scan = scan->nextnode) {
+        IRO_InitList(&list);
+        loopend = scan->last;
+        loopscan = scan->first;
+        while (1) {
+            if (loopscan->stmt)
+                loopscan->stmt->flags |= StmtFlag_10;
+            if (loopscan->type != IROLinearLabel && !(loopscan->flags & IROLF_Reffed)) {
+                IRO_DuplicateExpr(loopscan, &list);
+                if (!loophead25)
+                    loophead25 = list.head;
+            }
+            if (loopscan == loopend)
+                break;
+            loopscan = loopscan->next;
+        }
+
+        if (list.head && list.tail)
+            IRO_Paste(list.head, list.tail, fnode2->last);
+    }
+
+    IRO_InitList(&list);
+
+    if (ind->nd->type == IROLinearOp1Arg)
+        IRO_DuplicateExpr(ind->nd->u.monadic, &list);
+    else
+        IRO_DuplicateExpr(ind->nd->u.diadic.left, &list);
+    list.tail->flags &= ~IROLF_Assigned;
+    indvar = list.tail;
+
+    IRO_DuplicateExpr(preAlignTemp, &list);
+    list.tail->flags &= ~IROLF_Assigned;
+
+    less = IRO_NewLinear(IROLinearOp2Arg);
+    less->nodetype = ELESS;
+    less->rtype = TYPE(&stbool);
+    less->index = ++IRO_NumLinear;
+    less->next = NULL;
+    less->u.diadic.left = indvar;
+    less->u.diadic.right = list.tail;
+    IRO_AddToList(less, &list);
+    less->flags |= IROLF_Reffed;
+
+    loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list);
+    loopExitTest->u.label.label = label;
+    loopExitTest->u.label.x4 = less;
+    loopExitTest->u.label.x4->flags |= IROLF_Reffed;
+    loopExitTest->rtype = LoopNode->last->rtype;
+    IRO_Paste(list.head, list.tail, fnode2->last);
+    saveTail = list.tail;
+
+    // jump over the unrolled body to its test (label2)
+    IRO_InitList(&list);
+    label2 = IRO_NewLabel();
+    gotond = IRO_NewLinear(IROLinearOp1Arg);
+    gotond->index = ++IRO_NumLinear;
+    gotond->type = IROLinearGoto;
+    gotond->u.label.label = label2;
+    IRO_AddToList(gotond, &list);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    // second new loop: the pattern-based body (label3), stepping ind by 8 * unrollFactor
+    IRO_InitList(&list);
+    label3 = BuildLabel(&list);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+    savedHead3 = list.head;
+
+    UnrollWhileLoopBody(header, fnode2, fnode3, loop, &pattern, unrollFactor);
+    updIndInc = UpdateInductionIncrement(loop, 8 * unrollFactor, fnode2->last);
+
+    IRO_InitList(&list);
+    label2nd = IRO_NewLinear(IROLinearLabel);
+    // NOTE(review): post-increment here, while most nodes in this function use ++IRO_NumLinear
+    label2nd->index = IRO_NumLinear++;
+    label2nd->u.label.label = label2;
+    label2nd->flags |= IROLF_1;
+    IRO_AddToList(label2nd, &list);
+    IRO_Paste(list.head, list.tail, fnode2->last);
+
+    // test of the second loop: ind < unrolledFinalValue, branching back to label3
+    IRO_InitList(&list);
+
+    if (ind->nd->type == IROLinearOp1Arg)
+        IRO_DuplicateExpr(ind->nd->u.monadic, &list);
+    else
+        IRO_DuplicateExpr(ind->nd->u.diadic.left, &list);
+    list.tail->flags &= ~IROLF_Assigned;
+    indvar = list.tail;
+
+    IRO_DuplicateExpr(unrolledFinalValue, &list);
+    list.tail->flags &= ~IROLF_Assigned;
+
+    less2 = IRO_NewLinear(IROLinearOp2Arg);
+    less2->nodetype = ELESS;
+    less2->rtype = TYPE(&stbool);
+    less2->index = ++IRO_NumLinear;
+    less2->next = NULL;
+    less2->u.diadic.left = indvar;
+    less2->u.diadic.right = list.tail;
+    IRO_AddToList(less2, &list);
+    less2->flags |= IROLF_Reffed;
+
+    loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list);
+    loopExitTest->u.label.label = label3;
+    loopExitTest->u.label.x4 = less2;
+    loopExitTest->u.label.x4->flags |= IROLF_Reffed;
+    loopExitTest->rtype = LoopNode->last->rtype;
+    IRO_Paste(list.head, list.tail, fnode2->last);
+    saveTail2 = list.tail;
+
+    // replace the original loop's test with ind < newFinalValue
+    IRO_InitList(&list);
+
+    if (ind->nd->type == IROLinearOp1Arg)
+        IRO_DuplicateExpr(ind->nd->u.monadic, &list);
+    else
+        IRO_DuplicateExpr(ind->nd->u.diadic.left, &list);
+    list.tail->flags &= ~IROLF_Assigned;
+    indvar = list.tail;
+
+    IRO_DuplicateExpr(newFinalValue, &list);
+    list.tail->flags &= ~IROLF_Assigned;
+
+    less3 = IRO_NewLinear(IROLinearOp2Arg);
+    less3->nodetype = ELESS;
+    less3->rtype = TYPE(&stbool);
+    less3->index = ++IRO_NumLinear;
+    less3->next = NULL;
+    less3->u.diadic.left = indvar;
+    less3->u.diadic.right = list.tail;
+    IRO_AddToList(less3, &list);
+    less3->flags |= IROLF_Reffed;
+
+    wtf = LoopNode->last->u.label.x4;
+    IRO_Paste(list.head, list.tail, LoopNode->last);
+    LoopNode->last->u.label.x4 = list.tail;
+
+    // after the loop: assign 0 to the left operand of the original test
+    IRO_InitList(&list);
+
+    constnd = IRO_NewLinear(IROLinearOperand);
+    constnd->index = ++IRO_NumLinear;
+    expr = IRO_NewENode(EINTCONST);
+    expr->rtype = wtf->u.diadic.left->rtype;
+    expr->data.intval = cint64_zero;
+    constnd->u.node = expr;
+    constnd->rtype = expr->rtype;
+    IRO_AddToList(constnd, &list);
+    constnd->flags |= IROLF_Reffed;
+
+    IRO_DuplicateExpr(wtf->u.diadic.left, &list);
+
+    ass = IRO_NewLinear(IROLinearOp2Arg);
+    ass->nodetype = EASS;
+    ass->rtype = list.tail->rtype;
+    ass->index = ++IRO_NumLinear;
+    ass->next = NULL;
+    ass->u.diadic.left = list.tail;
+    ass->u.diadic.right = constnd;
+    IRO_AddToList(ass, &list);
+    ass->flags |= IROLF_Assigned;
+
+    IRO_NopOut(wtf);
+
+    // carve the pasted linear code into flowgraph nodes chained after fnode2
+    fn19 = fnode2->nextnode;
+    nd18 = fnode2->last;
+    fnode2->last = earlyLoopExitTest;
+
+    newfnode1 = IRO_NewFlowGraphNode();
+    newfnode1->first = savedHead60;
+    newfnode1->last = unrolledBodyEntryTest;
+    fnode2->nextnode = newfnode1;
+
+    newfnode2 = IRO_NewFlowGraphNode();
+    newfnode2->first = savedHead2;
+    newfnode2->last = saveTail;
+    savedHead2->u.label.label->stmt = (Statement *) newfnode2;
+    newfnode1->nextnode = newfnode2;
+
+    newfnode3 = IRO_NewFlowGraphNode();
+    newfnode3->first = gotond;
+    newfnode3->last = gotond;
+    newfnode2->nextnode = newfnode3;
+
+    newfnode4 = IRO_NewFlowGraphNode();
+    newfnode4->first = savedHead3;
+    newfnode4->last = updIndInc;
+    savedHead3->u.label.label->stmt = (Statement *) newfnode4;
+    newfnode3->nextnode = newfnode4;
+
+    newfnode5 = IRO_NewFlowGraphNode();
+    newfnode5->first = label2nd;
+    newfnode5->last = saveTail2;
+    label2nd->u.label.label->stmt = (Statement *) newfnode5;
+    newfnode4->nextnode = newfnode5;
+
+    newfnode6 = IRO_NewFlowGraphNode();
+    newfnode6->first = nd18;
+    newfnode6->last = nd18;
+    newfnode5->nextnode = newfnode6;
+    newfnode6->nextnode = fn19;
+
+    // newfnode7: the "counter = 0" assignment, linked directly after LoopNode
+    newfnode7 = oalloc(sizeof(IRONode));
+    memset(newfnode7, 0, sizeof(IRONode));
+    newfnode7->index = IRO_NumNodes;
+    IRO_NumNodes++;
+
+    newfnode7->first = list.head;
+    newfnode7->last = list.tail;
+
+    list.tail->next = LoopNode->last->next;
+    LoopNode->last->next = list.head;
+
+    newfnode7->nextnode = LoopNode->nextnode;
+    LoopNode->nextnode = newfnode7;
+
+    // newfnode8: the early-exit label, linked directly after newfnode7
+    newfnode8 = oalloc(sizeof(IRONode));
+    memset(newfnode8, 0, sizeof(IRONode));
+    newfnode8->index = IRO_NumNodes;
+    IRO_NumNodes++;
+
+    lastnd = IRO_NewLinear(IROLinearLabel);
+    lastnd->index = IRO_NumLinear++;
+    lastnd->next = NULL;
+    lastnd->u.label.label = earlyLoopExitTestLabel;
+    lastnd->flags |= IROLF_1;
+    earlyLoopExitTestLabel->stmt = (Statement *) newfnode8;
+
+    newfnode8->first = lastnd;
+    newfnode8->last = lastnd;
+
+    lastnd->next = newfnode7->last->next;
+    newfnode7->last->next = lastnd;
+
+    newfnode8->nextnode = newfnode7->nextnode;
+    newfnode7->nextnode = newfnode8;
+
+    return 1;
+}
+
+/*
+ * Emits one copy of a loop body.
+ *
+ * Walks the flowgraph nodes from `start` up to, but not including, `end`. Every
+ * linear node that lies outside the loop->index20..index24 range, is not a label and
+ * is not IROLF_Reffed is duplicated; the copies made for each flowgraph node are
+ * handed to IRO_Paste together with `destnode`. Statements of visited nodes get
+ * StmtFlag_10.
+ *
+ * When funkyFlag is set, *val is advanced by loop->induction->addConst and
+ * ChangeInductionReference is applied to the copied range with the new *val.
+ * The addConst parameter itself is not read.
+ */
+void IRO_IterateForLoopBody(IRONode *start, IRONode *end, IROLoop *loop, IROLinear *destnode, SInt32 addConst, CInt64 *val, Boolean funkyFlag) {
+    IROLinear *copyHead = NULL;
+    IROLinear *copyTail = NULL;
+    IRONode *block;
+    IROLinear *stop;
+    IROLinear *cur;
+    IROList copies;
+
+    for (block = start; block && block != end; block = block->nextnode) {
+        IRO_InitList(&copies);
+
+        stop = block->last;
+        for (cur = block->first; ; cur = cur->next) {
+            if (cur->stmt)
+                cur->stmt->flags |= StmtFlag_10;
+
+            if (
+                (cur->index < loop->index20 || cur->index > loop->index24) &&
+                cur->type != IROLinearLabel &&
+                !(cur->flags & IROLF_Reffed)
+            )
+            {
+                IRO_DuplicateExpr(cur, &copies);
+                if (!copyHead)
+                    copyHead = copies.head;
+                copyTail = copies.tail;
+            }
+
+            // the block's last node is processed before the walk stops
+            if (cur == stop)
+                break;
+        }
+
+        if (copies.head && copies.tail)
+            IRO_Paste(copies.head, copies.tail, destnode);
+    }
+
+    if (!funkyFlag)
+        return;
+
+    *val = CInt64_Add(*val, IRO_MakeLong(loop->induction->addConst));
+    ChangeInductionReference(copyHead, copyTail, *val, loop);
+}
+
+/*
+ * Emits unrollFactor straight-line copies of a loop body after fnode3.
+ *
+ * A new flowgraph node holding a single nop is created and linked directly after
+ * fnode3, both in the flowgraph chain and in the linear chain. The body (flowgraph
+ * nodes from fnode2 up to, but not including, fnode1) is then copied unrollFactor
+ * times with that nop as the paste target; every copy after the first has its
+ * induction references adjusted. Finally the induction increment is updated for
+ * unrollFactor steps.
+ */
+void IRO_LinearizeForLoopPostLoop(IRONode *fnode1, IRONode *fnode2, IROLoop *loop, IRONode *fnode3, UInt32 unrollFactor) {
+    IRONode *postNode;
+    IROLinear *anchor;
+    SInt32 copy;
+    CInt64 offset;
+
+    postNode = oalloc(sizeof(IRONode));
+    memset(postNode, 0, sizeof(IRONode));
+    postNode->index = IRO_NumNodes++;
+
+    anchor = IRO_NewLinear(IROLinearNop);
+    anchor->index = IRO_NumLinear++;
+    anchor->next = NULL;
+    anchor->flags |= IROLF_1;
+
+    postNode->last = anchor;
+    postNode->first = anchor;
+
+    // splice the new node in right behind fnode3
+    postNode->nextnode = fnode3->nextnode;
+    fnode3->nextnode = postNode;
+
+    anchor->next = fnode3->last->next;
+    fnode3->last->next = anchor;
+
+    offset = cint64_zero;
+    copy = 0;
+    while (copy < unrollFactor) {
+        IRO_IterateForLoopBody(fnode2, fnode1, loop, postNode->last, loop->induction->addConst, &offset, copy > 0);
+        copy++;
+    }
+    UpdateInductionIncrement(loop, unrollFactor, postNode->last);
+}
+/* Unrolls a counted loop by unrollFactor.
+ * New linear code is handed to IRO_Paste together with fnode2->last, in this order:
+ * a copy of the loop test with an early-exit branch, the original iteration count,
+ * the new final value, the unrolled-body entry test (which is given lastlabel), a
+ * goto to `label`, the unrolled body (label2, unrollFactor body copies, one combined
+ * induction update), `label` followed by the rebuilt loop test and a branch to
+ * label2, and finally the lastlabel node.
+ * The pasted code is then carved into flowgraph nodes newfnode1..newfnode5 chained
+ * after fnode2; newfnode6 holds the early-exit label and is linked after LoopNode.
+ * When the iteration count is constant, IRO_TestConstantIterationCount may change
+ * unrollFactor and clear needOrigLoop / needUnrollBodyTest; the corresponding blocks
+ * and tests are then turned into nops. Always returns 1.
+ */
+static UInt32 UnrollForLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, UInt32 unrollFactor) {
+ IROLinear *lastlabelnode;
+ IROLinear *earlyLoopExitTest;
+ IROLinear *origIterationCount;
+ IROLinear *saveHead1;
+ IROLinear *newFinalValue;
+ IROLinear *unrolledBodyEntryTest;
+ IROLinear *gotoNd;
+ IROLinear *saveHead2;
+ IROLinear *updIndInc;
+ IROLinear *labelNd;
+ IROLinear *saveTail2;
+ IROLinear *ndCopy;
+ IROLinear *saveTail3;
+ IROLinear *loopExitTest;
+ IROLinear *lastnd;
+ IROLinear *labelNd2;
+ IROLinear *saveTail4;
+ IROLinear *labelNd3;
+ IROLinear *scan;
+ IRONode *nd18;
+ IRONode *newfnode1;
+ IRONode *newfnode2;
+ IRONode *newfnode3;
+ IRONode *newfnode4;
+ IRONode *newfnode5;
+ IRONode *newfnode6;
+ CLabel *lastlabel;
+ CLabel *earlyLoopExitTestLabel;
+ CLabel *label;
+ CLabel *label2;
+ SInt32 i;
+
+ IROList list;
+ CInt64 iterCount;
+ int isConstant;
+ UInt32 needOrigLoop = 0;
+ UInt32 needUnrollBodyTest = 0;
+ UInt32 resetUnrolledFinalValue = 0;
+ SInt32 leftOver;
+ CInt64 val;
+ // look up the linear node for the label referenced by fnode2->last
+ lastlabelnode = IRO_FindLabelNode(fnode2->last->u.label.label, fnode2->last);
+ lastlabel = IRO_NewLabel();
+ // copy the code from after that label up to LoopNode's branch condition, then the condition itself
+ IRO_InitList(&list);
+ IRO_DuplicateExprRange(lastlabelnode->next, LoopNode->last->u.label.x4, &list);
+ IRO_DuplicateExpr(LoopNode->last->u.label.x4, &list);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ lastlabelnode = list.tail;
+ // early-exit branch on the copied condition, targeting a fresh label
+ IRO_InitList(&list);
+ earlyLoopExitTest = BuildEarlyLoopExitTest(LoopNode->last->type, &list);
+ earlyLoopExitTestLabel = IRO_NewLabel();
+ earlyLoopExitTest->u.label.label = earlyLoopExitTestLabel;
+ earlyLoopExitTest->u.label.x4 = lastlabelnode;
+ earlyLoopExitTest->u.label.x4->flags |= IROLF_Reffed;
+ earlyLoopExitTest->rtype = LoopNode->last->rtype;
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ // defaults: keep the original loop and the entry test; a constant trip count may relax both
+ isConstant = IsIterationCountConstant(loop, &iterCount);
+ needOrigLoop = 1;
+ needUnrollBodyTest = 1;
+ resetUnrolledFinalValue = 0;
+ if (isConstant)
+ IRO_TestConstantIterationCount(loop, &iterCount, 1, &unrollFactor, &leftOver, &needOrigLoop, &needUnrollBodyTest, &resetUnrolledFinalValue);
+
+ IRO_InitList(&list);
+ origIterationCount = BuildOrigIterationCount(&list, loop);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ saveHead1 = list.head;
+
+ IRO_InitList(&list);
+ newFinalValue = BuildNewFinalvalue(origIterationCount, unrollFactor, &list, loop);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+
+ IRO_InitList(&list);
+ BuildUnrolledBodyEntryTest(&list, origIterationCount, unrollFactor, lastlabel);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ unrolledBodyEntryTest = list.tail;
+ // goto `label`; the matching label node is created after the unrolled body below
+ label = IRO_NewLabel();
+ IRO_InitList(&list);
+ gotoNd = IRO_NewLinear(IROLinearOp1Arg);
+ gotoNd->index = ++IRO_NumLinear;
+ gotoNd->type = IROLinearGoto;
+ gotoNd->u.label.label = label;
+ IRO_AddToList(gotoNd, &list);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ // label2 heads the unrolled body
+ IRO_InitList(&list);
+ label2 = BuildLabel(&list);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ saveHead2 = list.head;
+ // unrollFactor copies of the body; copies after the first get adjusted induction references
+ val = cint64_zero;
+ for (i = 0; i < unrollFactor; i++)
+ IRO_IterateForLoopBody(fnode3, header, loop, fnode2->last, loop->induction->addConst, &val, i > 0);
+ updIndInc = UpdateInductionIncrement(loop, unrollFactor, fnode2->last);
+
+ IRO_InitList(&list);
+ labelNd = IRO_NewLinear(IROLinearLabel);
+ labelNd->index = IRO_NumLinear++;
+ labelNd->u.label.label = label;
+ labelNd->flags |= IROLF_1;
+ IRO_AddToList(labelNd, &list);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ // rebuild the loop comparison: same left operand, right operand is loop->nd18's right operand or the new final value
+ IRO_InitList(&list);
+
+ IRO_DuplicateExpr(LoopNode->last->u.label.x4->u.diadic.left, &list);
+ saveTail2 = list.tail;
+
+ if (resetUnrolledFinalValue)
+ IRO_DuplicateExpr(loop->nd18->u.diadic.right, &list);
+ else
+ IRO_DuplicateExpr(newFinalValue, &list);
+
+ ndCopy = IRO_NewLinear(LoopNode->last->u.label.x4->type);
+ *ndCopy = *LoopNode->last->u.label.x4;
+ ndCopy->index = ++IRO_NumLinear;
+ ndCopy->next = NULL;
+ ndCopy->expr = NULL;
+ ndCopy->u.diadic.left = saveTail2;
+ ndCopy->u.diadic.right = list.tail;
+ IRO_AddToList(ndCopy, &list);
+
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ saveTail3 = list.tail;
+ // branch to label2 on the rebuilt comparison
+ IRO_InitList(&list);
+ loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list);
+ loopExitTest->u.label.label = label2;
+ loopExitTest->u.label.x4 = saveTail3;
+ loopExitTest->u.label.x4->flags |= IROLF_Reffed;
+ loopExitTest->rtype = LoopNode->last->rtype;
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ saveTail4 = list.tail;
+ // label node for lastlabel, the label that was passed to BuildUnrolledBodyEntryTest
+ IRO_InitList(&list);
+ labelNd2 = IRO_NewLinear(IROLinearLabel);
+ labelNd2->index = IRO_NumLinear++;
+ labelNd2->u.label.label = lastlabel;
+ labelNd2->flags |= IROLF_1;
+ IRO_AddToList(labelNd2, &list);
+ IRO_Paste(list.head, list.tail, fnode2->last);
+ // carve the pasted code into flowgraph nodes chained after fnode2
+ lastnd = fnode2->last;
+ nd18 = fnode2->nextnode;
+ fnode2->last = earlyLoopExitTest;
+
+ newfnode1 = IRO_NewFlowGraphNode();
+ newfnode1->first = saveHead1;
+ newfnode1->last = unrolledBodyEntryTest;
+ fnode2->nextnode = newfnode1;
+
+ newfnode2 = IRO_NewFlowGraphNode();
+ newfnode2->first = gotoNd;
+ newfnode2->last = gotoNd;
+ newfnode1->nextnode = newfnode2;
+
+ newfnode3 = IRO_NewFlowGraphNode();
+ newfnode3->first = saveHead2;
+ newfnode3->last = updIndInc;
+
+ saveHead2->u.label.label->stmt = (Statement *) newfnode3;
+ if (newfnode2) // newfnode2 has already been dereferenced above, so the else branch is not expected to run
+ newfnode2->nextnode = newfnode3;
+ else
+ newfnode1->nextnode = newfnode3;
+
+ newfnode4 = IRO_NewFlowGraphNode();
+ newfnode4->first = labelNd;
+ newfnode4->last = saveTail4;
+ labelNd->u.label.label->stmt = (Statement *) newfnode4;
+ newfnode3->nextnode = newfnode4;
+
+ newfnode5 = IRO_NewFlowGraphNode();
+ newfnode5->first = labelNd2;
+ newfnode5->last = lastnd;
+ newfnode4->nextnode = newfnode5;
+ newfnode5->nextnode = nd18;
+ // newfnode6 holds the early-exit label and is linked directly after LoopNode
+ newfnode6 = oalloc(sizeof(IRONode));
+ memset(newfnode6, 0, sizeof(IRONode));
+ newfnode6->index = IRO_NumNodes;
+ IRO_NumNodes++;
+
+ labelNd3 = IRO_NewLinear(IROLinearLabel);
+ labelNd3->index = IRO_NumLinear++;
+ labelNd3->next = NULL;
+ labelNd3->u.label.label = earlyLoopExitTestLabel;
+ labelNd3->flags |= IROLF_1;
+ earlyLoopExitTestLabel->stmt = (Statement *) newfnode6;
+
+ newfnode6->first = labelNd3;
+ newfnode6->last = labelNd3;
+
+ labelNd3->next = LoopNode->last->next;
+ LoopNode->last->next = labelNd3;
+
+ newfnode6->nextnode = LoopNode->nextnode;
+ LoopNode->nextnode = newfnode6;
+ // original loop not needed: nop out its blocks and the unrolled-body entry test
+ if (!needOrigLoop) {
+ NoOpBlock(newfnode5);
+ NoOpBlock(header);
+ NoOpBlock(fnode3);
+ NoOpBlock(loop->induction->fnode);
+ IRO_NopOut(newfnode1->last->u.label.x4);
+ newfnode1->last->type = IROLinearNop;
+ }
+ // entry test not needed: nop out the early-exit test, the branch ending newfnode4, the goto and newfnode1's unreferenced nodes
+ if (!needUnrollBodyTest) {
+ IRO_NopOut(earlyLoopExitTest->u.label.x4);
+ earlyLoopExitTest->type = IROLinearNop;
+
+ IRO_NopOut(newfnode4->last->u.label.x4);
+ newfnode4->last->type = IROLinearNop;
+
+ if (newfnode2)
+ newfnode2->last->type = IROLinearNop;
+
+ for (scan = newfnode1->first; scan; scan = scan->next) {
+ if (!(scan->flags & IROLF_Reffed))
+ IRO_NopOut(scan);
+ if (scan == newfnode1->last)
+ break;
+ }
+ }
+
+ return 1;
+}
+
+// Prepares the loop description for `header` and dispatches to the matching
+// unroller.
+//
+// Sets ConditionalHeaderAtBottom, extracts the loop info, records fnode2 and
+// fnode3 on it (fields xC and x10) and looks for the induction variable
+// assignment starting from fnode2. Returns 0 when IsLoopUnrollable rejects the
+// loop; otherwise returns the result of UnrollWhileLoop (LoopFlags_10000 set)
+// or UnrollForLoop (LoopFlags_10000 clear).
+static UInt32 UnrollStandardLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, int count) {
+    IROLoop *info;
+    UInt32 result;
+
+    ConditionalHeaderAtBottom = 1;
+
+    info = ExtractLoopInfo(header);
+    info->xC = fnode2;
+    info->x10 = fnode3;
+    FindAssignmenttoInductionVar(info, fnode2);
+
+    if (IsLoopUnrollable(info)) {
+        if (info->flags & LoopFlags_10000)
+            result = UnrollWhileLoop(header, fnode2, fnode3, info, count);
+        else
+            result = UnrollForLoop(header, fnode2, fnode3, info, count);
+    } else {
+        IRO_Dump("LoopUnroll:loop with header %d not unrolled because IsLoopUnrollable failed\n", header->index);
+        result = 0;
+    }
+
+    return result;
+}
+
+// Tries to unroll the loop whose header node is `header`.
+//
+// Runs the must-reach, kill, invariance and induction analyses for the loop,
+// then only proceeds when all of the following hold:
+//   - at least one predecessor of the header lies outside the loop;
+//   - the header ends in an If/IfNot;
+//   - the node following the header is outside the loop;
+//   - exactly one predecessor lies outside the loop, it ends in a goto, and
+//     the node following it is the header's in-loop successor (which must not
+//     be the header itself).
+// When UnrollStandardLoop reports success, the node table, the
+// successor/predecessor sets and the dominators are rebuilt.
+static void LoopUnroll(int count, IRONode *header) {
+    VarRecord *vr;
+    IRONode *node;
+    IRONode *entryPred;
+    IRONode *bodyNode;
+    UInt16 k;
+    int hasOutsidePred;
+    UInt32 outsidePredCount;
+    UInt32 unrolled;
+
+    LoopNode = header;
+    FindMustReach();
+
+    for (vr = IRO_FirstVar; vr != NULL; vr = vr->next)
+        vr->xA = 1;
+
+    ComputeLoopKills();
+    ComputeLoopInvariance();
+    ComputeLoopInduction();
+
+    // Re-establish the globals before inspecting the loop shape.
+    LoopNode = header;
+    ConditionalHeaderAtBottom = 0;
+
+    // Look for predecessors outside the loop; at most one of them may fall
+    // through into the header.
+    entryPred = NULL;
+    hasOutsidePred = 0;
+    for (k = 0; k < LoopNode->numpred; k++) {
+        node = IRO_NodeTable[LoopNode->pred[k]];
+        if (Bv_IsBitSet(node->index, InLoop))
+            continue;
+
+        hasOutsidePred = 1;
+        if (node->nextnode == header) {
+            CError_ASSERT(2101, !entryPred || node == entryPred);
+            entryPred = node;
+        }
+    }
+
+    if (!hasOutsidePred) {
+        IRO_Dump("No predecessor outside the loop\n");
+        return;
+    }
+
+    unrolled = 0;
+    if (LoopNode->last->type != IROLinearIf && LoopNode->last->type != IROLinearIfNot) {
+        IRO_Dump(" LoopUnroll:Loop with header = %d is not a conditional loop\n", header->index);
+    } else if (LoopNode->nextnode && !Bv_IsBitSet(LoopNode->nextnode->index, InLoop)) {
+        // The header may have only one successor inside the loop.
+        bodyNode = NULL;
+        for (k = 0; k < LoopNode->numsucc; k++) {
+            node = IRO_NodeTable[LoopNode->succ[k]];
+            if (Bv_IsBitSet(node->index, InLoop)) {
+                CError_ASSERT(2159, !bodyNode);
+                bodyNode = node;
+            }
+        }
+
+        // Count the predecessors outside the loop, remembering the last one.
+        entryPred = NULL;
+        outsidePredCount = 0;
+        for (k = 0; k < LoopNode->numpred; k++) {
+            node = IRO_NodeTable[LoopNode->pred[k]];
+            if (!Bv_IsBitSet(node->index, InLoop)) {
+                entryPred = node;
+                outsidePredCount++;
+            }
+        }
+
+        if (
+            outsidePredCount == 1 &&
+            entryPred->last->type == IROLinearGoto &&
+            entryPred->nextnode == bodyNode &&
+            bodyNode != LoopNode
+            )
+        {
+            unrolled = UnrollStandardLoop(header, entryPred, bodyNode, count);
+        }
+    }
+
+    if (!unrolled)
+        return;
+
+    // New flowgraph nodes were created: rebuild the index table and the
+    // derived flowgraph information.
+    IRO_NodeTable = oalloc(sizeof(IRONode *) * IRO_NumNodes);
+    memset(IRO_NodeTable, 0, sizeof(IRONode *) * IRO_NumNodes);
+    for (node = IRO_FirstNode; node != NULL; node = node->nextnode)
+        IRO_NodeTable[node->index] = node;
+    IRO_ComputeSuccPred();
+    IRO_ComputeDom();
+
+    IRO_Dump(" LoopUnroll:Loop with header = %d Unrolled\n", header->index);
+}
+
+// Decides whether `loop` satisfies every precondition of the unroller.
+// Returns 1 when it does; otherwise logs the reason through IRO_Dump and
+// returns 0.
+//
+// Checks common to all loops: none of the disqualifying LP_* flags is set,
+// LoopFlags_200 is set, and the loop size does not exceed
+// copts.unrollinstrfactor. Loops with LoopFlags_10000 set must additionally
+// have an unsigned induction operand and LoopFlags_2000 set; the other loops
+// must have an invariant or constant upper bound, an initialisation
+// (loop->nd14) that stores directly to a variable, a usable initial value and
+// LP_LOOP_STEP_ISADD set.
+static int IsLoopUnrollable(IROLoop *loop) {
+    CInt64 iterCount;
+    IROLinear *bound;
+    IROLinear *init;
+
+    if (loop->flags & LP_LOOP_HAS_ASM) {
+        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_ASM \n");
+        return 0;
+    }
+    if (loop->flags & LP_IFEXPR_NON_CANONICAL) {
+        IRO_Dump("IsLoopUnrollable:No due to LP_IFEXPR_NON_CANONICAL \n");
+        return 0;
+    }
+    if (loop->flags & LP_LOOP_HAS_CALLS) {
+        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_CALLS \n");
+        return 0;
+    }
+    if (loop->flags & LP_LOOP_HAS_CNTRLFLOW) {
+        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_CNTRLFLOW \n");
+        return 0;
+    }
+    if (loop->flags & LP_INDUCTION_NOT_FOUND) {
+        IRO_Dump("IsLoopUnrollable:No due to LP_INDUCTION_NOT_FOUND \n");
+        return 0;
+    }
+    if (loop->flags & LP_LOOP_HDR_HAS_SIDEEFFECTS) {
+        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HDR_HAS_SIDEEFFECTS \n");
+        return 0;
+    }
+    if (!(loop->flags & LoopFlags_200)) {
+        IRO_Dump("IsLoopUnrollable:No because header does not follow induction update \n");
+        return 0;
+    }
+
+    if (loop->flags & LoopFlags_10000) {
+        // "while" form: unsigned induction in decrement form only.
+        if (!IRO_IsUnsignedType(loop->nd18->u.diadic.left->rtype)) {
+            IRO_Dump("IsLoopUnrollable:No because the while loop induction is signed\n");
+            return 0;
+        }
+        if (!(loop->flags & LoopFlags_2000)) {
+            IRO_Dump("IsLoopUnrollable:No because the while loop operator is not of decrement form\n");
+            return 0;
+        }
+    } else {
+        bound = loop->nd18->u.diadic.right;
+        if (!IRO_IsIntConstant(bound) && !(bound->flags & IROLF_LoopInvariant)) {
+            IRO_Dump("IsLoopUnrollable:No because Loop Upper Bound is Variant in the loop\n");
+            return 0;
+        }
+        if (!loop->nd14) {
+            IRO_Dump("IsLoopUnrollable:No because there is no initialization of loop index in PreHeader\n");
+            return 0;
+        }
+        if (!IRO_IsVariable(loop->nd14->u.diadic.left)) {
+            IRO_Dump("IsLoopUnrollable:No because initial value of induction stored thru pointer\n");
+            return 0;
+        }
+
+        // A signed induction variable needs either a non-negative constant
+        // start value or bounds whose difference is a known constant.
+        if (!IRO_IsUnsignedType(loop->nd14->rtype)) {
+            init = loop->nd14->u.diadic.right;
+            if (!IRO_IsIntConstant(init)) {
+                if (!IsIterationCountConstant(loop, &iterCount)) {
+                    IRO_Dump("IsLoopUnrollable:No because initial value of induction is signed and not constant\n");
+                    return 0;
+                }
+                IRO_Dump("IsLoopUnrollable:Yes, the limits substract out to be constants\n");
+            } else if (!CInt64_GreaterEqual(init->u.node->data.intval, cint64_zero)) {
+                IRO_Dump("IsLoopUnrollable:No because initial value of induction is signed but init < 0\n");
+                return 0;
+            }
+        }
+
+        if (!(loop->flags & LP_LOOP_STEP_ISADD)) {
+            IRO_Dump("IsLoopUnrollable:No because LP_LOOP_STEP_ISADD is not set i.e induciton is not updated by 1\n");
+            return 0;
+        }
+    }
+
+    if (loop->sizeBySomeMeasurement > copts.unrollinstrfactor) {
+        IRO_Dump("IsLoopUnrollable:No because loop size greater than threshold\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+// Creates a conditional-branch node with the opposite sense of `type`
+// (IROLinearIf becomes IROLinearIfNot, anything else becomes IROLinearIf),
+// appends it to `list` and returns it. The caller fills in the operands.
+IROLinear *BuildEarlyLoopExitTest(IROLinearType type, IROList *list) {
+    IROLinear *test;
+
+    test = IRO_NewLinear(IROLinearOp1Arg);
+    test->index = ++IRO_NumLinear;
+    test->type = (type == IROLinearIf) ? IROLinearIfNot : IROLinearIf;
+    IRO_AddToList(test, list);
+
+    return test;
+}
+
+// Creates a conditional-branch node of exactly the given `type`, appends it
+// to `list` and returns it. The caller fills in the operands.
+IROLinear *BuildLoopExitTest(IROLinearType type, IROList *list) {
+    IROLinear *test;
+
+    test = IRO_NewLinear(IROLinearOp1Arg);
+    test->index = ++IRO_NumLinear;
+    test->type = type;
+    IRO_AddToList(test, list);
+
+    return test;
+}
+
+// Fills `rec` with the terms of one loop bound: an EADD expression is
+// decomposed into its terms, a lone integer constant is recorded as the only
+// int term, and anything else is recorded as the only misc term.
+static void ClassifyLoopBound(IROLinear *bound, IROAddrRecord *rec) {
+    if (IS_LINEAR_DIADIC(bound, EADD)) {
+        IRO_DecomposeAddressExpression(bound, rec);
+        return;
+    }
+
+    if (IRO_IsIntConstant(bound)) {
+        rec->numInts++;
+        IRO_AddElmToList(bound, &rec->ints);
+        rec->numObjRefs = 0;
+        rec->numMisc = 0;
+    } else {
+        rec->numMisc++;
+        IRO_AddElmToList(bound, &rec->misc);
+        rec->numObjRefs = 0;
+        rec->numInts = 0;
+    }
+}
+
+// Determines whether the number of iterations of `loop` is a compile-time
+// constant and, if so, stores it in *pval and returns 1. Returns 0 otherwise
+// (including when the computed count is zero or the bounds do not describe a
+// non-empty range).
+//
+// The lower bound is the right operand of loop->nd14; the upper bound is the
+// right operand of loop->nd18 when LoopFlags_1 is set and its left operand
+// otherwise. With two integer constants the count is
+//     (upper - lower + step [- 1 for ELESS]) / step
+// where step is loop->induction->addConst. Otherwise both bounds are split
+// into terms and the count is the constant difference of those terms, plus one
+// for ELESSEQU.
+int IsIterationCountConstant(IROLoop *loop, CInt64 *pval) {
+    IROLinear *lo;
+    IROLinear *hi;
+    int isUnsigned;
+    int emptyRange;
+    IROAddrRecord *loRec;
+    IROAddrRecord *hiRec;
+    CInt64 loVal;
+    CInt64 hiVal;
+    CInt64 step;
+    CInt64 minusOne;
+
+    lo = loop->nd14->u.diadic.right;
+    hi = (loop->flags & LoopFlags_1) ? loop->nd18->u.diadic.right : loop->nd18->u.diadic.left;
+    isUnsigned = IRO_IsUnsignedType(hi->rtype);
+
+    if (IRO_IsIntConstant(lo) && IRO_IsIntConstant(hi)) {
+        loVal = lo->u.node->data.intval;
+        hiVal = hi->u.node->data.intval;
+
+        if (isUnsigned)
+            emptyRange = CInt64_LessEqualU(hiVal, loVal);
+        else
+            emptyRange = CInt64_LessEqual(hiVal, loVal);
+        if (emptyRange)
+            return 0;
+
+        CInt64_SetLong(&step, loop->induction->addConst);
+        CInt64_SetLong(&minusOne, -1);
+
+        *pval = CInt64_Sub(hiVal, loVal);
+        *pval = CInt64_Add(*pval, step);
+        if (IS_LINEAR_DIADIC(loop->nd18, ELESS))
+            *pval = CInt64_Add(*pval, minusOne);
+
+        CError_ASSERT(2486, !CInt64_IsZero(&step));
+
+        *pval = isUnsigned ? CInt64_DivU(*pval, step) : CInt64_Div(*pval, step);
+
+        if (CInt64_Equal(*pval, cint64_zero))
+            return 0;
+
+        if (isUnsigned) {
+            CError_ASSERT(2508, !CInt64_LessEqualU(*pval, cint64_zero));
+        } else {
+            CError_ASSERT(2517, !CInt64_LessEqual(*pval, cint64_zero));
+        }
+
+        return 1;
+    }
+
+    // Symbolic bounds: compare them term by term.
+    loRec = IRO_InitAddrRecordPointer(lo);
+    hiRec = IRO_InitAddrRecordPointer(hi);
+    ClassifyLoopBound(lo, loRec);
+    ClassifyLoopBound(hi, hiRec);
+
+    if (!IsDifferenceOfTermsConstant(loRec, hiRec, isUnsigned, pval))
+        return 0;
+
+    if (IS_LINEAR_DIADIC(loop->nd18, ELESSEQU))
+        *pval = CInt64_Add(*pval, cint64_one);
+    return 1;
+}
+
+// Checks whether (upper - lower) is a positive compile-time constant, given
+// both bounds already split into object-reference, misc and integer terms.
+//
+// Returns 1 and stores the difference of the summed integer terms in *pval
+// when:
+//   - neither bound contains object references,
+//   - both bounds have the same number of misc terms and every pair of misc
+//     terms at the same list position is the same expression, and
+//   - the integer terms of the upper bound sum to more than those of the
+//     lower bound (signed comparison).
+// Returns 0 otherwise. `isUnsigned` is accepted for interface compatibility
+// and is not consulted.
+static int IsDifferenceOfTermsConstant(IROAddrRecord *lowerRec, IROAddrRecord *upperRec, int isUnsigned, CInt64 *pval) {
+    UInt32 i;
+    CInt64 upperval;
+    CInt64 lowerval;
+    IROElmList *el;
+    IROElmList *lowerEl;
+    IROElmList *upperEl;
+    IROLinear *nd;
+
+    // Object references never cancel out here, even when the counts match.
+    if (upperRec->numObjRefs != 0 || lowerRec->numObjRefs != 0)
+        return 0;
+
+    if (upperRec->numMisc != lowerRec->numMisc)
+        return 0;
+
+    // Walk both misc lists in step so that every term is checked. Earlier
+    // code compared only the two list heads on every iteration, so bounds
+    // differing in a later term were treated as equal. Terms are matched by
+    // position; a different ordering is reported as "not constant", which only
+    // makes the caller more conservative.
+    lowerEl = lowerRec->misc;
+    upperEl = upperRec->misc;
+    for (i = 0; i < upperRec->numMisc; i++) {
+        if (!lowerEl || !upperEl)
+            return 0;
+        if (!IRO_ExprsSame(lowerEl->element, upperEl->element))
+            return 0;
+        lowerEl = lowerEl->next;
+        upperEl = upperEl->next;
+    }
+
+    upperval = cint64_zero;
+    for (el = upperRec->ints; el; el = el->next) {
+        nd = el->element;
+        upperval = CMach_CalcIntDiadic(nd->rtype, upperval, '+', nd->u.node->data.intval);
+    }
+
+    lowerval = cint64_zero;
+    for (el = lowerRec->ints; el; el = el->next) {
+        nd = el->element;
+        lowerval = CMach_CalcIntDiadic(nd->rtype, lowerval, '+', nd->u.node->data.intval);
+    }
+
+    // Equal or descending bounds do not give a usable iteration count.
+    if (!CInt64_Greater(upperval, lowerval))
+        return 0;
+
+    *pval = CInt64_Sub(upperval, lowerval);
+    return 1;
+}
+
+// Turns every linear node of `fnode`, from fnode->first through fnode->last
+// inclusive, into an IROLinearNop by overwriting its type.
+void NoOpBlock(IRONode *fnode) {
+    IROLinear *cur = fnode->first;
+    IROLinear *stop = fnode->last;
+
+    while (cur) {
+        cur->type = IROLinearNop;
+        if (cur == stop)
+            break;
+        cur = cur->next;
+    }
+}
+
+void IRO_TestConstantIterationCount(IROLoop *loop, CInt64 *iterCount, SInt32 vectorStride, UInt32 *unrollFactor, SInt32 *leftOver, UInt32 *needOrigLoop, UInt32 *needUnrollBodyTest, UInt32 *resetUnrolledFinalValue) { /*
+ * Plans the unrolling of a loop whose iteration count (*iterCount) is a
+ * known constant.
+ *
+ * Inputs:  loop (only the type of its nd18 bound operand is consulted, to
+ *          pick signed or unsigned comparison), *iterCount, vectorStride
+ *          (asserted non-zero) and *unrollFactor (asserted non-zero; the
+ *          requested maximum factor).
+ * Outputs: *unrollFactor            - the factor actually chosen (0 when the
+ *                                     count is smaller than one stride);
+ *          *leftOver                - iterations not covered by the unrolled
+ *                                     body;
+ *          *needOrigLoop            - non-zero when leftover iterations exist;
+ *          *needUnrollBodyTest      - see the individual branches below;
+ *          *resetUnrolledFinalValue - !(needOrigLoop && needUnrollBodyTest).
+ * The results are also written to the dump.
+ */
+ UInt32 isUnsigned;
+ CInt64 val; // number of whole vectorStride-sized groups in *iterCount
+ CInt64 val3; // *iterCount modulo vectorStride
+ CInt64 mod;
+ CInt64 val2; // unroll factor candidate currently being evaluated
+ CInt64 loopvar3; // leftover iterations produced by the current candidate
+ CInt64 loopvar1; // best unroll factor found so far
+ CInt64 loopvar2; // smallest leftover found so far
+ CInt64 strideVal;
+
+ CError_ASSERT(2737, *unrollFactor);
+
+ isUnsigned = IRO_IsUnsignedType(
+ (loop->flags & LoopFlags_1) ? loop->nd18->u.diadic.right->rtype :loop->nd18->u.diadic.left->rtype);
+
+ CError_ASSERT(2756, vectorStride);
+
+ strideVal = IRO_MakeLong(vectorStride);
+ if (isUnsigned ? CInt64_LessU(*iterCount, strideVal) : CInt64_Less(*iterCount, strideVal)) {
+ // Fewer iterations than one stride: nothing is unrolled and every iteration is left over.
+ // *resetUnrolledFinalValue is not written on this path (it is still read by the dump below).
+ *needOrigLoop = 1;
+ *needUnrollBodyTest = 0;
+ *unrollFactor = 0;
+ *leftOver = CInt64_GetULong(iterCount);
+ } else {
+ // val = *iterCount / vectorStride, using a shift for strides 2, 4, 8 and 16.
+ switch (vectorStride) {
+ case 1:
+ val = *iterCount;
+ break;
+ case 2:
+ val = CInt64_ShrU(*iterCount, cint64_one);
+ break;
+ case 4:
+ val = CInt64_ShrU(*iterCount, IRO_MakeLong(2));
+ break;
+ case 8:
+ val = CInt64_ShrU(*iterCount, IRO_MakeLong(3));
+ break;
+ case 16:
+ val = CInt64_ShrU(*iterCount, IRO_MakeLong(4));
+ break;
+ default:
+ val = CInt64_Div(*iterCount, strideVal);
+ }
+ // Never unroll by more than the number of groups available.
+ if (CInt64_LessU(val, IRO_MakeLong(*unrollFactor)))
+ *unrollFactor = CInt64_GetULong(&val);
+
+ CInt64_SetLong(&val2, *unrollFactor);
+ // val3 = *iterCount % vectorStride, using a mask for strides 2, 4, 8 and 16.
+ switch (vectorStride) {
+ case 1:
+ val3 = cint64_zero;
+ break;
+ case 2:
+ val3 = CInt64_And(*iterCount, cint64_one);
+ break;
+ case 4:
+ val3 = CInt64_And(*iterCount, IRO_MakeLong(3));
+ break;
+ case 8:
+ val3 = CInt64_And(*iterCount, IRO_MakeLong(7));
+ break;
+ case 16:
+ val3 = CInt64_And(*iterCount, IRO_MakeLong(15));
+ break;
+ default:
+ val3 = CInt64_Mod(*iterCount, strideVal);
+ }
+ // At most 8 groups: the factor becomes the group count itself and only the stride remainder is left over.
+ if (CInt64_LessEqualU(val, IRO_MakeLong(8))) {
+ *needUnrollBodyTest = vectorStride > 1;
+ *unrollFactor = CInt64_GetULong(&val);
+ *leftOver = CInt64_GetULong(&val3);
+ *needOrigLoop = *leftOver != 0;
+ *resetUnrolledFinalValue = !(*needOrigLoop && *needUnrollBodyTest);
+ } else {
+ // Search downwards from the clamped factor for the candidate with the smallest leftover,
+ // where leftover = (val % candidate) * vectorStride + val3.
+ loopvar1 = IRO_MakeLong(0x7FFFFFFF);
+ loopvar2 = IRO_MakeLong(0x7FFFFFFF);
+ do {
+ mod = CInt64_Mod(val, val2);
+ loopvar3 = CInt64_Add(CInt64_Mul(mod, strideVal), val3);
+ if (CInt64_Less(loopvar3, loopvar2)) { // strictly smaller: on a tie the earlier (larger) factor is kept
+ loopvar2 = loopvar3;
+ loopvar1 = val2;
+ }
+ if (vectorStride > 1) // with a stride above 1 only the first candidate is considered
+ break;
+ val2 = CInt64_Add(val2, cint64_negone);
+ } while (CInt64_GreaterEqualU(CInt64_Mul(val2, val2), val)); // stop once candidate * candidate < val
+
+ *unrollFactor = CInt64_GetULong(&loopvar1);
+ *leftOver = CInt64_GetULong(&loopvar2);
+ *needOrigLoop = *leftOver != 0;
+ *needUnrollBodyTest = CInt64_Less(loopvar1, val) || vectorStride > 1;
+ *resetUnrolledFinalValue = !(*needOrigLoop && *needUnrollBodyTest);
+ }
+ }
+
+ IRO_Dump(
+ "---- IterCount = %d, VectorStride = %d, UnrollFactor = %d, LeftOver = %d,\n"
+ "\tNeedOrigLoop = %d, NeedUnrollBodyTest = %d, ResetUnrolledFinalValue = %d\n",
+ CInt64_GetULong(iterCount), vectorStride, *unrollFactor, *leftOver,
+ *needOrigLoop, *needUnrollBodyTest, *resetUnrolledFinalValue
+ );
+}
+
+// Appends to `list` the IR that computes the loop's original iteration count
+// into a fresh temporary, and returns the reference node forming the left
+// side of that assignment.
+//
+// The value built is
+//     (upper - lower + step [- 1 when the loop test is ELESS]) / step
+// where `lower` is the right operand of loop->nd14, `upper` is the bound
+// operand of loop->nd18 (right operand when LoopFlags_1 is set, left operand
+// otherwise) and `step` is loop->induction->addConst (asserted non-zero).
+// The subtraction is omitted when `lower` is the constant 0 and the division
+// is omitted when `step` is 1. For an integer type of at most 4 bytes and a
+// power-of-two step the division is emitted as a right shift.
+IROLinear *BuildOrigIterationCount(IROList *list, IROLoop *loop) {
+    IROLinear *upperBound;
+    IROLinear *nd29b;
+    IROLinear *lowerBound;
+    IROLinear *finalCount;
+    IROLinear *divisor;
+    Type *type;
+    IROLinear *nd25;
+    IROLinear *tmp;
+    Boolean isZeroBase;
+    Object *tempobj;
+    IROLinear *iterCount;
+    IROLinear *negone;
+    IROLinear *ass;
+    ENode *expr;
+    SInt32 powval;
+    int useShift;
+
+    isZeroBase = 0;
+    lowerBound = loop->nd14->u.diadic.right;
+    if (IRO_IsIntConstant(lowerBound) && CInt64_Equal(lowerBound->u.node->data.intval, cint64_zero))
+        isZeroBase = 1;
+
+    if (!isZeroBase)
+        lowerBound = IRO_DuplicateExpr(lowerBound, list);
+
+    if (loop->flags & LoopFlags_1) {
+        upperBound = IRO_DuplicateExpr(loop->nd18->u.diadic.right, list);
+        type = loop->nd18->u.diadic.right->rtype;
+    } else {
+        upperBound = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list);
+        type = loop->nd18->u.diadic.left->rtype;
+    }
+
+    CError_ASSERT(2924, loop->induction);
+    CError_ASSERT(2929, loop->induction->addConst);
+
+    // Constant node holding the step. It is not appended to the list here:
+    // a duplicate is used for the addition, and the node itself is only
+    // appended below if a shift/divide is emitted.
+    divisor = IRO_NewLinear(IROLinearOperand);
+    divisor->index = ++IRO_NumLinear;
+    divisor->rtype = type;
+    expr = IRO_NewENode(EINTCONST);
+    expr->rtype = type;
+    CInt64_SetLong(&expr->data.intval, loop->induction->addConst);
+    divisor->u.node = expr;
+
+    // upper - lower (or just upper when the loop starts at 0)
+    if (isZeroBase) {
+        iterCount = upperBound;
+    } else {
+        iterCount = IRO_NewLinear(IROLinearOp2Arg);
+        iterCount->index = ++IRO_NumLinear;
+        iterCount->nodetype = ESUB;
+        iterCount->u.diadic.left = upperBound;
+        iterCount->u.diadic.right = lowerBound;
+        iterCount->rtype = type;
+        IRO_AddToList(iterCount, list);
+    }
+
+    // ... + step
+    nd25 = IRO_DuplicateExpr(divisor, list);
+
+    nd29b = IRO_NewLinear(IROLinearOp2Arg);
+    nd29b->index = ++IRO_NumLinear;
+    nd29b->nodetype = EADD;
+    nd29b->u.diadic.left = iterCount;
+    nd29b->u.diadic.right = nd25;
+    nd29b->rtype = type;
+    IRO_AddToList(nd29b, list);
+
+    // ... - 1 for a strict "<" test
+    if (loop->nd18->type == IROLinearOp2Arg && loop->nd18->nodetype == ELESS) {
+        tmp = nd29b;
+
+        negone = IRO_NewLinear(IROLinearOperand);
+        negone->index = ++IRO_NumLinear;
+        negone->rtype = type;
+        expr = IRO_NewENode(EINTCONST);
+        expr->rtype = type;
+        CInt64_SetLong(&expr->data.intval, -1);
+        negone->u.node = expr;
+        IRO_AddToList(negone, list);
+
+        nd29b = IRO_NewLinear(IROLinearOp2Arg);
+        nd29b->index = ++IRO_NumLinear;
+        nd29b->nodetype = EADD;
+        nd29b->u.diadic.left = tmp;
+        nd29b->u.diadic.right = negone;
+        nd29b->rtype = type;
+        IRO_AddToList(nd29b, list);
+    }
+
+    // ... / step
+    if (CInt64_Equal(divisor->u.node->data.intval, cint64_one)) {
+        finalCount = nd29b;
+    } else {
+        useShift = divisor->rtype->size <= 4 && IS_TYPE_INT(divisor->rtype) && IRO_IsPow2(divisor, &powval);
+
+        finalCount = IRO_NewLinear(IROLinearOp2Arg);
+        finalCount->index = ++IRO_NumLinear;
+        if (useShift) {
+            // Dividing by 2^powval is a RIGHT shift by powval. This used to
+            // emit ESHL, which multiplied the count by the step instead of
+            // dividing by it. The divisor node is reused as the shift amount.
+            // NOTE(review): for a signed type and a negative dividend a right
+            // shift rounds differently from EDIV - confirm that cannot
+            // matter to the users of this count.
+            finalCount->nodetype = ESHR;
+            CInt64_SetLong(&divisor->u.node->data.intval, powval);
+        } else {
+            finalCount->nodetype = EDIV;
+        }
+        finalCount->u.diadic.left = nd29b;
+        finalCount->u.diadic.right = divisor;
+        finalCount->rtype = type;
+        IRO_AddToList(divisor, list);
+        IRO_AddToList(finalCount, list);
+    }
+
+    // temp = <count expression>
+    tempobj = create_temp_object(type);
+    IRO_FindVar(tempobj, 1, 1);
+
+    ass = IRO_NewLinear(IROLinearOp2Arg);
+    ass->index = ++IRO_NumLinear;
+    ass->nodetype = EASS;
+    ass->u.diadic.left = IRO_TempReference(tempobj, list);
+    ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
+    ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
+    ass->u.diadic.right = finalCount;
+    ass->u.diadic.right->flags |= IROLF_Reffed;
+    ass->rtype = type;
+    IRO_AddToList(ass, list);
+
+    return ass->u.diadic.left;
+}
+
+// Appends to `list` the IR for
+//     temp = <left operand of loop->nd18> + 1
+// using a fresh temporary of that operand's type, and returns the reference
+// node forming the left side of the assignment.
+static IROLinear *BuildOrigIterationCount_DoWhile(IROList *list, IROLoop *loop) {
+    Type *countType = loop->nd18->u.diadic.left->rtype;
+    IROLinear *one;
+    IROLinear *counter;
+    IROLinear *sum;
+    IROLinear *store;
+    IROLinear *dest;
+    Object *temp;
+    ENode *constExpr;
+
+    // Constant 1.
+    one = IRO_NewLinear(IROLinearOperand);
+    one->index = ++IRO_NumLinear;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = countType;
+    constExpr->data.intval = cint64_one;
+    one->rtype = countType;
+    one->u.node = constExpr;
+    IRO_AddToList(one, list);
+    one->flags |= IROLF_Reffed;
+
+    // <counter> + 1; the copied counter is a plain read, not a store.
+    sum = IRO_NewLinear(IROLinearOp2Arg);
+    sum->index = ++IRO_NumLinear;
+    sum->nodetype = EADD;
+    sum->rtype = countType;
+    counter = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list);
+    counter->flags |= IROLF_Reffed;
+    counter->flags &= ~IROLF_Assigned;
+    counter->u.monadic->flags &= ~IROLF_Assigned;
+    sum->u.diadic.left = counter;
+    sum->u.diadic.right = one;
+    IRO_AddToList(sum, list);
+
+    // temp = <sum>
+    temp = create_temp_object(countType);
+    IRO_FindVar(temp, 1, 1);
+
+    store = IRO_NewLinear(IROLinearOp2Arg);
+    store->index = ++IRO_NumLinear;
+    store->nodetype = EASS;
+    dest = IRO_TempReference(temp, list);
+    dest->flags |= IROLF_Assigned | IROLF_Ind;
+    dest->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
+    store->u.diadic.left = dest;
+    store->u.diadic.right = sum;
+    store->rtype = countType;
+    IRO_AddToList(store, list);
+
+    return dest;
+}
+
+// Appends to `list` the IR for
+//     temp = <loop bound> - (loop->induction->addConst * unrollFactor)
+// where the bound is the right operand of loop->nd18 when LoopFlags_1 is set
+// and its left operand otherwise. `iterCount` only supplies the result type.
+// Returns the reference node forming the left side of the assignment.
+IROLinear *BuildNewFinalvalue(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) {
+    Type *valueType = iterCount->rtype;
+    IROLinear *stride;
+    IROLinear *bound;
+    IROLinear *diff;
+    IROLinear *store;
+    IROLinear *dest;
+    Object *temp;
+    ENode *constExpr;
+
+    // Constant: step * unrollFactor.
+    stride = IRO_NewLinear(IROLinearOperand);
+    stride->index = ++IRO_NumLinear;
+    stride->rtype = valueType;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = valueType;
+    CInt64_SetLong(&constExpr->data.intval, loop->induction->addConst * unrollFactor);
+    stride->u.node = constExpr;
+    IRO_AddToList(stride, list);
+
+    // Copy of the loop bound.
+    if (loop->flags & LoopFlags_1)
+        bound = IRO_DuplicateExpr(loop->nd18->u.diadic.right, list);
+    else
+        bound = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list);
+
+    // bound - stride
+    diff = IRO_NewLinear(IROLinearOp2Arg);
+    diff->index = ++IRO_NumLinear;
+    diff->nodetype = ESUB;
+    diff->rtype = valueType;
+    diff->u.diadic.left = bound;
+    diff->u.diadic.right = stride;
+    IRO_AddToList(diff, list);
+
+    // temp = <diff>
+    temp = create_temp_object(valueType);
+    IRO_FindVar(temp, 1, 1);
+
+    store = IRO_NewLinear(IROLinearOp2Arg);
+    store->index = ++IRO_NumLinear;
+    store->nodetype = EASS;
+    dest = IRO_TempReference(temp, list);
+    dest->flags |= IROLF_Assigned | IROLF_Ind;
+    dest->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
+    store->u.diadic.left = dest;
+    store->u.diadic.right = diff;
+    diff->flags |= IROLF_Reffed;
+    store->rtype = valueType;
+    IRO_AddToList(store, list);
+
+    return dest;
+}
+
+// Appends to `list` the IR for
+//     temp = (<induction operand> / unrollFactor + 1) * unrollFactor
+// where the induction operand is the operand of ind->nd (its monadic operand
+// for an IROLinearOp1Arg node, its left operand otherwise). Returns the
+// reference node forming the left side of the assignment.
+static IROLinear *BuildPreAlignTemp(IROLoopInd *ind, UInt32 unrollFactor, IROList *list) {
+    IROLinear *update = ind->nd;
+    Type *valueType = update->rtype;
+    IROLinear *factor;
+    IROLinear *indCopy;
+    IROLinear *quotient;
+    IROLinear *one;
+    IROLinear *bumped;
+    IROLinear *factorCopy;
+    IROLinear *product;
+    IROLinear *store;
+    IROLinear *dest;
+    Object *temp;
+    ENode *constExpr;
+
+    // Constant: unrollFactor.
+    factor = IRO_NewLinear(IROLinearOperand);
+    factor->index = ++IRO_NumLinear;
+    factor->rtype = valueType;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = valueType;
+    CInt64_SetLong(&constExpr->data.intval, unrollFactor);
+    factor->u.node = constExpr;
+    IRO_AddToList(factor, list);
+
+    // Copy of the induction operand; the copy at the list tail is a plain
+    // read, so drop the "assigned" marks.
+    if (update->type == IROLinearOp1Arg)
+        IRO_DuplicateExpr(update->u.monadic, list);
+    else
+        IRO_DuplicateExpr(update->u.diadic.left, list);
+
+    indCopy = list->tail;
+    indCopy->flags &= ~IROLF_Assigned;
+    indCopy->u.monadic->flags &= ~IROLF_Assigned;
+
+    // ind / factor
+    quotient = IRO_NewLinear(IROLinearOp2Arg);
+    quotient->index = ++IRO_NumLinear;
+    quotient->nodetype = EDIV;
+    quotient->rtype = valueType;
+    quotient->u.diadic.left = indCopy;
+    quotient->u.diadic.right = factor;
+    IRO_AddToList(quotient, list);
+    quotient->flags |= IROLF_Reffed;
+
+    // Constant 1.
+    one = IRO_NewLinear(IROLinearOperand);
+    one->index = ++IRO_NumLinear;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = valueType;
+    constExpr->data.intval = cint64_one;
+    one->rtype = valueType;
+    one->u.node = constExpr;
+    IRO_AddToList(one, list);
+    one->flags |= IROLF_Reffed;
+
+    // quotient + 1
+    bumped = IRO_NewLinear(IROLinearOp2Arg);
+    bumped->index = ++IRO_NumLinear;
+    bumped->nodetype = EADD;
+    bumped->rtype = valueType;
+    bumped->u.diadic.left = quotient;
+    bumped->u.diadic.right = one;
+    IRO_AddToList(bumped, list);
+    bumped->flags |= IROLF_Reffed;
+
+    // (quotient + 1) * factor, using a second copy of the factor constant
+    // taken from the list tail.
+    IRO_DuplicateExpr(factor, list);
+    factorCopy = list->tail;
+
+    product = IRO_NewLinear(IROLinearOp2Arg);
+    product->index = ++IRO_NumLinear;
+    product->nodetype = EMUL;
+    product->rtype = valueType;
+    product->u.diadic.left = bumped;
+    product->u.diadic.right = factorCopy;
+    IRO_AddToList(product, list);
+    product->flags |= IROLF_Reffed;
+
+    // temp = <product>
+    temp = create_temp_object(valueType);
+    IRO_FindVar(temp, 1, 1);
+
+    store = IRO_NewLinear(IROLinearOp2Arg);
+    store->index = ++IRO_NumLinear;
+    store->nodetype = EASS;
+    dest = IRO_TempReference(temp, list);
+    dest->flags |= IROLF_Assigned | IROLF_Ind;
+    dest->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
+    store->u.diadic.left = dest;
+    store->u.diadic.right = product;
+    product->flags |= IROLF_Reffed;
+    store->rtype = valueType;
+    IRO_AddToList(store, list);
+
+    return dest;
+}
+
+// Appends to `list` the IR for
+//     temp = (<iterCount> * loop->induction->addConst) + <induction operand>
+// where the induction operand is the operand of loop->induction->nd (its
+// monadic operand for an IROLinearOp1Arg node, its left operand otherwise).
+// `unrollFactor` is not used. Returns the reference node forming the left
+// side of the assignment.
+static IROLinear *BuildNewFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) {
+    Type *valueType = iterCount->rtype;
+    IROLinear *step;
+    IROLinear *countCopy;
+    IROLinear *scaled;
+    IROLinear *indCopy;
+    IROLinear *sum;
+    IROLinear *store;
+    IROLinear *dest;
+    Object *temp;
+    ENode *constExpr;
+
+    // Constant: the induction step.
+    step = IRO_NewLinear(IROLinearOperand);
+    step->index = ++IRO_NumLinear;
+    step->rtype = valueType;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = valueType;
+    CInt64_SetLong(&constExpr->data.intval, loop->induction->addConst);
+    step->u.node = constExpr;
+    IRO_AddToList(step, list);
+    step->flags |= IROLF_Reffed;
+
+    // iterCount * step; the copied count is a plain read.
+    scaled = IRO_NewLinear(IROLinearOp2Arg);
+    scaled->index = ++IRO_NumLinear;
+    scaled->nodetype = EMUL;
+    countCopy = IRO_DuplicateExpr(iterCount, list);
+    scaled->u.diadic.left = countCopy;
+    scaled->u.diadic.right = step;
+    scaled->rtype = valueType;
+    IRO_AddToList(scaled, list);
+    scaled->flags |= IROLF_Reffed;
+    countCopy->flags &= ~IROLF_Assigned;
+    countCopy->u.diadic.left->flags &= ~IROLF_Assigned;
+
+    // Copy of the induction operand, taken from the list tail and marked as
+    // a plain read.
+    if (loop->induction->nd->type == IROLinearOp1Arg)
+        IRO_DuplicateExpr(loop->induction->nd->u.monadic, list);
+    else
+        IRO_DuplicateExpr(loop->induction->nd->u.diadic.left, list);
+    indCopy = list->tail;
+    indCopy->flags &= ~IROLF_Assigned;
+    indCopy->u.diadic.left->flags &= ~IROLF_Assigned;
+
+    // scaled + ind
+    sum = IRO_NewLinear(IROLinearOp2Arg);
+    sum->index = ++IRO_NumLinear;
+    sum->nodetype = EADD;
+    sum->rtype = valueType;
+    sum->u.diadic.left = scaled;
+    sum->u.diadic.right = indCopy;
+    IRO_AddToList(sum, list);
+    sum->flags |= IROLF_Reffed;
+
+    // temp = <sum>
+    temp = create_temp_object(valueType);
+    IRO_FindVar(temp, 1, 1);
+
+    store = IRO_NewLinear(IROLinearOp2Arg);
+    store->index = ++IRO_NumLinear;
+    store->nodetype = EASS;
+    dest = IRO_TempReference(temp, list);
+    dest->flags |= IROLF_Assigned | IROLF_Ind;
+    dest->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
+    store->u.diadic.left = dest;
+    store->u.diadic.right = sum;
+    sum->flags |= IROLF_Reffed;
+    store->rtype = valueType;
+    IRO_AddToList(store, list);
+
+    return dest;
+}
+
+// Appends to `list` the IR for
+//     temp = (<iterCount> * step - step * unrollFactor) + <induction operand>
+// where step is loop->induction->addConst and the induction operand is the
+// operand of loop->induction->nd (its monadic operand for an IROLinearOp1Arg
+// node, its left operand otherwise). Returns the reference node forming the
+// left side of the assignment.
+static IROLinear *BuildUnrolledFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) {
+    Type *valueType = iterCount->rtype;
+    IROLinear *unrolledStep;
+    IROLinear *step;
+    IROLinear *countCopy;
+    IROLinear *scaled;
+    IROLinear *reduced;
+    IROLinear *indCopy;
+    IROLinear *sum;
+    IROLinear *store;
+    IROLinear *dest;
+    Object *temp;
+    ENode *constExpr;
+
+    // Constant: step * unrollFactor.
+    unrolledStep = IRO_NewLinear(IROLinearOperand);
+    unrolledStep->index = ++IRO_NumLinear;
+    unrolledStep->rtype = valueType;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = valueType;
+    CInt64_SetLong(&constExpr->data.intval, loop->induction->addConst * unrollFactor);
+    unrolledStep->u.node = constExpr;
+    IRO_AddToList(unrolledStep, list);
+    unrolledStep->flags |= IROLF_Reffed;
+
+    // Constant: step.
+    step = IRO_NewLinear(IROLinearOperand);
+    step->index = ++IRO_NumLinear;
+    step->rtype = valueType;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = valueType;
+    CInt64_SetLong(&constExpr->data.intval, loop->induction->addConst);
+    step->u.node = constExpr;
+    IRO_AddToList(step, list);
+    step->flags |= IROLF_Reffed;
+
+    // iterCount * step; the copied count is a plain read.
+    scaled = IRO_NewLinear(IROLinearOp2Arg);
+    scaled->index = ++IRO_NumLinear;
+    scaled->nodetype = EMUL;
+    countCopy = IRO_DuplicateExpr(iterCount, list);
+    scaled->u.diadic.left = countCopy;
+    scaled->u.diadic.right = step;
+    scaled->rtype = valueType;
+    IRO_AddToList(scaled, list);
+    scaled->flags |= IROLF_Reffed;
+    countCopy->flags &= ~IROLF_Assigned;
+    countCopy->u.diadic.left->flags &= ~IROLF_Assigned;
+
+    // scaled - unrolledStep
+    reduced = IRO_NewLinear(IROLinearOp2Arg);
+    reduced->index = ++IRO_NumLinear;
+    reduced->nodetype = ESUB;
+    reduced->rtype = valueType;
+    reduced->u.diadic.left = scaled;
+    reduced->u.diadic.right = unrolledStep;
+    IRO_AddToList(reduced, list);
+    reduced->flags |= IROLF_Reffed;
+
+    // Copy of the induction operand, taken from the list tail and marked as
+    // a plain read.
+    if (loop->induction->nd->type == IROLinearOp1Arg)
+        IRO_DuplicateExpr(loop->induction->nd->u.monadic, list);
+    else
+        IRO_DuplicateExpr(loop->induction->nd->u.diadic.left, list);
+    indCopy = list->tail;
+    indCopy->flags &= ~IROLF_Assigned;
+    indCopy->u.diadic.left->flags &= ~IROLF_Assigned;
+
+    // reduced + ind
+    sum = IRO_NewLinear(IROLinearOp2Arg);
+    sum->index = ++IRO_NumLinear;
+    sum->nodetype = EADD;
+    sum->rtype = valueType;
+    sum->u.diadic.left = reduced;
+    sum->u.diadic.right = indCopy;
+    IRO_AddToList(sum, list);
+    sum->flags |= IROLF_Reffed;
+
+    // temp = <sum>
+    temp = create_temp_object(valueType);
+    IRO_FindVar(temp, 1, 1);
+
+    store = IRO_NewLinear(IROLinearOp2Arg);
+    store->index = ++IRO_NumLinear;
+    store->nodetype = EASS;
+    dest = IRO_TempReference(temp, list);
+    dest->flags |= IROLF_Assigned | IROLF_Ind;
+    dest->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
+    store->u.diadic.left = dest;
+    store->u.diadic.right = sum;
+    sum->flags |= IROLF_Reffed;
+    store->rtype = valueType;
+    IRO_AddToList(store, list);
+
+    return dest;
+}
+
+// Appends to `list` a guard of the form
+//     IfNot (<iterCount> > unrollFactor) -> label
+// The comparison uses a copy of `iterCount` and a constant of the same type.
+void BuildUnrolledBodyEntryTest(IROList *list, IROLinear *iterCount, UInt32 unrollFactor, CLabel *label) {
+    Type *countType = iterCount->rtype;
+    IROLinear *limit;
+    IROLinear *countCopy;
+    IROLinear *cmp;
+    IROLinear *branch;
+    ENode *constExpr;
+
+    // Constant: unrollFactor.
+    limit = IRO_NewLinear(IROLinearOperand);
+    limit->index = ++IRO_NumLinear;
+    limit->rtype = countType;
+    constExpr = IRO_NewENode(EINTCONST);
+    constExpr->rtype = countType;
+    CInt64_SetLong(&constExpr->data.intval, unrollFactor);
+    limit->u.node = constExpr;
+    IRO_AddToList(limit, list);
+
+    countCopy = IRO_DuplicateExpr(iterCount, list);
+
+    // iterCount > unrollFactor
+    cmp = IRO_NewLinear(IROLinearOp2Arg);
+    cmp->index = ++IRO_NumLinear;
+    cmp->nodetype = EGREATER;
+    cmp->rtype = countType;
+    cmp->u.diadic.left = countCopy;
+    cmp->u.diadic.right = limit;
+    limit->flags |= IROLF_Reffed;
+    IRO_AddToList(cmp, list);
+
+    // Branch to `label` when the comparison is false.
+    branch = IRO_NewLinear(IROLinearOp1Arg);
+    branch->index = ++IRO_NumLinear;
+    branch->type = IROLinearIfNot;
+    branch->rtype = countType;
+    branch->u.label.x4 = cmp;
+    cmp->flags |= IROLF_Reffed;
+    branch->u.label.label = label;
+    IRO_AddToList(branch, list);
+}
+
+void ChangeInductionReference(IROLinear *first, IROLinear *last, CInt64 val, IROLoop *loop) { /*
+ * Walks the linear nodes from `first` through `last` (inclusive) and rewrites
+ * every reference to the loop's induction variable (loop->induction->var)
+ * so that it reads "<reference> + val".
+ *
+ * Two rewrite shapes exist:
+ *   - folded: when the reference (optionally under an ETYPCON that keeps the
+ *     type kind and does not shrink the size) is the left side of an
+ *     ESHL/EMUL by an integer constant, and that product is the right operand
+ *     of an EADD which itself has a father, a new add of the constant
+ *     (val scaled by the same factor) is placed above that EADD;
+ *   - plain: otherwise the reference itself is replaced by an add node whose
+ *     operands are the reference and the constant `val`.
+ */
+ IROLinear *nd;
+ IROLinear *value;
+ IROLinear *add;
+ UInt32 isUnsigned;
+ IROLinear *father;
+ Boolean flag; // cleared when an enclosing ETYPCON changes the type kind or narrows the value
+ IROLinear *father2;
+ IROLinear *father3;
+ Type *tmp;
+ UInt32 flag2; // set once the folded rewrite has been applied
+ Object *varobj;
+ IROLinear *next;
+ ENode *expr;
+ Type *type;
+
+ CInt64 val2;
+ CInt64 val1;
+ IROList list;
+
+ type = loop->induction->nd->rtype;
+ isUnsigned = IRO_IsUnsignedType(type);
+
+ for (nd = first; nd; nd = next) {
+ next = nd->next; // saved up front: the plain rewrite below re-links nd->next
+
+ varobj = IRO_IsVariable(nd);
+ if (varobj && loop->induction->var->object == varobj) {
+ value = IRO_NewLinear(IROLinearOperand);
+ value->index = ++IRO_NumLinear;
+ value->rtype = type;
+ expr = IRO_NewENode(EINTCONST);
+ expr->rtype = type;
+ expr->data.intval = val;
+ value->u.node = expr;
+
+ add = IRO_NewLinear(IROLinearOp2Arg);
+ add->index = ++IRO_NumLinear;
+ add->nodetype = EADD;
+ add->rtype = type;
+
+ father = IRO_LocateFather(nd);
+ flag = 1;
+ if (father && IS_LINEAR_MONADIC(father, ETYPCON)) { // look through one type conversion
+ tmp = father->rtype;
+ father = IRO_LocateFather(father);
+ if (tmp->type != nd->rtype->type || tmp->size < nd->rtype->size)
+ flag = 0;
+ }
+
+ flag2 = 0;
+ if (
+ flag &&
+ father &&
+ IS_LINEAR_DIADIC_2(father, ESHL, EMUL) &&
+ IRO_IsIntConstant(father->u.diadic.right) &&
+ (father2 = IRO_LocateFather(father)) &&
+ IS_LINEAR_DIADIC(father2, EADD) &&
+ father2->u.diadic.right == father &&
+ (father3 = IRO_LocateFather(father2))
+ )
+ {
+ IRO_InitList(&list);
+ val2 = father->u.diadic.right->u.node->data.intval;
+ if (father->nodetype == ESHL) // a shift by k scales by 1 << k
+ val2 = CInt64_Shl(cint64_one, val2);
+ // Scale the added constant by the same factor as the reference.
+ val1 = value->u.node->data.intval;
+ if (isUnsigned)
+ val1 = CInt64_MulU(val2, val1);
+ else
+ val1 = CInt64_Mul(val2, val1);
+ value->u.node->data.intval = val1;
+
+ IRO_AddToList(value, &list);
+ IRO_AddToList(add, &list);
+ add->u.diadic.right = value;
+ IRO_Paste(list.head, list.tail, father3);
+ IRO_LocateFather_Cut_And_Paste_Without_Nopping(father2, add);
+ add->u.diadic.left = father2;
+ add->rtype = father2->rtype;
+ flag2 = 1;
+ }
+
+ if (!flag2) {
+ add->u.diadic.right = value;
+ add->u.diadic.right->flags |= IROLF_Reffed;
+ value->next = add;
+
+ add->u.diadic.left = nd;
+ IRO_LocateFather_Cut_And_Paste_Without_Nopping(nd, add);
+ add->flags |= IROLF_Reffed;
+ // Splice the two new nodes in directly after nd: nd -> value -> add -> (old successor).
+ nd->next = value;
+ add->next = next;
+ }
+ }
+
+ if (nd == last)
+ break;
+ }
+}
+
+// Builds "<induction operand> += value * step" (or "-=" for a decrementing
+// induction) and pastes it in front of `before`; step is
+// loop->induction->addConst. Returns the last node of the pasted sequence.
+//
+// The update operator is chosen from the form of loop->induction->nd:
+//   EPREINC / EPOSTINC -> EADDASS on the monadic operand
+//   EPREDEC / EPOSTDEC -> ESUBASS on the monadic operand
+//   EADDASS            -> EADDASS on the left operand
+// NOTE(review): for any other form only the constant is pasted and returned,
+// with no assignment built - confirm callers never reach that case.
+IROLinear *UpdateInductionIncrement(IROLoop *loop, SInt32 value, IROLinear *before) {
+    IROLinear *ind_nd;
+    IROLinear *target;
+    IROLinear *addvalue;
+    IROLinear *ass;
+    Type *type;
+    ENode *expr;
+    IROList list;
+    int op;
+
+    IRO_InitList(&list);
+    ind_nd = loop->induction->nd;
+    type = ind_nd->rtype;
+
+    // Constant: value * step.
+    addvalue = IRO_NewLinear(IROLinearOperand);
+    addvalue->index = ++IRO_NumLinear;
+    addvalue->rtype = type;
+    expr = IRO_NewENode(EINTCONST);
+    expr->rtype = type;
+    CInt64_SetLong(&expr->data.intval, value * loop->induction->addConst);
+    addvalue->u.node = expr;
+    IRO_AddToList(addvalue, &list);
+
+    target = NULL;
+    op = EADDASS;
+    if (IS_LINEAR_MONADIC_2(ind_nd, EPREINC, EPOSTINC)) {
+        target = ind_nd->u.monadic;
+    } else if (IS_LINEAR_MONADIC_2(ind_nd, EPREDEC, EPOSTDEC)) {
+        target = ind_nd->u.monadic;
+        op = ESUBASS;
+    } else if (IS_LINEAR_DIADIC(ind_nd, EADDASS)) {
+        // A diadic node keeps its operand in u.diadic.left; this case used to
+        // read it through the monadic union member.
+        target = ind_nd->u.diadic.left;
+    }
+
+    if (target) {
+        target = IRO_DuplicateExpr(target, &list);
+
+        ass = IRO_NewLinear(IROLinearOp2Arg);
+        ass->index = ++IRO_NumLinear;
+        ass->nodetype = op;
+        ass->u.diadic.left = target;
+        ass->u.diadic.right = addvalue;
+        ass->rtype = type;
+        IRO_AddToList(ass, &list);
+    }
+
+    IRO_Paste(list.head, list.tail, before);
+    return list.tail;
+}
+
+// Appends to `list` the IR for
+//     <var> = <copy of the right operand of loop->nd14>
+// typed like the loop's induction node. Asserts that loop->nd14 exists and is
+// a two-operand node.
+void GenInitialAssignment(IROLoop *loop, Object *var, IROList *list) {
+    IROLinear *store;
+    Type *indType;
+
+    CError_ASSERT(3924, loop->nd14 && loop->nd14->type == IROLinearOp2Arg);
+
+    indType = loop->induction->nd->rtype;
+
+    store = IRO_NewLinear(IROLinearOp2Arg);
+    store->index = ++IRO_NumLinear;
+    store->nodetype = EASS;
+    store->rtype = indType;
+    store->u.diadic.left = IRO_TempReference(var, list);
+    store->u.diadic.right = IRO_DuplicateExpr(loop->nd14->u.diadic.right, list);
+    IRO_AddToList(store, list);
+}
+
+void GenNewInduction(void) { // Takes no input and builds nothing: the body only raises a fatal compiler error (presumably an unimplemented stub).
+ CError_FATAL(3941);
+}