diff options
Diffstat (limited to 'compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c')
-rw-r--r-- | compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c | 2305 |
1 files changed, 2305 insertions, 0 deletions
diff --git a/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c b/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c new file mode 100644 index 0000000..cf3f1bf --- /dev/null +++ b/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c @@ -0,0 +1,2305 @@ +#include "IroUnrollLoop.h" +#include "compiler/CError.h" +#include "IroFlowgraph.h" +#include "IroLinearForm.h" +#include "IroUtil.h" +#include "compiler/LoopDetection.h" +#include "IroLoop.h" +#include "IroDump.h" +#include "IroVars.h" +#include "compiler/CFunc.h" +#include "compiler/CMachine.h" + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct LoopList { + UInt8 flags; + BitVector *bv; + struct LoopList *next; + IRONode *fnode; + int xE; +} LoopList; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +// forward decls +static void IRO_FindLoops_Unroll(void); +static void LoopUnroll(int count, IRONode *fnode); +static int IsLoopUnrollable(IROLoop *loop); +static int IsDifferenceOfTermsConstant(IROAddrRecord *lowerRec, IROAddrRecord *upperRec, int isUnsigned, CInt64 *pval); +static IROLinear *BuildOrigIterationCount_DoWhile(IROList *list, IROLoop *loop); +static IROLinear *BuildPreAlignTemp(IROLoopInd *ind, UInt32 unrollFactor, IROList *list); +static IROLinear *BuildNewFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop); +static IROLinear *BuildUnrolledFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop); + +void IRO_LoopUnroller(void) { + VectorPhaseCalledFromUnroll = 1; + IRO_FindLoops_Unroll(); + IRO_CheckForUserBreak(); +} + +static void IRO_FindLoops_Unroll(void) { + IRONode *fnode; + IRONode *pred; + UInt16 i; + UInt16 flag; + LoopList *list; + LoopList *list2; + + fnode = IRO_FirstNode; + LoopList_First = NULL; + + while (fnode) { + flag = 0; + for (i = 0; i < fnode->numpred; i++) { + pred = IRO_NodeTable[fnode->pred[i]]; + if (Bv_IsBitSet(fnode->index, pred->dom)) { + if (!flag) { + Bv_AllocVector(&InLoop, IRO_NumNodes + 1); + Bv_Clear(InLoop); + Bv_SetBit(fnode->index, InLoop); + } + flag = 1; + Bv_SetBit(pred->index, InLoop); + if (pred != fnode) + AddPreds(pred); + } + } + + if (flag) { + if (!LoopList_First) { + list = oalloc(sizeof(LoopList)); + list->next = NULL; + } else { + list = oalloc(sizeof(LoopList)); + list->next = LoopList_First; + } + LoopList_First = list; + + Bv_AllocVector(&list->bv, IRO_NumNodes + 1); + list->flags |= 1; + Bv_Copy(InLoop, list->bv); + list->fnode = fnode; + list->xE = 0; + } + + fnode = fnode->nextnode; + } + + list = LoopList_First; + Bv_AllocVector(&LoopTemp, IRO_NumNodes + 1); + while (list) { + for (list2 = LoopList_First; list2; list2 = list2->next) { + if (list2 != list) { + IRO_Dump(" header = %d \n", list2->fnode->index); + IRO_Dump(" l1 bit vector=\n"); + IRO_DumpBits("", list2->bv); + IRO_Dump(" l bit vector=\n"); + IRO_DumpBits("", list->bv); + if (Bv_IsSubset(list->bv, list2->bv)) + list2->flags &= ~1; + } + } + list = list->next; + } + + for (list = LoopList_First; list; list = list->next) { + if (list->flags & 1) { + IRONode *listfnode; + Bv_Copy(list->bv, InLoop); + listfnode = list->fnode; + IRO_Dump("IRO_FindLoops_Unroll:Found loop with header %d\n", listfnode->index); + IRO_DumpBits("Loop includes: ", InLoop); + LoopUnroll(copts.unrollfactor, listfnode); + IRO_UpdateFlagsOnInts(); + } + } +} + +static int CheckConstant(CInt64 a, CInt64 b, CInt64 *result) { + CInt64 shl = cint64_zero; + CInt64 work = cint64_zero; + CInt64 and = cint64_zero; + CInt64 i; + + for (i = cint64_zero; CInt64_Less(i, a); i = CInt64_Add(i, cint64_one)) { + shl = CInt64_Shl(b, i); + and = CInt64_And(shl, work); + if (CInt64_NotEqual(and, cint64_zero)) + return 0; + work = CInt64_Or(shl, work); + } + + *result = work; + return 1; +} + +typedef struct LoopPattern { + IROLinear *nd0; + IROLinear *nd4; + Type *type; + IROLinear *ndC; + IROLinear *nd10; + CInt64 val14; + CInt64 val1C; +} LoopPattern; + +static void UnrollWhileLoopBody(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, LoopPattern *pattern, UInt32 unrollFactor) { + IRONode *scan; + int pass; + IROLinear *firstnode; + IROLinear *lastnd; + IROLinear *nd; + IROLinear *nd1; + IROLinear *nd2; + IROLinear *nd3; + IROLinear *nd4; + IROLinear *nd5; + IROLinear *nd6; + IROLinear *nd8; + IROLinear *nd7; + ENode *expr; + IROList list; + CInt64 zero; + CInt64 shiftval; + + CInt64_SetLong(&zero, 0); + + pass = 0; + + do { + firstnode = NULL; + for (scan = fnode3; scan && scan != header; scan = scan->nextnode) { + IRO_InitList(&list); + lastnd = scan->last; + nd = scan->first; + while (1) { + if (nd->stmt) + nd->stmt->flags |= StmtFlag_10; + + if ( + (nd->index < loop->index20 || nd->index > loop->index24) && + nd->type != IROLinearLabel && + nd->type != IROLinearNop && + !(nd->flags & IROLF_Reffed) + ) + { + CError_ASSERT(345, nd->nodetype == EORASS || nd->nodetype == EANDASS || nd->nodetype == EXORASS); + + IRO_DuplicateExpr(pattern->nd0, &list); + nd1 = list.tail; + + shiftval = cint64_one; + shiftval = CInt64_Shl(shiftval, pattern->val1C); + + nd2 = IRO_NewLinear(IROLinearOperand); + nd2->index = ++IRO_NumLinear; + nd2->rtype = pattern->nd0->rtype; + expr = IRO_NewENode(EINTCONST); + expr->rtype = pattern->nd0->rtype; + CInt64_SetLong(&expr->data.intval, pass * CInt64_GetULong(&shiftval)); + nd2->u.node = expr; + IRO_AddToList(nd2, &list); + + IRO_DuplicateExpr(pattern->nd4, &list); + + nd3 = IRO_NewLinear(IROLinearOp2Arg); + nd3->index = ++IRO_NumLinear; + nd3->nodetype = EADD; + nd3->rtype = pattern->type; + nd3->u.diadic.left = list.tail; + nd3->u.diadic.right = nd2; + IRO_AddToList(nd3, &list); + + nd4 = IRO_NewLinear(IROLinearOp2Arg); + nd4->index = ++IRO_NumLinear; + nd4->nodetype = EADD; + nd4->rtype = pattern->type; + nd4->u.diadic.left = nd3; + nd4->u.diadic.right = nd1; + IRO_AddToList(nd4, &list); + + nd5 = IRO_NewLinear(IROLinearOp1Arg); + nd5->index = ++IRO_NumLinear; + nd5->nodetype = EINDIRECT; + nd5->rtype = nd->rtype; + nd5->u.monadic = nd4; + IRO_AddToList(nd5, &list); + + nd6 = IRO_NewLinear(IROLinearOp2Arg); + *nd6 = *nd; + nd6->index = ++IRO_NumLinear; + nd6->u.diadic.left = list.tail; + nd6->next = NULL; + + nd7 = IRO_NewLinear(IROLinearOperand); + nd7->index = ++IRO_NumLinear; + nd7->rtype = pattern->ndC->rtype; + expr = IRO_NewENode(EINTCONST); + expr->rtype = pattern->ndC->rtype; + nd7->u.node = expr; + nd7->next = NULL; + expr->data.intval = pattern->val14; + + if ( + IS_LINEAR_DIADIC(nd, EANDASS) && + CInt64_Equal(pattern->val14, cint64_zero) + ) + { + nd6->nodetype = EASS; + } else if ( + IS_LINEAR_DIADIC(nd, EORASS) && + !CTool_EndianReadWord32(&pattern->val14.hi) + ) + { + UInt32 tmp = CInt64_GetULong(&pattern->val14); + if ( + (nd->rtype->size == 1 && tmp == 0xFF) || + (nd->rtype->size == 2 && tmp == 0xFFFF) || + (nd->rtype->size == 4 && tmp == 0xFFFFFFFF) + ) + { + nd6->nodetype = EASS; + } + } + + IRO_AddToList(nd7, &list); + + if (IS_LINEAR_MONADIC(pattern->nd10, ETYPCON)) { + nd8 = IRO_NewLinear(IROLinearOp1Arg); + *nd8 = *pattern->nd10; + nd8->index = ++IRO_NumLinear; + nd8->u.monadic = nd7; + nd8->next = NULL; + IRO_AddToList(nd8, &list); + } else { + nd8 = nd7; + } + nd6->u.diadic.right = nd8; + IRO_AddToList(nd6, &list); + + if (!firstnode) + firstnode = list.head; + } + + if (nd == lastnd) + break; + nd = nd->next; + } + + if (list.head && list.tail) + IRO_Paste(list.head, list.tail, fnode2->last); + } + } while (++pass < 8); +} + +static int PatternMatchLoop(IRONode *fnode, IROLoop *loop, IROLoopInd *ind, UInt32 *unrollFactor, SInt32 *result1, SInt32 *result2, LoopPattern *pattern) { + IROLinear *scan; + IROLinear *varnode; + IROLinear *nd1; + IROLinear *nd2; + IROLinear *left1; + IROLinear *left2; + IROLinear *right1; + IROLinear *right2; + Object *obj1; + Object *obj2; + CInt64 shl; + CInt64 val; + + *result1 = 0; + *result2 = 0; + + if ((scan = fnode->first)) { + while (1) { + if ( + (scan->index < loop->index20 || scan->index > loop->index24) && + !(scan->flags & IROLF_Reffed) && + scan->type != IROLinearNop && + scan->type != IROLinearLabel + ) + { + if (IS_LINEAR_DIADIC_3(scan, EORASS, EXORASS, EANDASS)) { + (*result2)++; + if (IS_LINEAR_MONADIC(scan->u.diadic.left, EINDIRECT)) { + varnode = scan->u.diadic.left->u.monadic; + if (IS_LINEAR_DIADIC(varnode, EADD)) { + pattern->nd4 = varnode->u.diadic.left; + pattern->type = varnode->rtype; + if (IRO_IsVariable(varnode->u.diadic.left)) { + pattern->nd0 = varnode->u.diadic.right; + if ( + IS_LINEAR_DIADIC(pattern->nd0, ESHL) && + IRO_IsConstant(pattern->nd0->u.diadic.right) + ) + { + pattern->val1C = pattern->nd0->u.diadic.right->u.node->data.intval; + nd1 = pattern->nd0->u.diadic.left; + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } + + pattern->nd10 = scan->u.diadic.right; + + if (IS_LINEAR_MONADIC(pattern->nd10, ETYPCON)) { + if (IS_LINEAR_DIADIC(scan, EANDASS)) { + if (IS_LINEAR_MONADIC(pattern->nd10->u.monadic, EBINNOT)) { + pattern->ndC = pattern->nd10->u.monadic->u.monadic; + } else { + return 0; + } + } else { + pattern->ndC = pattern->nd10->u.monadic; + } + + if (IS_LINEAR_DIADIC(pattern->ndC, ESHL) && IRO_IsConstant(pattern->ndC->u.diadic.left)) { + val = pattern->ndC->u.diadic.left->u.node->data.intval; + nd2 = pattern->ndC->u.diadic.right; + } else { + return 0; + } + } else if (IS_LINEAR_DIADIC(pattern->nd10, ESHL) && IS_LINEAR_DIADIC_2(scan, EORASS, EXORASS)) { + pattern->ndC = pattern->nd10; + if (IRO_IsConstant(pattern->ndC->u.diadic.left)) { + val = pattern->ndC->u.diadic.left->u.node->data.intval; + nd2 = pattern->ndC->u.diadic.right; + } else { + return 0; + } + } else if (IS_LINEAR_MONADIC(pattern->nd10, EBINNOT) && IS_LINEAR_DIADIC(scan, EANDASS)) { + pattern->ndC = pattern->nd10->u.monadic; + if (IS_LINEAR_DIADIC(pattern->ndC, ESHL) && IRO_IsConstant(pattern->ndC->u.diadic.left)) { + val = pattern->ndC->u.diadic.left->u.node->data.intval; + nd2 = pattern->ndC->u.diadic.right; + } else { + return 0; + } + } else { + return 0; + } + + if (IS_LINEAR_DIADIC(nd2, EAND) && IS_LINEAR_DIADIC(nd1, ESHR)) { + left1 = nd1->u.diadic.left; + left2 = nd2->u.diadic.left; + obj1 = IRO_IsVariable(left1); + obj2 = IRO_IsVariable(left2); + if (obj1 == obj2 && obj1 == ind->var->object) { + right1 = nd1->u.diadic.right; + right2 = nd2->u.diadic.right; + if (IRO_IsConstant(right1) && IRO_IsConstant(right2)) { + shl = cint64_one; + shl = CInt64_Shl(shl, right1->u.node->data.intval); + shl = CInt64_Sub(shl, cint64_one); + if (CInt64_Equal(shl, right2->u.node->data.intval)) { + if (CTool_EndianReadWord32(&shl.hi) == 0) { + *unrollFactor = CInt64_GetULong(&shl) + 1; + if (CheckConstant(CInt64_Add(shl, cint64_one), val, &pattern->val14)) { + (*result1)++; + if (IS_LINEAR_DIADIC(scan, EANDASS)) + pattern->val14 = CInt64_Not(pattern->val14); + } + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } + } + + if (scan == fnode->last) + break; + scan = scan->next; + } + } + + return 1; +} + +static UInt32 UnrollWhileLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, UInt32 unrollFactor) { + IROLoopInd *ind; + IRONode *scan; + CLabel *lastlabel; + IROLinear *lastlabelnode; + IROLinear *earlyLoopExitTest; + CLabel *earlyLoopExitTestLabel; + IROLinear *origIterationCount; + IROLinear *unrolledFinalValue; + IROLinear *preAlignTemp; + IROLinear *newFinalValue; + IROLinear *savedHead60; + IROLinear *unrolledBodyEntryTest; + CLabel *label; + IROLinear *savedHead2; + IROLinear *loophead25; + IROLinear *loopend; + IROLinear *loopscan; + IROLinear *indvar; + IROLinear *less; + IROLinear *loopExitTest; + IROLinear *saveTail; + CLabel *label2; + IROLinear *gotond; + CLabel *label3; + IROLinear *savedHead3; + IROLinear *updIndInc; + IROLinear *label2nd; + IROLinear *less2; + IROLinear *saveTail2; + IROLinear *less3; + IROLinear *wtf; + IROLinear *constnd; + IROLinear *ass; + IROLinear *nd18; + IRONode *fn19; + IRONode *newfnode1; + IRONode *newfnode2; + IRONode *newfnode3; + IRONode *newfnode4; + IRONode *newfnode5; + IRONode *newfnode6; + IRONode *newfnode7; + IRONode *newfnode8; + IROLinear *lastnd; + ENode *expr; + SInt32 result1; + SInt32 result2; + LoopPattern pattern; + IROList list; + + IRO_Dump("while(n--) loop \n"); + + if (loop->flags & LoopFlags_800) { + IRO_Dump("loop not unrolled because induction used in loop \n"); + return 0; + } + if (loop->flags & LoopFlags_1000) { + IRO_Dump("loop not unrolled because loop has multiple exits \n"); + return 0; + } + + if (!(loop->flags & LP_HAS_MULTIPLE_INDUCTIONS)) + return 0; + + for (ind = FirstInd; ind; ind = ind->next) { + if ((ind->flags & LoopInd_HasMod) && (ind->flags & LoopInd_HasDiv)) + break; + } + + if (!ind) { + IRO_Dump("Could not find loop with and induction with MOD and DIV operation\n"); + return 0; + } + + if (!IRO_IsUnsignedType(ind->nd->rtype)) + return 0; + + if (ind->nd->type == IROLinearOp2Arg) { + if (ind->nd->nodetype == EADDASS && IRO_IsConstant(ind->nd->u.diadic.right)) { + if (ind->addConst != 1) + return 0; + } else if (ind->nd->nodetype == EASS) { + if ( + ind->nd->u.diadic.right->type != IROLinearOp2Arg || + ind->nd->u.diadic.right->nodetype != EADD || + !IRO_IsConstant(ind->nd->u.diadic.right->u.diadic.right) + ) + return 0; + + if (ind->addConst != 1) + return 0; + } else { + return 0; + } + } else if (ind->nd->type == IROLinearOp1Arg && ind->nd->nodetype != EPREINC && ind->nd->nodetype != EPOSTINC) { + return 0; + } + + loop->induction = ind; + loop->index24 = ind->nd->index; + loop->index20 = IRO_FindStart(ind->nd)->index; + + scan = IRO_FirstNode; + memset(&pattern, 0, sizeof(pattern)); + while (scan) { + if (Bv_IsBitSet(scan->index, InLoop) && scan != header) { + if (!PatternMatchLoop(scan, loop, ind, &unrollFactor, &result1, &result2, &pattern)) + return 0; + } + scan = scan->nextnode; + } + + if (result1 > 1 || result2 > 1) + return 0; + + lastlabel = fnode2->last->u.label.label; + lastlabelnode = IRO_FindLabelNode(fnode2->last->u.label.label, fnode2->last); + + IRO_InitList(&list); + IRO_DuplicateExprRange(lastlabelnode->next, LoopNode->last->u.label.x4->u.diadic.left, &list); + IRO_DuplicateExpr(LoopNode->last->u.label.x4, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + lastlabelnode = list.tail; + + IRO_InitList(&list); + earlyLoopExitTest = BuildEarlyLoopExitTest(LoopNode->last->type, &list); + earlyLoopExitTestLabel = IRO_NewLabel(); + earlyLoopExitTest->u.label.label = earlyLoopExitTestLabel; + earlyLoopExitTest->u.label.x4 = lastlabelnode; + earlyLoopExitTest->u.label.x4->flags |= IROLF_Reffed; + earlyLoopExitTest->rtype = LoopNode->last->rtype; + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + origIterationCount = BuildOrigIterationCount_DoWhile(&list, loop); + IRO_Paste(list.head, list.tail, fnode2->last); + savedHead60 = list.head; + + IRO_InitList(&list); + preAlignTemp = BuildPreAlignTemp(ind, unrollFactor, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + unrolledFinalValue = BuildUnrolledFinalvalue_DoWhile(origIterationCount, unrollFactor, &list, loop); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + newFinalValue = BuildNewFinalvalue_DoWhile(origIterationCount, unrollFactor, &list, loop); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + BuildUnrolledBodyEntryTest(&list, origIterationCount, unrollFactor, lastlabel); + IRO_Paste(list.head, list.tail, fnode2->last); + unrolledBodyEntryTest = list.tail; + + IRO_InitList(&list); + label = BuildLabel(&list); + IRO_Paste(list.head, list.tail, fnode2->last); + + savedHead2 = list.head; + loophead25 = NULL; + for (scan = fnode3; scan && scan != header; scan = scan->nextnode) { + IRO_InitList(&list); + loopend = scan->last; + loopscan = scan->first; + while (1) { + if (loopscan->stmt) + loopscan->stmt->flags |= StmtFlag_10; + if (loopscan->type != IROLinearLabel && !(loopscan->flags & IROLF_Reffed)) { + IRO_DuplicateExpr(loopscan, &list); + if (!loophead25) + loophead25 = list.head; + } + if (loopscan == loopend) + break; + loopscan = loopscan->next; + } + + if (list.head && list.tail) + IRO_Paste(list.head, list.tail, fnode2->last); + } + + IRO_InitList(&list); + + if (ind->nd->type == IROLinearOp1Arg) + IRO_DuplicateExpr(ind->nd->u.monadic, &list); + else + IRO_DuplicateExpr(ind->nd->u.diadic.left, &list); + list.tail->flags &= ~IROLF_Assigned; + indvar = list.tail; + + IRO_DuplicateExpr(preAlignTemp, &list); + list.tail->flags &= ~IROLF_Assigned; + + less = IRO_NewLinear(IROLinearOp2Arg); + less->nodetype = ELESS; + less->rtype = TYPE(&stbool); + less->index = ++IRO_NumLinear; + less->next = NULL; + less->u.diadic.left = indvar; + less->u.diadic.right = list.tail; + IRO_AddToList(less, &list); + less->flags |= IROLF_Reffed; + + loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list); + loopExitTest->u.label.label = label; + loopExitTest->u.label.x4 = less; + loopExitTest->u.label.x4->flags |= IROLF_Reffed; + loopExitTest->rtype = LoopNode->last->rtype; + IRO_Paste(list.head, list.tail, fnode2->last); + saveTail = list.tail; + + IRO_InitList(&list); + label2 = IRO_NewLabel(); + gotond = IRO_NewLinear(IROLinearOp1Arg); + gotond->index = ++IRO_NumLinear; + gotond->type = IROLinearGoto; + gotond->u.label.label = label2; + IRO_AddToList(gotond, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + label3 = BuildLabel(&list); + IRO_Paste(list.head, list.tail, fnode2->last); + savedHead3 = list.head; + + UnrollWhileLoopBody(header, fnode2, fnode3, loop, &pattern, unrollFactor); + updIndInc = UpdateInductionIncrement(loop, 8 * unrollFactor, fnode2->last); + + IRO_InitList(&list); + label2nd = IRO_NewLinear(IROLinearLabel); + label2nd->index = IRO_NumLinear++; + label2nd->u.label.label = label2; + label2nd->flags |= IROLF_1; + IRO_AddToList(label2nd, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + + if (ind->nd->type == IROLinearOp1Arg) + IRO_DuplicateExpr(ind->nd->u.monadic, &list); + else + IRO_DuplicateExpr(ind->nd->u.diadic.left, &list); + list.tail->flags &= ~IROLF_Assigned; + indvar = list.tail; + + IRO_DuplicateExpr(unrolledFinalValue, &list); + list.tail->flags &= ~IROLF_Assigned; + + less2 = IRO_NewLinear(IROLinearOp2Arg); + less2->nodetype = ELESS; + less2->rtype = TYPE(&stbool); + less2->index = ++IRO_NumLinear; + less2->next = NULL; + less2->u.diadic.left = indvar; + less2->u.diadic.right = list.tail; + IRO_AddToList(less2, &list); + less2->flags |= IROLF_Reffed; + + loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list); + loopExitTest->u.label.label = label3; + loopExitTest->u.label.x4 = less2; + loopExitTest->u.label.x4->flags |= IROLF_Reffed; + loopExitTest->rtype = LoopNode->last->rtype; + IRO_Paste(list.head, list.tail, fnode2->last); + saveTail2 = list.tail; + + IRO_InitList(&list); + + if (ind->nd->type == IROLinearOp1Arg) + IRO_DuplicateExpr(ind->nd->u.monadic, &list); + else + IRO_DuplicateExpr(ind->nd->u.diadic.left, &list); + list.tail->flags &= ~IROLF_Assigned; + indvar = list.tail; + + IRO_DuplicateExpr(newFinalValue, &list); + list.tail->flags &= ~IROLF_Assigned; + + less3 = IRO_NewLinear(IROLinearOp2Arg); + less3->nodetype = ELESS; + less3->rtype = TYPE(&stbool); + less3->index = ++IRO_NumLinear; + less3->next = NULL; + less3->u.diadic.left = indvar; + less3->u.diadic.right = list.tail; + IRO_AddToList(less3, &list); + less3->flags |= IROLF_Reffed; + + wtf = LoopNode->last->u.label.x4; + IRO_Paste(list.head, list.tail, LoopNode->last); + LoopNode->last->u.label.x4 = list.tail; + + IRO_InitList(&list); + + constnd = IRO_NewLinear(IROLinearOperand); + constnd->index = ++IRO_NumLinear; + expr = IRO_NewENode(EINTCONST); + expr->rtype = wtf->u.diadic.left->rtype; + expr->data.intval = cint64_zero; + constnd->u.node = expr; + constnd->rtype = expr->rtype; + IRO_AddToList(constnd, &list); + constnd->flags |= IROLF_Reffed; + + IRO_DuplicateExpr(wtf->u.diadic.left, &list); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->nodetype = EASS; + ass->rtype = list.tail->rtype; + ass->index = ++IRO_NumLinear; + ass->next = NULL; + ass->u.diadic.left = list.tail; + ass->u.diadic.right = constnd; + IRO_AddToList(ass, &list); + ass->flags |= IROLF_Assigned; + + IRO_NopOut(wtf); + + fn19 = fnode2->nextnode; + nd18 = fnode2->last; + fnode2->last = earlyLoopExitTest; + + newfnode1 = IRO_NewFlowGraphNode(); + newfnode1->first = savedHead60; + newfnode1->last = unrolledBodyEntryTest; + fnode2->nextnode = newfnode1; + + newfnode2 = IRO_NewFlowGraphNode(); + newfnode2->first = savedHead2; + newfnode2->last = saveTail; + savedHead2->u.label.label->stmt = (Statement *) newfnode2; + newfnode1->nextnode = newfnode2; + + newfnode3 = IRO_NewFlowGraphNode(); + newfnode3->first = gotond; + newfnode3->last = gotond; + newfnode2->nextnode = newfnode3; + + newfnode4 = IRO_NewFlowGraphNode(); + newfnode4->first = savedHead3; + newfnode4->last = updIndInc; + savedHead3->u.label.label->stmt = (Statement *) newfnode4; + newfnode3->nextnode = newfnode4; + + newfnode5 = IRO_NewFlowGraphNode(); + newfnode5->first = label2nd; + newfnode5->last = saveTail2; + label2nd->u.label.label->stmt = (Statement *) newfnode5; + newfnode4->nextnode = newfnode5; + + newfnode6 = IRO_NewFlowGraphNode(); + newfnode6->first = nd18; + newfnode6->last = nd18; + newfnode5->nextnode = newfnode6; + newfnode6->nextnode = fn19; + + newfnode7 = oalloc(sizeof(IRONode)); + memset(newfnode7, 0, sizeof(IRONode)); + newfnode7->index = IRO_NumNodes; + IRO_NumNodes++; + + newfnode7->first = list.head; + newfnode7->last = list.tail; + + list.tail->next = LoopNode->last->next; + LoopNode->last->next = list.head; + + newfnode7->nextnode = LoopNode->nextnode; + LoopNode->nextnode = newfnode7; + + newfnode8 = oalloc(sizeof(IRONode)); + memset(newfnode8, 0, sizeof(IRONode)); + newfnode8->index = IRO_NumNodes; + IRO_NumNodes++; + + lastnd = IRO_NewLinear(IROLinearLabel); + lastnd->index = IRO_NumLinear++; + lastnd->next = NULL; + lastnd->u.label.label = earlyLoopExitTestLabel; + lastnd->flags |= IROLF_1; + earlyLoopExitTestLabel->stmt = (Statement *) newfnode8; + + newfnode8->first = lastnd; + newfnode8->last = lastnd; + + lastnd->next = newfnode7->last->next; + newfnode7->last->next = lastnd; + + newfnode8->nextnode = newfnode7->nextnode; + newfnode7->nextnode = newfnode8; + + return 1; +} + +void IRO_IterateForLoopBody(IRONode *start, IRONode *end, IROLoop *loop, IROLinear *destnode, SInt32 addConst, CInt64 *val, Boolean funkyFlag) { + IROLinear *first = NULL; + IROLinear *last = NULL; + IRONode *fnode; + IROLinear *lastnd; + IROLinear *nd; + IROList list; + + for (fnode = start; fnode && fnode != end; fnode = fnode->nextnode) { + IRO_InitList(&list); + + lastnd = fnode->last; + nd = fnode->first; + while (1) { + if (nd->stmt) + nd->stmt->flags |= StmtFlag_10; + + if ( + (nd->index < loop->index20 || nd->index > loop->index24) && + nd->type != IROLinearLabel && + !(nd->flags & IROLF_Reffed) + ) + { + IRO_DuplicateExpr(nd, &list); + if (!first) + first = list.head; + last = list.tail; + } + + if (nd == lastnd) + break; + nd = nd->next; + } + + if (list.head && list.tail) + IRO_Paste(list.head, list.tail, destnode); + } + + if (funkyFlag) { + *val = CInt64_Add(*val, IRO_MakeLong(loop->induction->addConst)); + ChangeInductionReference(first, last, *val, loop); + } +} + +void IRO_LinearizeForLoopPostLoop(IRONode *fnode1, IRONode *fnode2, IROLoop *loop, IRONode *fnode3, UInt32 unrollFactor) { + IRONode *newfnode; + IROLinear *newnd; + SInt32 i; + CInt64 val; + + newfnode = oalloc(sizeof(IRONode)); + memset(newfnode, 0, sizeof(IRONode)); + newfnode->index = IRO_NumNodes; + IRO_NumNodes++; + + newnd = IRO_NewLinear(IROLinearNop); + newnd->index = IRO_NumLinear++; + newnd->next = NULL; + newnd->flags |= IROLF_1; + + newfnode->first = newfnode->last = newnd; + + newfnode->nextnode = fnode3->nextnode; + fnode3->nextnode = newfnode; + + newnd->next = fnode3->last->next; + fnode3->last->next = newnd; + + val = cint64_zero; + for (i = 0; i < unrollFactor; i++) + IRO_IterateForLoopBody(fnode2, fnode1, loop, newfnode->last, loop->induction->addConst, &val, i > 0); + UpdateInductionIncrement(loop, unrollFactor, newfnode->last); +} + +static UInt32 UnrollForLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, UInt32 unrollFactor) { + IROLinear *lastlabelnode; + IROLinear *earlyLoopExitTest; + IROLinear *origIterationCount; + IROLinear *saveHead1; + IROLinear *newFinalValue; + IROLinear *unrolledBodyEntryTest; + IROLinear *gotoNd; + IROLinear *saveHead2; + IROLinear *updIndInc; + IROLinear *labelNd; + IROLinear *saveTail2; + IROLinear *ndCopy; + IROLinear *saveTail3; + IROLinear *loopExitTest; + IROLinear *lastnd; + IROLinear *labelNd2; + IROLinear *saveTail4; + IROLinear *labelNd3; + IROLinear *scan; + IRONode *nd18; + IRONode *newfnode1; + IRONode *newfnode2; + IRONode *newfnode3; + IRONode *newfnode4; + IRONode *newfnode5; + IRONode *newfnode6; + CLabel *lastlabel; + CLabel *earlyLoopExitTestLabel; + CLabel *label; + CLabel *label2; + SInt32 i; + + IROList list; + CInt64 iterCount; + int isConstant; + UInt32 needOrigLoop = 0; + UInt32 needUnrollBodyTest = 0; + UInt32 resetUnrolledFinalValue = 0; + SInt32 leftOver; + CInt64 val; + + lastlabelnode = IRO_FindLabelNode(fnode2->last->u.label.label, fnode2->last); + lastlabel = IRO_NewLabel(); + + IRO_InitList(&list); + IRO_DuplicateExprRange(lastlabelnode->next, LoopNode->last->u.label.x4, &list); + IRO_DuplicateExpr(LoopNode->last->u.label.x4, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + lastlabelnode = list.tail; + + IRO_InitList(&list); + earlyLoopExitTest = BuildEarlyLoopExitTest(LoopNode->last->type, &list); + earlyLoopExitTestLabel = IRO_NewLabel(); + earlyLoopExitTest->u.label.label = earlyLoopExitTestLabel; + earlyLoopExitTest->u.label.x4 = lastlabelnode; + earlyLoopExitTest->u.label.x4->flags |= IROLF_Reffed; + earlyLoopExitTest->rtype = LoopNode->last->rtype; + IRO_Paste(list.head, list.tail, fnode2->last); + + isConstant = IsIterationCountConstant(loop, &iterCount); + needOrigLoop = 1; + needUnrollBodyTest = 1; + resetUnrolledFinalValue = 0; + if (isConstant) + IRO_TestConstantIterationCount(loop, &iterCount, 1, &unrollFactor, &leftOver, &needOrigLoop, &needUnrollBodyTest, &resetUnrolledFinalValue); + + IRO_InitList(&list); + origIterationCount = BuildOrigIterationCount(&list, loop); + IRO_Paste(list.head, list.tail, fnode2->last); + saveHead1 = list.head; + + IRO_InitList(&list); + newFinalValue = BuildNewFinalvalue(origIterationCount, unrollFactor, &list, loop); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + BuildUnrolledBodyEntryTest(&list, origIterationCount, unrollFactor, lastlabel); + IRO_Paste(list.head, list.tail, fnode2->last); + unrolledBodyEntryTest = list.tail; + + label = IRO_NewLabel(); + IRO_InitList(&list); + gotoNd = IRO_NewLinear(IROLinearOp1Arg); + gotoNd->index = ++IRO_NumLinear; + gotoNd->type = IROLinearGoto; + gotoNd->u.label.label = label; + IRO_AddToList(gotoNd, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + label2 = BuildLabel(&list); + IRO_Paste(list.head, list.tail, fnode2->last); + saveHead2 = list.head; + + val = cint64_zero; + for (i = 0; i < unrollFactor; i++) + IRO_IterateForLoopBody(fnode3, header, loop, fnode2->last, loop->induction->addConst, &val, i > 0); + updIndInc = UpdateInductionIncrement(loop, unrollFactor, fnode2->last); + + IRO_InitList(&list); + labelNd = IRO_NewLinear(IROLinearLabel); + labelNd->index = IRO_NumLinear++; + labelNd->u.label.label = label; + labelNd->flags |= IROLF_1; + IRO_AddToList(labelNd, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + + IRO_InitList(&list); + + IRO_DuplicateExpr(LoopNode->last->u.label.x4->u.diadic.left, &list); + saveTail2 = list.tail; + + if (resetUnrolledFinalValue) + IRO_DuplicateExpr(loop->nd18->u.diadic.right, &list); + else + IRO_DuplicateExpr(newFinalValue, &list); + + ndCopy = IRO_NewLinear(LoopNode->last->u.label.x4->type); + *ndCopy = *LoopNode->last->u.label.x4; + ndCopy->index = ++IRO_NumLinear; + ndCopy->next = NULL; + ndCopy->expr = NULL; + ndCopy->u.diadic.left = saveTail2; + ndCopy->u.diadic.right = list.tail; + IRO_AddToList(ndCopy, &list); + + IRO_Paste(list.head, list.tail, fnode2->last); + saveTail3 = list.tail; + + IRO_InitList(&list); + loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list); + loopExitTest->u.label.label = label2; + loopExitTest->u.label.x4 = saveTail3; + loopExitTest->u.label.x4->flags |= IROLF_Reffed; + loopExitTest->rtype = LoopNode->last->rtype; + IRO_Paste(list.head, list.tail, fnode2->last); + saveTail4 = list.tail; + + IRO_InitList(&list); + labelNd2 = IRO_NewLinear(IROLinearLabel); + labelNd2->index = IRO_NumLinear++; + labelNd2->u.label.label = lastlabel; + labelNd2->flags |= IROLF_1; + IRO_AddToList(labelNd2, &list); + IRO_Paste(list.head, list.tail, fnode2->last); + + lastnd = fnode2->last; + nd18 = fnode2->nextnode; + fnode2->last = earlyLoopExitTest; + + newfnode1 = IRO_NewFlowGraphNode(); + newfnode1->first = saveHead1; + newfnode1->last = unrolledBodyEntryTest; + fnode2->nextnode = newfnode1; + + newfnode2 = IRO_NewFlowGraphNode(); + newfnode2->first = gotoNd; + newfnode2->last = gotoNd; + newfnode1->nextnode = newfnode2; + + newfnode3 = IRO_NewFlowGraphNode(); + newfnode3->first = saveHead2; + newfnode3->last = updIndInc; + + saveHead2->u.label.label->stmt = (Statement *) newfnode3; + if (newfnode2) + newfnode2->nextnode = newfnode3; + else + newfnode1->nextnode = newfnode3; + + newfnode4 = IRO_NewFlowGraphNode(); + newfnode4->first = labelNd; + newfnode4->last = saveTail4; + labelNd->u.label.label->stmt = (Statement *) newfnode4; + newfnode3->nextnode = newfnode4; + + newfnode5 = IRO_NewFlowGraphNode(); + newfnode5->first = labelNd2; + newfnode5->last = lastnd; + newfnode4->nextnode = newfnode5; + newfnode5->nextnode = nd18; + + newfnode6 = oalloc(sizeof(IRONode)); + memset(newfnode6, 0, sizeof(IRONode)); + newfnode6->index = IRO_NumNodes; + IRO_NumNodes++; + + labelNd3 = IRO_NewLinear(IROLinearLabel); + labelNd3->index = IRO_NumLinear++; + labelNd3->next = NULL; + labelNd3->u.label.label = earlyLoopExitTestLabel; + labelNd3->flags |= IROLF_1; + earlyLoopExitTestLabel->stmt = (Statement *) newfnode6; + + newfnode6->first = labelNd3; + newfnode6->last = labelNd3; + + labelNd3->next = LoopNode->last->next; + LoopNode->last->next = labelNd3; + + newfnode6->nextnode = LoopNode->nextnode; + LoopNode->nextnode = newfnode6; + + if (!needOrigLoop) { + NoOpBlock(newfnode5); + NoOpBlock(header); + NoOpBlock(fnode3); + NoOpBlock(loop->induction->fnode); + IRO_NopOut(newfnode1->last->u.label.x4); + newfnode1->last->type = IROLinearNop; + } + + if (!needUnrollBodyTest) { + IRO_NopOut(earlyLoopExitTest->u.label.x4); + earlyLoopExitTest->type = IROLinearNop; + + IRO_NopOut(newfnode4->last->u.label.x4); + newfnode4->last->type = IROLinearNop; + + if (newfnode2) + newfnode2->last->type = IROLinearNop; + + for (scan = newfnode1->first; scan; scan = scan->next) { + if (!(scan->flags & IROLF_Reffed)) + IRO_NopOut(scan); + if (scan == newfnode1->last) + break; + } + } + + return 1; +} + +static UInt32 UnrollStandardLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, int count) { + IROLoop *loop; + + ConditionalHeaderAtBottom = 1; + loop = ExtractLoopInfo(header); + loop->xC = fnode2; + loop->x10 = fnode3; + FindAssignmenttoInductionVar(loop, fnode2); + + if (!IsLoopUnrollable(loop)) { + IRO_Dump("LoopUnroll:loop with header %d not unrolled because IsLoopUnrollable failed\n", header->index); + return 0; + } + + if (loop->flags & LoopFlags_10000) + return UnrollWhileLoop(header, fnode2, fnode3, loop, count); + else + return UnrollForLoop(header, fnode2, fnode3, loop, count); +} + +static void LoopUnroll(int count, IRONode *header) { + VarRecord *var; + IRONode *tmp; + UInt16 i; + UInt16 j; + IRONode *prevpred; + IRONode *prevsucc; + int foundpred; + UInt32 predcount; + UInt32 success = 0; + + LoopNode = header; + FindMustReach(); + + for (var = IRO_FirstVar; var; var = var->next) + var->xA = 1; + + ComputeLoopKills(); + ComputeLoopInvariance(); + ComputeLoopInduction(); + + LoopNode = header; + ConditionalHeaderAtBottom = 0; + + prevpred = NULL; + foundpred = 0; + for (i = 0; i < LoopNode->numpred; i++) { + tmp = IRO_NodeTable[LoopNode->pred[i]]; + if (!Bv_IsBitSet(tmp->index, InLoop)) { + foundpred = 1; + if (tmp->nextnode == header) { + CError_ASSERT(2101, !prevpred || tmp == prevpred); + prevpred = tmp; + } + } + } + + if (!foundpred) { + IRO_Dump("No predecessor outside the loop\n"); + return; + } + + if (LoopNode->last->type == IROLinearIf || LoopNode->last->type == IROLinearIfNot) { + if (LoopNode->nextnode && !Bv_IsBitSet(LoopNode->nextnode->index, InLoop)) { + prevsucc = NULL; + for (i = 0; i < LoopNode->numsucc; i++) { + tmp = IRO_NodeTable[LoopNode->succ[i]]; + if (Bv_IsBitSet(tmp->index, InLoop)) { + CError_ASSERT(2159, !prevsucc); + prevsucc = tmp; + } + } + + prevpred = NULL; + predcount = 0; + for (j = 0; j < LoopNode->numpred; j++) { + tmp = IRO_NodeTable[LoopNode->pred[j]]; + if (!Bv_IsBitSet(tmp->index, InLoop)) { + prevpred = tmp; + predcount++; + } + } + + if ( + predcount == 1 && + prevpred->last->type == IROLinearGoto && + prevpred->nextnode == prevsucc && + prevsucc != LoopNode + ) + { + success = UnrollStandardLoop(header, prevpred, prevsucc, count); + } + } + } else { + IRO_Dump(" LoopUnroll:Loop with header = %d is not a conditional loop\n", header->index); + } + + if (!success) + return; + + IRO_NodeTable = oalloc(sizeof(IRONode *) * IRO_NumNodes); + memset(IRO_NodeTable, 0, sizeof(IRONode *) * IRO_NumNodes); + for (tmp = IRO_FirstNode; tmp; tmp = tmp->nextnode) + IRO_NodeTable[tmp->index] = tmp; + IRO_ComputeSuccPred(); + IRO_ComputeDom(); + if (success) + IRO_Dump(" LoopUnroll:Loop with header = %d Unrolled\n", header->index); +} + +static int IsLoopUnrollable(IROLoop *loop) { + CInt64 tmp; + + if (loop->flags & LP_LOOP_HAS_ASM) { + IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_ASM \n"); + return 0; + } + if (loop->flags & LP_IFEXPR_NON_CANONICAL) { + IRO_Dump("IsLoopUnrollable:No due to LP_IFEXPR_NON_CANONICAL \n"); + return 0; + } + if (loop->flags & LP_LOOP_HAS_CALLS) { + IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_CALLS \n"); + return 0; + } + if (loop->flags & LP_LOOP_HAS_CNTRLFLOW) { + IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_CNTRLFLOW \n"); + return 0; + } + if (loop->flags & LP_INDUCTION_NOT_FOUND) { + IRO_Dump("IsLoopUnrollable:No due to LP_INDUCTION_NOT_FOUND \n"); + return 0; + } + if (loop->flags & LP_LOOP_HDR_HAS_SIDEEFFECTS) { + IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HDR_HAS_SIDEEFFECTS \n"); + return 0; + } + if (!(loop->flags & LoopFlags_200)) { + IRO_Dump("IsLoopUnrollable:No because header does not follow induction update \n"); + return 0; + } + + if (!(loop->flags & LoopFlags_10000)) { + IROLinear *upperBound = loop->nd18->u.diadic.right; + if (!IRO_IsIntConstant(upperBound) && !(upperBound->flags & IROLF_LoopInvariant)) { + IRO_Dump("IsLoopUnrollable:No because Loop Upper Bound is Variant in the loop\n"); + return 0; + } + if (!loop->nd14) { + IRO_Dump("IsLoopUnrollable:No because there is no initialization of loop index in PreHeader\n"); + return 0; + } + if (!IRO_IsVariable(loop->nd14->u.diadic.left)) { + IRO_Dump("IsLoopUnrollable:No because initial value of induction stored thru pointer\n"); + return 0; + } + + if (!IRO_IsUnsignedType(loop->nd14->rtype)) { + if (IRO_IsIntConstant(loop->nd14->u.diadic.right)) { + if (!CInt64_GreaterEqual(loop->nd14->u.diadic.right->u.node->data.intval, cint64_zero)) { + IRO_Dump("IsLoopUnrollable:No because initial value of induction is signed but init < 0\n"); + return 0; + } + } else if (IsIterationCountConstant(loop, &tmp)) { + IRO_Dump("IsLoopUnrollable:Yes, the limits substract out to be constants\n"); + } else { + IRO_Dump("IsLoopUnrollable:No because initial value of induction is signed and not constant\n"); + return 0; + } + } + + if (!(loop->flags & LP_LOOP_STEP_ISADD)) { + IRO_Dump("IsLoopUnrollable:No because LP_LOOP_STEP_ISADD is not set i.e induciton is not updated by 1\n"); + return 0; + } + + } else { + if (!IRO_IsUnsignedType(loop->nd18->u.diadic.left->rtype)) { + IRO_Dump("IsLoopUnrollable:No because the while loop induction is signed\n"); + return 0; + } + if (!(loop->flags & LoopFlags_2000)) { + IRO_Dump("IsLoopUnrollable:No because the while loop operator is not of decrement form\n"); + return 0; + } + } + + if (loop->sizeBySomeMeasurement > copts.unrollinstrfactor) { + IRO_Dump("IsLoopUnrollable:No because loop size greater than threshold\n"); + return 0; + } + + return 1; +} + +IROLinear *BuildEarlyLoopExitTest(IROLinearType type, IROList *list) { + IROLinear *nd = IRO_NewLinear(IROLinearOp1Arg); + nd->index = ++IRO_NumLinear; + if (type == IROLinearIf) + nd->type = IROLinearIfNot; + else + nd->type = IROLinearIf; + IRO_AddToList(nd, list); + return nd; +} + +IROLinear *BuildLoopExitTest(IROLinearType type, IROList *list) { + IROLinear *nd = IRO_NewLinear(IROLinearOp1Arg); + nd->index = ++IRO_NumLinear; + nd->type = type; + IRO_AddToList(nd, list); + return nd; +} + +int IsIterationCountConstant(IROLoop *loop, CInt64 *pval) { + IROLinear *lowerBound; + IROLinear *upperBound; + Type *type; + int isUnsigned; + IROAddrRecord *lowerRec; + IROAddrRecord *upperRec; + CInt64 lowerval; + CInt64 upperval; + CInt64 incval; + CInt64 negOne; + + lowerBound = loop->nd14->u.diadic.right; + if (loop->flags & LoopFlags_1) { + upperBound = loop->nd18->u.diadic.right; + type = loop->nd18->u.diadic.right->rtype; + } else { + upperBound = loop->nd18->u.diadic.left; + type = loop->nd18->u.diadic.left->rtype; + } + + isUnsigned = IRO_IsUnsignedType(type); + + if (IRO_IsIntConstant(lowerBound) && IRO_IsIntConstant(upperBound)) { + lowerval = lowerBound->u.node->data.intval; + upperval = upperBound->u.node->data.intval; + if (isUnsigned) { + if (CInt64_LessEqualU(upperval, lowerval)) + return 0; + } else { + if (CInt64_LessEqual(upperval, lowerval)) + return 0; + } + + CInt64_SetLong(&incval, loop->induction->addConst); + CInt64_SetLong(&negOne, -1); + *pval = CInt64_Sub(upperval, lowerval); + *pval = CInt64_Add(*pval, incval); + + if (IS_LINEAR_DIADIC(loop->nd18, ELESS)) + *pval = CInt64_Add(*pval, negOne); + + CError_ASSERT(2486, !CInt64_IsZero(&incval)); + + if (isUnsigned) + *pval = CInt64_DivU(*pval, incval); + else + *pval = CInt64_Div(*pval, incval); + + if (CInt64_Equal(*pval, cint64_zero)) + return 0; + + if (isUnsigned) { + CError_ASSERT(2508, !CInt64_LessEqualU(*pval, cint64_zero)); + } else { + CError_ASSERT(2517, !CInt64_LessEqual(*pval, cint64_zero)); + } + + return 1; + } + + lowerRec = IRO_InitAddrRecordPointer(lowerBound); + upperRec = IRO_InitAddrRecordPointer(upperBound); + + if (IS_LINEAR_DIADIC(lowerBound, EADD)) { + IRO_DecomposeAddressExpression(lowerBound, lowerRec); + } else if (IRO_IsIntConstant(lowerBound)) { + lowerRec->numInts++; + IRO_AddElmToList(lowerBound, &lowerRec->ints); + lowerRec->numObjRefs = 0; + lowerRec->numMisc = 0; + } else { + lowerRec->numMisc++; + IRO_AddElmToList(lowerBound, &lowerRec->misc); + lowerRec->numObjRefs = 0; + lowerRec->numInts = 0; + } + + if (IS_LINEAR_DIADIC(upperBound, EADD)) { + IRO_DecomposeAddressExpression(upperBound, upperRec); + } else if (IRO_IsIntConstant(upperBound)) { + upperRec->numInts++; + IRO_AddElmToList(upperBound, &upperRec->ints); + upperRec->numObjRefs = 0; + upperRec->numMisc = 0; + } else { + upperRec->numMisc++; + IRO_AddElmToList(upperBound, &upperRec->misc); + upperRec->numObjRefs = 0; + upperRec->numInts = 0; + } + + if (IsDifferenceOfTermsConstant(lowerRec, upperRec, isUnsigned, pval)) { + if (IS_LINEAR_DIADIC(loop->nd18, ELESSEQU)) + *pval = CInt64_Add(*pval, cint64_one); + return 1; + } + + return 0; +} + +static int IsDifferenceOfTermsConstant(IROAddrRecord *lowerRec, IROAddrRecord *upperRec, int isUnsigned, CInt64 *pval) { + UInt32 i; + CInt64 upperval; + CInt64 lowerval; + IROElmList *el; + IROLinear *nd; + + if (upperRec->numObjRefs == lowerRec->numObjRefs && upperRec->numObjRefs != 0) + return 0; + else if (upperRec->numObjRefs != lowerRec->numObjRefs) + return 0; + + if (upperRec->numMisc == lowerRec->numMisc && upperRec->numMisc != 0) { + for (i = 0; i < upperRec->numMisc; i++) { + // bug? surely this should index on i...? + if (!IRO_ExprsSame(lowerRec->misc->element, upperRec->misc->element)) + return 0; + } + } else if (upperRec->numMisc != lowerRec->numMisc) { + return 0; + } + + upperval = cint64_zero; + for (el = upperRec->ints; el; el = el->next) { + nd = el->element; + upperval = CMach_CalcIntDiadic(nd->rtype, upperval, '+', nd->u.node->data.intval); + } + + lowerval = cint64_zero; + for (el = lowerRec->ints; el; el = el->next) { + nd = el->element; + lowerval = CMach_CalcIntDiadic(nd->rtype, lowerval, '+', nd->u.node->data.intval); + } + + if (CInt64_Equal(upperval, lowerval)) + return 0; + + if (CInt64_Greater(upperval, lowerval)) { + *pval = CInt64_Sub(upperval, lowerval); + return 1; + } else { + return 0; + } +} + +void NoOpBlock(IRONode *fnode) { + IROLinear *last, *scan; + + for (scan = fnode->first, last = fnode->last; scan; scan = scan->next) { + scan->type = IROLinearNop; + if (scan == last) + break; + } +} + +void IRO_TestConstantIterationCount(IROLoop *loop, CInt64 *iterCount, SInt32 vectorStride, UInt32 *unrollFactor, SInt32 *leftOver, UInt32 *needOrigLoop, UInt32 *needUnrollBodyTest, UInt32 *resetUnrolledFinalValue) { + UInt32 isUnsigned; + CInt64 val; + CInt64 val3; + CInt64 mod; + CInt64 val2; + CInt64 loopvar3; + CInt64 loopvar1; + CInt64 loopvar2; + CInt64 strideVal; + + CError_ASSERT(2737, *unrollFactor); + + isUnsigned = IRO_IsUnsignedType( + (loop->flags & LoopFlags_1) ? loop->nd18->u.diadic.right->rtype :loop->nd18->u.diadic.left->rtype); + + CError_ASSERT(2756, vectorStride); + + strideVal = IRO_MakeLong(vectorStride); + if (isUnsigned ? CInt64_LessU(*iterCount, strideVal) : CInt64_Less(*iterCount, strideVal)) { + *needOrigLoop = 1; + *needUnrollBodyTest = 0; + *unrollFactor = 0; + *leftOver = CInt64_GetULong(iterCount); + } else { + switch (vectorStride) { + case 1: + val = *iterCount; + break; + case 2: + val = CInt64_ShrU(*iterCount, cint64_one); + break; + case 4: + val = CInt64_ShrU(*iterCount, IRO_MakeLong(2)); + break; + case 8: + val = CInt64_ShrU(*iterCount, IRO_MakeLong(3)); + break; + case 16: + val = CInt64_ShrU(*iterCount, IRO_MakeLong(4)); + break; + default: + val = CInt64_Div(*iterCount, strideVal); + } + + if (CInt64_LessU(val, IRO_MakeLong(*unrollFactor))) + *unrollFactor = CInt64_GetULong(&val); + + CInt64_SetLong(&val2, *unrollFactor); + switch (vectorStride) { + case 1: + val3 = cint64_zero; + break; + case 2: + val3 = CInt64_And(*iterCount, cint64_one); + break; + case 4: + val3 = CInt64_And(*iterCount, IRO_MakeLong(3)); + break; + case 8: + val3 = CInt64_And(*iterCount, IRO_MakeLong(7)); + break; + case 16: + val3 = CInt64_And(*iterCount, IRO_MakeLong(15)); + break; + default: + val3 = CInt64_Mod(*iterCount, strideVal); + } + + if (CInt64_LessEqualU(val, IRO_MakeLong(8))) { + *needUnrollBodyTest = vectorStride > 1; + *unrollFactor = CInt64_GetULong(&val); + *leftOver = CInt64_GetULong(&val3); + *needOrigLoop = *leftOver != 0; + *resetUnrolledFinalValue = !(*needOrigLoop && *needUnrollBodyTest); + } else { + loopvar1 = IRO_MakeLong(0x7FFFFFFF); + loopvar2 = IRO_MakeLong(0x7FFFFFFF); + do { + mod = CInt64_Mod(val, val2); + loopvar3 = CInt64_Add(CInt64_Mul(mod, strideVal), val3); + if (CInt64_Less(loopvar3, loopvar2)) { + loopvar2 = loopvar3; + loopvar1 = val2; + } + if (vectorStride > 1) + break; + val2 = CInt64_Add(val2, cint64_negone); + } while (CInt64_GreaterEqualU(CInt64_Mul(val2, val2), val)); + + *unrollFactor = CInt64_GetULong(&loopvar1); + *leftOver = CInt64_GetULong(&loopvar2); + *needOrigLoop = *leftOver != 0; + *needUnrollBodyTest = CInt64_Less(loopvar1, val) || vectorStride > 1; + *resetUnrolledFinalValue = !(*needOrigLoop && *needUnrollBodyTest); + } + } + + IRO_Dump( + "---- IterCount = %d, VectorStride = %d, UnrollFactor = %d, LeftOver = %d,\n" + "\tNeedOrigLoop = %d, NeedUnrollBodyTest = %d, ResetUnrolledFinalValue = %d\n", + CInt64_GetULong(iterCount), vectorStride, *unrollFactor, *leftOver, + *needOrigLoop, *needUnrollBodyTest, *resetUnrolledFinalValue + ); +} + +IROLinear *BuildOrigIterationCount(IROList *list, IROLoop *loop) { + IROLinear *upperBound; + IROLinear *nd29b; + IROLinear *lowerBound; + IROLinear *finalCount; + IROLinear *divisor; + Type *type; + IROLinear *nd25; + IROLinear *tmp; + Boolean isZeroBase; + Object *tempobj; + IROLinear *iterCount; + IROLinear *negone; + IROLinear *ass; + ENode *expr; + SInt32 powval; + + isZeroBase = 0; + lowerBound = loop->nd14->u.diadic.right; + if (IRO_IsIntConstant(lowerBound) && CInt64_Equal(lowerBound->u.node->data.intval, cint64_zero)) + isZeroBase = 1; + + if (!isZeroBase) + lowerBound = IRO_DuplicateExpr(lowerBound, list); + + if (loop->flags & LoopFlags_1) { + upperBound = IRO_DuplicateExpr(loop->nd18->u.diadic.right, list); + type = loop->nd18->u.diadic.right->rtype; + } else { + upperBound = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list); + type = loop->nd18->u.diadic.left->rtype; + } + + CError_ASSERT(2924, loop->induction); + CError_ASSERT(2929, loop->induction->addConst); + + divisor = IRO_NewLinear(IROLinearOperand); + divisor->index = ++IRO_NumLinear; + divisor->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, loop->induction->addConst); + divisor->u.node = expr; + + if (isZeroBase) { + iterCount = upperBound; + } else { + iterCount = IRO_NewLinear(IROLinearOp2Arg); + iterCount->index = ++IRO_NumLinear; + iterCount->nodetype = ESUB; + iterCount->u.diadic.left = upperBound; + iterCount->u.diadic.right = lowerBound; + iterCount->rtype = type; + IRO_AddToList(iterCount, list); + } + + nd25 = IRO_DuplicateExpr(divisor, list); + + nd29b = IRO_NewLinear(IROLinearOp2Arg); + nd29b->index = ++IRO_NumLinear; + nd29b->nodetype = EADD; + nd29b->u.diadic.left = iterCount; + nd29b->u.diadic.right = nd25; + nd29b->rtype = type; + IRO_AddToList(nd29b, list); + + if (loop->nd18->type == IROLinearOp2Arg && loop->nd18->nodetype == ELESS) { + tmp = nd29b; + + negone = IRO_NewLinear(IROLinearOperand); + negone->index = ++IRO_NumLinear; + negone->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, -1); + negone->u.node = expr; + IRO_AddToList(negone, list); + + nd29b = IRO_NewLinear(IROLinearOp2Arg); + nd29b->index = ++IRO_NumLinear; + nd29b->nodetype = EADD; + nd29b->u.diadic.left = tmp; + nd29b->u.diadic.right = negone; + nd29b->rtype = type; + IRO_AddToList(nd29b, list); + } + + if (CInt64_Equal(divisor->u.node->data.intval, cint64_one)) { + finalCount = nd29b; + } else { + if (divisor->rtype->size <= 4 && IS_TYPE_INT(divisor->rtype) && IRO_IsPow2(divisor, &powval)) { + finalCount = IRO_NewLinear(IROLinearOp2Arg); + finalCount->index = ++IRO_NumLinear; + finalCount->nodetype = ESHL; + finalCount->u.diadic.left = nd29b; + finalCount->u.diadic.right = divisor; + CInt64_SetLong(&divisor->u.node->data.intval, powval); + finalCount->rtype = type; + IRO_AddToList(divisor, list); + IRO_AddToList(finalCount, list); + } else { + finalCount = IRO_NewLinear(IROLinearOp2Arg); + finalCount->index = ++IRO_NumLinear; + finalCount->nodetype = EDIV; + finalCount->u.diadic.left = nd29b; + finalCount->u.diadic.right = divisor; + finalCount->rtype = type; + IRO_AddToList(divisor, list); + IRO_AddToList(finalCount, list); + } + } + + tempobj = create_temp_object(type); + IRO_FindVar(tempobj, 1, 1); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EASS; + ass->u.diadic.left = IRO_TempReference(tempobj, list); + ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.right = finalCount; + ass->u.diadic.right->flags |= IROLF_Reffed; + ass->rtype = type; + IRO_AddToList(ass, list); + + return ass->u.diadic.left; +} + +static IROLinear *BuildOrigIterationCount_DoWhile(IROList *list, IROLoop *loop) { + IROLinear *finalCount; + IROLinear *count; + IROLinear *ass; + Type *type; + Object *tempobj; + ENode *expr; + + type = loop->nd18->u.diadic.left->rtype; + + count = IRO_NewLinear(IROLinearOperand); + count->index = ++IRO_NumLinear; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + expr->data.intval = cint64_one; + count->u.node = expr; + count->rtype = type; + IRO_AddToList(count, list); + count->flags |= IROLF_Reffed; + + finalCount = IRO_NewLinear(IROLinearOp2Arg); + finalCount->index = ++IRO_NumLinear; + finalCount->nodetype = EADD; + finalCount->rtype = type; + finalCount->u.diadic.left = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list); + finalCount->u.diadic.left->flags |= IROLF_Reffed; + finalCount->u.diadic.left->flags &= ~IROLF_Assigned; + finalCount->u.diadic.left->u.monadic->flags &= ~IROLF_Assigned; + finalCount->u.diadic.right = count; + IRO_AddToList(finalCount, list); + + tempobj = create_temp_object(type); + IRO_FindVar(tempobj, 1, 1); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EASS; + ass->u.diadic.left = IRO_TempReference(tempobj, list); + ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.right = finalCount; + ass->rtype = type; + IRO_AddToList(ass, list); + + return ass->u.diadic.left; +} + +IROLinear *BuildNewFinalvalue(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) { + IROLinear *sub; + IROLinear *addvalue; + Type *type; + IROLinear *ass; + IROLinear *dupbound; + Object *tempobj; + ENode *expr; + + type = iterCount->rtype; + + addvalue = IRO_NewLinear(IROLinearOperand); + addvalue->index = ++IRO_NumLinear; + addvalue->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, loop->induction->addConst * unrollFactor); + addvalue->u.node = expr; + IRO_AddToList(addvalue, list); + + if (loop->flags & LoopFlags_1) + dupbound = IRO_DuplicateExpr(loop->nd18->u.diadic.right, list); + else + dupbound = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list); + + sub = IRO_NewLinear(IROLinearOp2Arg); + sub->index = ++IRO_NumLinear; + sub->nodetype = ESUB; + sub->u.diadic.left = dupbound; + sub->u.diadic.right = addvalue; + sub->rtype = type; + IRO_AddToList(sub, list); + + tempobj = create_temp_object(type); + IRO_FindVar(tempobj, 1, 1); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EASS; + ass->u.diadic.left = IRO_TempReference(tempobj, list); + ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.right = sub; + ass->u.diadic.right->flags |= IROLF_Reffed; + ass->rtype = type; + IRO_AddToList(ass, list); + + return ass->u.diadic.left; +} + +static IROLinear *BuildPreAlignTemp(IROLoopInd *ind, UInt32 unrollFactor, IROList *list) { + Type *type; + IROLinear *indnd; + IROLinear *factornd; + IROLinear *div; + IROLinear *constnd; + IROLinear *add; + IROLinear *mul; + IROLinear *ass; + Object *tempobj; + ENode *expr; + + indnd = ind->nd; + type = indnd->rtype; + + factornd = IRO_NewLinear(IROLinearOperand); + factornd->index = ++IRO_NumLinear; + factornd->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, unrollFactor); + factornd->u.node = expr; + IRO_AddToList(factornd, list); + + if (indnd->type == IROLinearOp1Arg) + IRO_DuplicateExpr(indnd->u.monadic, list); + else + IRO_DuplicateExpr(indnd->u.diadic.left, list); + + list->tail->flags &= ~IROLF_Assigned; + list->tail->u.monadic->flags &= ~IROLF_Assigned; + + div = IRO_NewLinear(IROLinearOp2Arg); + div->index = ++IRO_NumLinear; + div->nodetype = EDIV; + div->u.diadic.left = list->tail; + div->u.diadic.right = factornd; + div->rtype = type; + IRO_AddToList(div, list); + div->flags |= IROLF_Reffed; + + constnd = IRO_NewLinear(IROLinearOperand); + constnd->index = ++IRO_NumLinear; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + expr->data.intval = cint64_one; + constnd->u.node = expr; + constnd->rtype = type; + IRO_AddToList(constnd, list); + constnd->flags |= IROLF_Reffed; + + add = IRO_NewLinear(IROLinearOp2Arg); + add->index = ++IRO_NumLinear; + add->nodetype = EADD; + add->u.diadic.left = div; + add->u.diadic.right = constnd; + add->rtype = type; + IRO_AddToList(add, list); + add->flags |= IROLF_Reffed; + + IRO_DuplicateExpr(factornd, list); + + mul = IRO_NewLinear(IROLinearOp2Arg); + mul->index = ++IRO_NumLinear; + mul->nodetype = EMUL; + mul->u.diadic.left = add; + mul->u.diadic.right = list->tail; + mul->rtype = type; + IRO_AddToList(mul, list); + mul->flags |= IROLF_Reffed; + + tempobj = create_temp_object(type); + IRO_FindVar(tempobj, 1, 1); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EASS; + ass->u.diadic.left = IRO_TempReference(tempobj, list); + ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.right = mul; + ass->u.diadic.right->flags |= IROLF_Reffed; + ass->rtype = type; + IRO_AddToList(ass, list); + + return ass->u.diadic.left; +} + +static IROLinear *BuildNewFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) { + IROLinear *addvalue; + IROLinear *add; + IROLinear *mul; + IROLinear *ass; + Type *type; + Object *tempobj; + ENode *expr; + + type = iterCount->rtype; + + addvalue = IRO_NewLinear(IROLinearOperand); + addvalue->index = ++IRO_NumLinear; + addvalue->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, loop->induction->addConst); + addvalue->u.node = expr; + IRO_AddToList(addvalue, list); + addvalue->flags |= IROLF_Reffed; + + mul = IRO_NewLinear(IROLinearOp2Arg); + mul->index = ++IRO_NumLinear; + mul->nodetype = EMUL; + mul->u.diadic.left = IRO_DuplicateExpr(iterCount, list); + mul->u.diadic.right = addvalue; + mul->rtype = type; + IRO_AddToList(mul, list); + mul->flags |= IROLF_Reffed; + mul->u.diadic.left->flags &= ~IROLF_Assigned; + mul->u.diadic.left->u.diadic.left->flags &= ~IROLF_Assigned; + + if (loop->induction->nd->type == IROLinearOp1Arg) + IRO_DuplicateExpr(loop->induction->nd->u.monadic, list); + else + IRO_DuplicateExpr(loop->induction->nd->u.diadic.left, list); + list->tail->flags &= ~IROLF_Assigned; + list->tail->u.diadic.left->flags &= ~IROLF_Assigned; + + add = IRO_NewLinear(IROLinearOp2Arg); + add->index = ++IRO_NumLinear; + add->nodetype = EADD; + add->u.diadic.left = mul; + add->u.diadic.right = list->tail; + add->rtype = type; + IRO_AddToList(add, list); + add->flags |= IROLF_Reffed; + + tempobj = create_temp_object(type); + IRO_FindVar(tempobj, 1, 1); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EASS; + ass->u.diadic.left = IRO_TempReference(tempobj, list); + ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.right = add; + ass->u.diadic.right->flags |= IROLF_Reffed; + ass->rtype = type; + IRO_AddToList(ass, list); + + return ass->u.diadic.left; +} + +static IROLinear *BuildUnrolledFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) { + IROLinear *addvalue_mult; + IROLinear *addvalue; + IROLinear *mul; + IROLinear *sub; + IROLinear *add; + IROLinear *ass; + Type *type; + Object *tempobj; + ENode *expr; + + type = iterCount->rtype; + + addvalue_mult = IRO_NewLinear(IROLinearOperand); + addvalue_mult->index = ++IRO_NumLinear; + addvalue_mult->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, loop->induction->addConst * unrollFactor); + addvalue_mult->u.node = expr; + IRO_AddToList(addvalue_mult, list); + addvalue_mult->flags |= IROLF_Reffed; + + addvalue = IRO_NewLinear(IROLinearOperand); + addvalue->index = ++IRO_NumLinear; + addvalue->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, loop->induction->addConst); + addvalue->u.node = expr; + IRO_AddToList(addvalue, list); + addvalue->flags |= IROLF_Reffed; + + mul = IRO_NewLinear(IROLinearOp2Arg); + mul->index = ++IRO_NumLinear; + mul->nodetype = EMUL; + mul->u.diadic.left = IRO_DuplicateExpr(iterCount, list); + mul->u.diadic.right = addvalue; + mul->rtype = type; + IRO_AddToList(mul, list); + mul->flags |= IROLF_Reffed; + mul->u.diadic.left->flags &= ~IROLF_Assigned; + mul->u.diadic.left->u.diadic.left->flags &= ~IROLF_Assigned; + + sub = IRO_NewLinear(IROLinearOp2Arg); + sub->index = ++IRO_NumLinear; + sub->nodetype = ESUB; + sub->u.diadic.left = mul; + sub->u.diadic.right = addvalue_mult; + sub->rtype = type; + IRO_AddToList(sub, list); + sub->flags |= IROLF_Reffed; + + if (loop->induction->nd->type == IROLinearOp1Arg) + IRO_DuplicateExpr(loop->induction->nd->u.monadic, list); + else + IRO_DuplicateExpr(loop->induction->nd->u.diadic.left, list); + list->tail->flags &= ~IROLF_Assigned; + list->tail->u.diadic.left->flags &= ~IROLF_Assigned; + + add = IRO_NewLinear(IROLinearOp2Arg); + add->index = ++IRO_NumLinear; + add->nodetype = EADD; + add->u.diadic.left = sub; + add->u.diadic.right = list->tail; + add->rtype = type; + IRO_AddToList(add, list); + add->flags |= IROLF_Reffed; + + tempobj = create_temp_object(type); + IRO_FindVar(tempobj, 1, 1); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EASS; + ass->u.diadic.left = IRO_TempReference(tempobj, list); + ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind; + ass->u.diadic.right = add; + ass->u.diadic.right->flags |= IROLF_Reffed; + ass->rtype = type; + IRO_AddToList(ass, list); + + return ass->u.diadic.left; +} + +void BuildUnrolledBodyEntryTest(IROList *list, IROLinear *iterCount, UInt32 unrollFactor, CLabel *label) { + Type *type; + IROLinear *ifnot; + IROLinear *comp; + IROLinear *var; + IROLinear *value; + ENode *expr; + + type = iterCount->rtype; + + value = IRO_NewLinear(IROLinearOperand); + value->index = ++IRO_NumLinear; + value->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, unrollFactor); + value->u.node = expr; + IRO_AddToList(value, list); + + var = IRO_DuplicateExpr(iterCount, list); + + comp = IRO_NewLinear(IROLinearOp2Arg); + comp->index = ++IRO_NumLinear; + comp->nodetype = EGREATER; + comp->u.diadic.left = var; + comp->u.diadic.right = value; + comp->u.diadic.right->flags |= IROLF_Reffed; + comp->rtype = type; + IRO_AddToList(comp, list); + + ifnot = IRO_NewLinear(IROLinearOp1Arg); + ifnot->index = ++IRO_NumLinear; + ifnot->type = IROLinearIfNot; + ifnot->u.label.x4 = comp; + ifnot->u.label.x4->flags |= IROLF_Reffed; + ifnot->rtype = type; + ifnot->u.label.label = label; + IRO_AddToList(ifnot, list); +} + +void ChangeInductionReference(IROLinear *first, IROLinear *last, CInt64 val, IROLoop *loop) { + IROLinear *nd; + IROLinear *value; + IROLinear *add; + UInt32 isUnsigned; + IROLinear *father; + Boolean flag; + IROLinear *father2; + IROLinear *father3; + Type *tmp; + UInt32 flag2; + Object *varobj; + IROLinear *next; + ENode *expr; + Type *type; + + CInt64 val2; + CInt64 val1; + IROList list; + + type = loop->induction->nd->rtype; + isUnsigned = IRO_IsUnsignedType(type); + + for (nd = first; nd; nd = next) { + next = nd->next; + + varobj = IRO_IsVariable(nd); + if (varobj && loop->induction->var->object == varobj) { + value = IRO_NewLinear(IROLinearOperand); + value->index = ++IRO_NumLinear; + value->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + expr->data.intval = val; + value->u.node = expr; + + add = IRO_NewLinear(IROLinearOp2Arg); + add->index = ++IRO_NumLinear; + add->nodetype = EADD; + add->rtype = type; + + father = IRO_LocateFather(nd); + flag = 1; + if (father && IS_LINEAR_MONADIC(father, ETYPCON)) { + tmp = father->rtype; + father = IRO_LocateFather(father); + if (tmp->type != nd->rtype->type || tmp->size < nd->rtype->size) + flag = 0; + } + + flag2 = 0; + if ( + flag && + father && + IS_LINEAR_DIADIC_2(father, ESHL, EMUL) && + IRO_IsIntConstant(father->u.diadic.right) && + (father2 = IRO_LocateFather(father)) && + IS_LINEAR_DIADIC(father2, EADD) && + father2->u.diadic.right == father && + (father3 = IRO_LocateFather(father2)) + ) + { + IRO_InitList(&list); + val2 = father->u.diadic.right->u.node->data.intval; + if (father->nodetype == ESHL) + val2 = CInt64_Shl(cint64_one, val2); + + val1 = value->u.node->data.intval; + if (isUnsigned) + val1 = CInt64_MulU(val2, val1); + else + val1 = CInt64_Mul(val2, val1); + value->u.node->data.intval = val1; + + IRO_AddToList(value, &list); + IRO_AddToList(add, &list); + add->u.diadic.right = value; + IRO_Paste(list.head, list.tail, father3); + IRO_LocateFather_Cut_And_Paste_Without_Nopping(father2, add); + add->u.diadic.left = father2; + add->rtype = father2->rtype; + flag2 = 1; + } + + if (!flag2) { + add->u.diadic.right = value; + add->u.diadic.right->flags |= IROLF_Reffed; + value->next = add; + + add->u.diadic.left = nd; + IRO_LocateFather_Cut_And_Paste_Without_Nopping(nd, add); + add->flags |= IROLF_Reffed; + + nd->next = value; + add->next = next; + } + } + + if (nd == last) + break; + } +} + +IROLinear *UpdateInductionIncrement(IROLoop *loop, SInt32 value, IROLinear *before) { + IROLinear *ind_nd; + IROLinear *addvalue; + IROLinear *ass; + Type *type; + ENode *expr; + IROList list; + + IRO_InitList(&list); + ind_nd = loop->induction->nd; + type = ind_nd->rtype; + + addvalue = IRO_NewLinear(IROLinearOperand); + addvalue->index = ++IRO_NumLinear; + addvalue->rtype = type; + expr = IRO_NewENode(EINTCONST); + expr->rtype = type; + CInt64_SetLong(&expr->data.intval, value * loop->induction->addConst); + addvalue->u.node = expr; + IRO_AddToList(addvalue, &list); + + if (IS_LINEAR_MONADIC_2(ind_nd, EPREINC, EPOSTINC)) { + ind_nd = IRO_DuplicateExpr(ind_nd->u.monadic, &list); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EADDASS; + ass->u.diadic.left = ind_nd; + ass->u.diadic.right = addvalue; + ass->rtype = type; + IRO_AddToList(ass, &list); + } else if (IS_LINEAR_MONADIC_2(ind_nd, EPREDEC, EPOSTDEC)) { + ind_nd = IRO_DuplicateExpr(ind_nd->u.monadic, &list); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = ESUBASS; + ass->u.diadic.left = ind_nd; + ass->u.diadic.right = addvalue; + ass->rtype = type; + IRO_AddToList(ass, &list); + } else if (IS_LINEAR_DIADIC(ind_nd, EADDASS)) { + ind_nd = IRO_DuplicateExpr(ind_nd->u.monadic, &list); + + ass = IRO_NewLinear(IROLinearOp2Arg); + ass->index = ++IRO_NumLinear; + ass->nodetype = EADDASS; + ass->u.diadic.left = ind_nd; + ass->u.diadic.right = addvalue; + ass->rtype = type; + IRO_AddToList(ass, &list); + } + + IRO_Paste(list.head, list.tail, before); + return list.tail; +} + +void GenInitialAssignment(IROLoop *loop, Object *var, IROList *list) { + Type *type; + IROLinear *nd; + + CError_ASSERT(3924, loop->nd14 && loop->nd14->type == IROLinearOp2Arg); + + type = loop->induction->nd->rtype; + + nd = IRO_NewLinear(IROLinearOp2Arg); + nd->index = ++IRO_NumLinear; + nd->nodetype = EASS; + nd->u.diadic.left = IRO_TempReference(var, list); + nd->u.diadic.right = IRO_DuplicateExpr(loop->nd14->u.diadic.right, list); + nd->rtype = type; + IRO_AddToList(nd, list); +} + +void GenNewInduction(void) { + CError_FATAL(3941); +} |