summaryrefslogtreecommitdiff
path: root/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer
diff options
context:
space:
mode:
authorAsh Wolf <ninji@wuffs.org>2023-01-26 11:30:47 +0000
committerAsh Wolf <ninji@wuffs.org>2023-01-26 11:30:47 +0000
commit094b96ca1df4a035b5f93c351f773306c0241f3f (patch)
tree95ce05e3ebe816c7ee7996206bb37ea17d8ca33c /compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer
parentfc0c4c0df7b583b55a08317cf1ef6a71d27c0440 (diff)
downloadMWCC-094b96ca1df4a035b5f93c351f773306c0241f3f.tar.gz
MWCC-094b96ca1df4a035b5f93c351f773306c0241f3f.zip
move lots of source files around to match their actual placement in the original treemain
Diffstat (limited to 'compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer')
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c747
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c906
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c643
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c885
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c1553
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c751
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c661
-rw-r--r--compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c548
8 files changed, 6694 insertions, 0 deletions
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c
new file mode 100644
index 0000000..8223bf4
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c
@@ -0,0 +1,747 @@
+#include "compiler/Alias.h"
+#include "compiler/CClass.h"
+#include "compiler/CError.h"
+#include "compiler/CParser.h"
+#include "compiler/CMachine.h"
+#include "compiler/CodeGen.h"
+#include "compiler/CopyPropagation.h"
+#include "compiler/PCode.h"
+#include "compiler/PCodeInfo.h"
+#include "compiler/RegisterInfo.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/ValueNumbering.h"
+#include "compiler/BitVectors.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/objects.h"
+#include "compiler/types.h"
+
+static Alias *aliases;
+static int n_aliases;
+static int n_gathered_aliases;
+static Alias *alias_hash[997];
+Alias *worst_case;
+Object worst_case_obj;
+
+static TypePointer worst_case_memory_type = {
+ TYPEARRAY,
+ 0xFFFFFF,
+ TYPE(&stchar)
+};
+
+static Boolean is_safe_const(Object *obj) {
+ Type *type;
+
+ type = obj->type;
+ while (IS_TYPE_ARRAY(type))
+ type = TPTR_TARGET(type);
+
+ if (TYPE_FITS_IN_REGISTER(type) || IS_TYPE_VECTOR(type) || IS_TYPE_FLOAT(type) || IS_TYPE_STRUCT(type))
+ return is_const_object(obj);
+
+ if (IS_TYPE_CLASS(type))
+ return is_const_object(obj) && CClass_IsPODClass(TYPE_CLASS(type));
+
+ return 0;
+}
+
+void initialize_aliases(void) {
+ int i;
+
+ memclrw(&worst_case_obj, sizeof(Object));
+ worst_case_obj.otype = OT_OBJECT;
+ worst_case_obj.type = TYPE(&worst_case_memory_type);
+ worst_case_obj.datatype = DDATA;
+ worst_case_obj.name = GetHashNameNodeExport("@worst_case@");
+
+ aliases = NULL;
+ n_aliases = 0;
+ n_gathered_aliases = 0;
+ for (i = 0; i < 997; i++)
+ alias_hash[i] = NULL;
+
+ worst_case = make_alias_set();
+ add_alias_member(worst_case, make_alias(&worst_case_obj, 0, 0));
+}
+
+static UInt32 hash_alias(Object *object, SInt32 offset, SInt32 size) {
+ return (UInt32) (object->name->hashval * offset * size) % 997;
+}
+
+static Alias *create_alias(AliasType type, Object *object, SInt32 offset, SInt32 size, Boolean addToHash) {
+ Alias *alias;
+ UInt32 hash;
+
+ alias = lalloc(sizeof(Alias));
+ memclrw(alias, sizeof(Alias));
+ alias->type = type;
+ alias->index = n_aliases++;
+ alias->next = aliases;
+ aliases = alias;
+ alias->object = object;
+ alias->offset = offset;
+ alias->size = size;
+
+ if (addToHash) {
+ hash = hash_alias(object, offset, size);
+ alias->hashNext = alias_hash[hash];
+ alias_hash[hash] = alias;
+ }
+
+ return alias;
+}
+
+static Alias *lookup_alias(Object *object, SInt32 offset, SInt32 size) {
+ Alias *scan;
+
+ for (scan = alias_hash[hash_alias(object, offset, size)]; scan; scan = scan->hashNext) {
+ if (scan->object == object && scan->offset == offset && scan->size == size)
+ return scan;
+ }
+
+ return NULL;
+}
+
+Alias *make_alias(Object *object, SInt32 offset, SInt32 size) {
+ Alias *alias;
+ Alias *alias2;
+
+ if (!offset && !size)
+ size = object->type->size;
+
+ alias = lookup_alias(object, offset, size);
+ if (!alias) {
+ if (offset > 0 || size != object->type->size) {
+ alias2 = make_alias(object, 0, object->type->size);
+ alias = create_alias(AliasType1, object, offset, size, 1);
+ add_alias_member(alias2, alias);
+ } else {
+ alias = create_alias(AliasType0, object, offset, size, 1);
+ }
+
+ switch (object->datatype) {
+ case DLOCAL:
+ case DNONLAZYPTR:
+ break;
+ default:
+ if (!is_safe_const(object))
+ add_alias_member(worst_case, make_alias(object, 0, 0));
+ }
+ }
+
+ if (offset > object->type->size)
+ return NULL;
+ else
+ return alias;
+}
+
+Alias *make_alias_set(void) {
+ return create_alias(AliasType2, NULL, 0, 0, 0);
+}
+
+void add_alias_member(Alias *parent, Alias *child) {
+ AliasMember *member;
+
+ if (child->type == AliasType2) {
+ for (member = child->parents; member; member = member->nextParent)
+ add_alias_member(parent, member->child);
+ } else {
+ if (parent == worst_case && child->type == AliasType1)
+ child = make_alias(child->object, 0, 0);
+
+ for (member = parent->parents; member; member = member->nextParent) {
+ if (member->child == child)
+ return;
+ }
+
+ member = lalloc(sizeof(AliasMember));
+ member->parent = parent;
+ member->child = child;
+ member->nextParent = parent->parents;
+ parent->parents = member;
+ member->nextChild = child->children;
+ child->children = member;
+ }
+}
+
+Alias *make_alias_set_from_IR(void) {
+ CError_FATAL(333);
+ return NULL;
+}
+
+static Boolean aliases_overlap(Alias *a, Alias *b) {
+ return (
+ a->offset == b->offset ||
+ (a->offset > b->offset && a->offset < (b->offset + b->size)) ||
+ (b->offset > a->offset && b->offset < (a->offset + a->size))
+ );
+}
+
+static int is_address_load(PCode *pcode) {
+ Object *obj;
+
+ switch (pcode->op) {
+ case PC_LWZ:
+ if (pcode->args[2].kind == PCOp_MEMORY && pcode->args[2].data.mem.obj->datatype == DNONLAZYPTR)
+ return 1;
+ break;
+ case PC_LBZU:
+ case PC_LBZUX:
+ case PC_LHZU:
+ case PC_LHZUX:
+ case PC_LHAU:
+ case PC_LHAUX:
+ case PC_LWZU:
+ case PC_LWZUX:
+ case PC_STBU:
+ case PC_STBUX:
+ case PC_STHU:
+ case PC_STHUX:
+ case PC_STWU:
+ case PC_STWUX:
+ return 1;
+ case PC_ADDI:
+ case PC_ADDIS:
+ if (pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) {
+ if (pcode->args[2].kind == PCOp_MEMORY) {
+ obj = pcode->args[2].data.mem.obj;
+ if (obj->datatype == DLOCAL && !is_safe_const(obj))
+ add_alias_member(worst_case, make_alias(obj, 0, 0));
+ return 0;
+ }
+ } else {
+ return 1;
+ }
+ break;
+ case PC_ADD:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int addresspropagatestouse(int candidateID, int useID) {
+ PCode *candidate_pcode; // r30
+ PCode *use_pcode; // r29
+ int reg; // r28
+ short reg2;
+ Object *object; // r27
+ SInt32 offset; // r26
+ Alias *alias; // r25
+ Boolean flag24; // r24
+ SInt32 size; // r23
+ Alias *aliasSet; // r22
+ int i;
+ PCode *scan;
+ PCodeArg *op;
+
+ candidate_pcode = Candidates[candidateID].pcode;
+ use_pcode = Uses[useID].pcode;
+ flag24 = 0;
+ size = 1;
+ reg = candidate_pcode->args[0].data.reg.reg;
+
+ if (candidate_pcode->alias && (candidate_pcode->alias->type == AliasType0 || candidate_pcode->alias->type == AliasType1)) {
+ object = candidate_pcode->alias->object;
+ offset = candidate_pcode->alias->offset;
+ if (offset == 0 && candidate_pcode->alias->size == object->type->size)
+ flag24 = 1;
+ } else if (candidate_pcode->args[2].kind == PCOp_MEMORY) {
+ object = candidate_pcode->args[2].data.mem.obj;
+ if (candidate_pcode->op == PC_ADDIS)
+ offset = candidate_pcode->args[2].data.mem.offset << 16;
+ else
+ offset = candidate_pcode->args[2].data.mem.offset;
+ } else {
+ return 0;
+ }
+
+ CError_ASSERT(478, object->otype == OT_OBJECT);
+
+ if ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) {
+ reg = candidate_pcode->args[1].data.reg.reg;
+ offset = 0;
+ flag24 = 1;
+ } else if (candidate_pcode->op == PC_LWZ) {
+ if (object->datatype != DNONLAZYPTR)
+ return 0;
+
+ object = object->u.var.realObj;
+ CError_ASSERT(495, object->otype == OT_OBJECT);
+ offset = 0;
+ } else if (candidate_pcode->op == PC_ADDI) {
+ if (!candidate_pcode->alias && object)
+ candidate_pcode->alias = make_alias(object, offset, 1);
+ } else if (candidate_pcode->op == PC_ADDIS) {
+ if (!candidate_pcode->alias && object)
+ candidate_pcode->alias = make_alias(object, offset, 1);
+ } else if (candidate_pcode->op == PC_ADD) {
+ offset = 0;
+ flag24 = 1;
+ } else {
+ CError_FATAL(509);
+ }
+
+ if (
+ !(use_pcode->flags & (fIsRead | fIsWrite)) &&
+ use_pcode->op != PC_ADDI &&
+ use_pcode->op != PC_ADD &&
+ use_pcode->op != PC_ADDIS
+ ) {
+ if (object->datatype == DLOCAL && !is_safe_const(object))
+ add_alias_member(worst_case, make_alias(object, 0, 0));
+ return 1;
+ }
+
+ if (
+ (use_pcode->flags & (fIsWrite | fPCodeFlag40000)) &&
+ use_pcode->args[0].kind == PCOp_REGISTER &&
+ use_pcode->args[0].arg == RegClass_GPR &&
+ use_pcode->args[0].data.reg.reg == reg &&
+ object->datatype == DLOCAL &&
+ !is_safe_const(object)
+ )
+ add_alias_member(worst_case, make_alias(object, 0, 0));
+
+ if (use_pcode->argCount < 3)
+ return 1;
+
+ CError_ASSERT(543, use_pcode->args[1].kind == PCOp_REGISTER);
+
+ if (candidate_pcode->block == use_pcode->block && precedes(candidate_pcode, use_pcode)) {
+ for (scan = candidate_pcode->nextPCode; scan && scan != use_pcode; scan = scan->nextPCode) {
+ for (op = scan->args, i = scan->argCount; i--; op++) {
+ if (op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ (op->data.reg.effect & EffectWrite) &&
+ op->data.reg.reg == reg)
+ return 1;
+ }
+ }
+ } else {
+ if (!bitvectorgetbit(candidateID, propinfo[use_pcode->block->blockIndex].vec8)) {
+ if (bitvectorgetbit(candidate_pcode->defID, usedefinfo[use_pcode->block->blockIndex].defvec8)) {
+ for (scan = use_pcode->block->firstPCode; scan && scan != use_pcode; scan = scan->nextPCode) {
+ for (op = scan->args, i = scan->argCount; i--; op++) {
+ if (op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ (op->data.reg.effect & EffectWrite) &&
+ op->data.reg.reg == reg)
+ return 1;
+ }
+ }
+ } else {
+ return 1;
+ }
+ }
+
+ for (scan = use_pcode->block->firstPCode; scan; scan = scan->nextPCode) {
+ if (scan == use_pcode)
+ break;
+ for (op = scan->args, i = scan->argCount; i--; op++) {
+ if (op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ (op->data.reg.effect & EffectWrite) &&
+ op->data.reg.reg == reg)
+ return 1;
+ }
+ }
+ }
+
+ CError_ASSERT(598, object != NULL);
+
+ if (use_pcode->op == PC_ADDI || use_pcode->op == PC_ADD || use_pcode->op == PC_ADDIS) {
+ if (use_pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR] && !is_safe_const(object))
+ add_alias_member(worst_case, make_alias(object, 0, 0));
+ }
+
+ if (use_pcode->flags & (fIsRead | fIsWrite))
+ size = nbytes_loaded_or_stored_by(use_pcode);
+
+ if (use_pcode->args[2].kind == PCOp_REGISTER) {
+ if (use_pcode->args[1].data.reg.reg == 0) {
+ if (use_pcode->args[2].data.reg.reg == reg)
+ alias = make_alias(object, offset, size);
+ } else {
+ if (use_pcode->args[1].data.reg.reg == reg)
+ reg2 = use_pcode->args[2].data.reg.reg;
+ else if (use_pcode->args[2].data.reg.reg == reg)
+ reg2 = use_pcode->args[1].data.reg.reg;
+ else
+ return 1;
+
+ for (scan = use_pcode->prevPCode; scan; scan = scan->prevPCode) {
+ if (scan->op == PC_LI && scan->args[0].data.reg.reg == reg2)
+ break;
+
+ for (i = 0; i < scan->argCount; i++) {
+ if (scan->args[i].kind == PCOp_REGISTER &&
+ scan->args[i].arg == RegClass_GPR &&
+ scan->args[i].data.reg.reg == reg2 &&
+ (scan->args[i].data.reg.effect & EffectWrite)) {
+ scan = NULL;
+ break;
+ }
+ }
+
+ if (!scan)
+ break;
+ }
+
+ if (scan) {
+ offset += scan->args[1].data.mem.offset;
+ alias = make_alias(object, offset, size);
+ } else {
+ alias = make_alias(object, 0, 0);
+ }
+ }
+ } else {
+ if (use_pcode->args[1].kind != PCOp_REGISTER ||
+ use_pcode->args[1].arg != RegClass_GPR ||
+ use_pcode->args[1].data.reg.reg != reg)
+ return 1;
+
+ if (use_pcode->args[1].data.reg.effect & EffectWrite) {
+ alias = make_alias(object, 0, 0);
+ } else if (use_pcode->args[2].kind == PCOp_IMMEDIATE) {
+ if (use_pcode->op == PC_ADDIS) {
+ offset += use_pcode->args[2].data.imm.value << 16;
+ alias = make_alias(object, offset, 1);
+ } else {
+ offset += use_pcode->args[2].data.imm.value;
+ alias = make_alias(object, offset, size);
+ }
+ } else {
+ return 1;
+ }
+ }
+
+ if (flag24)
+ alias = make_alias(object, 0, 0);
+
+ if (!alias)
+ return 1;
+
+ if (!use_pcode->alias) {
+ if (
+ use_pcode->op == PC_ADDI ||
+ use_pcode->op == PC_ADD ||
+ use_pcode->op == PC_ADDIS ||
+ ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr))
+ )
+ recursive_propagation = 1;
+ }
+
+ if (use_pcode->alias) {
+ if (use_pcode->alias == worst_case) {
+ add_alias_member(worst_case, make_alias(object, 0, 0));
+ } else if (use_pcode->alias == alias) {
+ return 1;
+ } else if (use_pcode->alias->type == AliasType0 || use_pcode->alias->type == AliasType1) {
+ if (object == use_pcode->alias->object) {
+ use_pcode->alias = make_alias(object, 0, 0);
+ } else {
+ aliasSet = make_alias_set();
+ if (
+ use_pcode->op == PC_ADDI ||
+ use_pcode->op == PC_ADD ||
+ use_pcode->op == PC_ADDIS ||
+ ((use_pcode->flags & (fIsRead | fIsWrite)) && (use_pcode->flags & fUpdatesPtr))
+ ) {
+ if (alias->type == AliasType2)
+ add_alias_member(worst_case, alias);
+ else
+ add_alias_member(worst_case, make_alias(use_pcode->alias->object, 0, 0));
+ }
+ add_alias_member(aliasSet, use_pcode->alias);
+ add_alias_member(aliasSet, alias);
+ use_pcode->alias = aliasSet;
+ }
+ } else {
+ add_alias_member(use_pcode->alias, alias);
+ }
+ } else {
+ use_pcode->alias = alias;
+ }
+
+ propagated_instructions = 1;
+ return 1;
+}
+
+static void finishpropagatealiases(int id) {
+ propagated_instructions = 1;
+}
+
+static Propagation alias_prop = {
+ &is_address_load,
+ &addresspropagatestouse,
+ &finishpropagatealiases,
+ "ALIAS",
+ "ALIASES",
+ "A%" PRId32,
+ 1
+};
+
+static void propagatealiasinfo(Object *proc) {
+ propagateinstructions(proc, &alias_prop, (copts.optimizationlevel >= 4) ? 4 : 1, 1);
+}
+
+void gather_alias_info(void) {
+ UInt32 *myvec; // r31
+ Alias *alias; // r22
+ AliasMember *member;
+ AliasMember *member2;
+ PCodeBlock *block; // r21
+ PCode *pcode; // r20
+ PCodeArg *op; // r19
+ RegUseOrDef *list; // r18
+ int i; // r17
+ Alias *alias_choice; // r16
+ int aliases_idx; // r15 (helper in r23)
+ PCode *defpcode; // r14
+ Alias *alias_array[3];
+ UseOrDef *def;
+ int defID;
+
+ if (coloring) {
+ propagatealiasinfo(gFunction);
+ myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs);
+
+ for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
+ if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) {
+ if (!pcode->alias) {
+ pcode->alias = worst_case;
+ } else {
+ if ((pcode->alias->type == AliasType0 || pcode->alias->type == AliasType1) &&
+ pcode->alias->size == nbytes_loaded_or_stored_by(pcode)) {
+ pcode->flags &= ~fIsPtrOp;
+ } else {
+ pcode->flags |= fIsPtrOp;
+ }
+
+ if (pcode->alias != worst_case) {
+ aliases_idx = 0;
+ alias_choice = NULL;
+ op = pcode->args;
+ for (i = 0; i < pcode->argCount; i++, op++) {
+ if (
+ (!(pcode->flags & (fIsWrite | fPCodeFlag40000)) || op != pcode->args) &&
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ (op->data.reg.effect & EffectRead)
+ ) {
+ alias_array[aliases_idx] = NULL;
+ if (aliases_idx >= 2) {
+ alias_choice = worst_case;
+ break;
+ }
+ alias_array[aliases_idx] = pcode->alias;
+
+ for (list = reg_Defs[RegClass_GPR][op->data.reg.reg]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, myvec)) {
+ defpcode = Defs[list->id].pcode;
+ if (!defpcode->alias || !is_address_load(defpcode) || defpcode->alias == worst_case) {
+ alias_array[aliases_idx] = worst_case;
+ break;
+ }
+ }
+ }
+
+ aliases_idx++;
+ }
+ }
+
+ if (!alias_choice) {
+ if (aliases_idx > 0) {
+ alias_choice = alias_array[0];
+ if (aliases_idx == 2) {
+ if (alias_array[0] != worst_case) {
+ if (alias_array[1] != worst_case)
+ alias_choice = worst_case;
+ } else if (alias_array[1] != worst_case) {
+ alias_choice = alias_array[1];
+ }
+ }
+ }
+
+ if (alias_choice == worst_case) {
+ pcode->flags |= fIsPtrOp;
+ if (pcode->alias->type == AliasType2)
+ add_alias_member(worst_case, pcode->alias);
+ else
+ add_alias_member(worst_case, make_alias(pcode->alias->object, 0, 0));
+ }
+
+ if (alias_choice)
+ pcode->alias = alias_choice;
+ }
+ }
+ }
+ } else {
+ if ((pcode->flags & fIsCall) && !pcode->alias)
+ pcode->alias = worst_case;
+ }
+
+ for (def = &Defs[defID = pcode->defID]; defID < number_of_Defs && def->pcode == pcode; defID++, def++) {
+ if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) {
+ for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next)
+ bitvectorclearbit(list->id, myvec);
+ }
+ bitvectorsetbit(defID, myvec);
+ }
+ }
+ }
+
+ freeoheap();
+ } else {
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
+ if ((pcode->flags & (fIsRead | fIsWrite | fIsCall | fPCodeFlag20000 | fPCodeFlag40000)) && !pcode->alias)
+ pcode->alias = worst_case;
+ }
+ }
+ }
+
+ if (n_gathered_aliases != n_aliases) {
+ for (alias = aliases; alias; alias = alias->next) {
+ if (alias->type == AliasType2) {
+ alias->vec24 = lalloc(4 * ((n_aliases + 31) >> 5));
+ bitvectorinitialize(alias->vec24, n_aliases, 0);
+ for (member = alias->parents; member; member = member->nextParent) {
+ bitvectorsetbit(member->child->index, alias->vec24);
+ for (member2 = member->child->parents; member2; member2 = member2->nextParent)
+ bitvectorsetbit(member2->child->index, alias->vec24);
+ }
+ }
+ }
+ n_gathered_aliases = n_aliases;
+ }
+}
+
+static Boolean may_alias_alias(Alias *a, Alias *b) {
+ switch ((a->type * 3) + b->type) {
+ case (AliasType0 * 3) + AliasType0:
+ return a == b;
+ case (AliasType0 * 3) + AliasType1:
+ case (AliasType1 * 3) + AliasType0:
+ return a->object == b->object;
+ case (AliasType1 * 3) + AliasType1:
+ return (a->object == b->object) && aliases_overlap(a, b);
+ case (AliasType0 * 3) + AliasType2:
+ case (AliasType1 * 3) + AliasType2:
+ return bitvectorgetbit(a->index, b->vec24) != 0;
+ case (AliasType2 * 3) + AliasType0:
+ case (AliasType2 * 3) + AliasType1:
+ return bitvectorgetbit(b->index, a->vec24) != 0;
+ case (AliasType2 * 3) + AliasType2:
+ return (a == b) || !bitvectorintersectionisempty(a->vec24, b->vec24, n_aliases);
+ default:
+ CError_FATAL(1054);
+ return 1;
+ }
+}
+
+Boolean may_alias(PCode *a, PCode *b) {
+ return may_alias_alias(a->alias, b->alias);
+}
+
+Boolean uniquely_aliases(PCode *a, PCode *b) {
+ if (may_alias_alias(a->alias, b->alias)) {
+ if (
+ a->alias->type != AliasType2 &&
+ b->alias->type != AliasType2 &&
+ a->alias &&
+ b->alias &&
+ a->alias->size == nbytes_loaded_or_stored_by(a) &&
+ b->alias->size == nbytes_loaded_or_stored_by(b)
+ )
+ return 1;
+ }
+
+ return 0;
+}
+
+Boolean may_alias_worst_case(PCode *pcode) {
+ return may_alias_alias(pcode->alias, worst_case);
+}
+
+Boolean may_alias_object(PCode *pcode, Object *object) {
+ return may_alias_alias(pcode->alias, make_alias(object, 0, 0));
+}
+
+void initialize_alias_values(void) {
+ Alias *alias;
+
+ for (alias = aliases; alias; alias = alias->next) {
+ alias->valuenumber = nextvaluenumber++;
+ alias->valuepcode = NULL;
+ }
+}
+
+void update_alias_value(Alias *alias, PCode *pcode) {
+ AliasMember *member;
+ AliasMember *member2;
+ AliasMember *member3;
+
+ switch (alias->type) {
+ case AliasType0:
+ killmemory(alias, pcode);
+ for (member = alias->children; member; member = member->nextChild) {
+ CError_ASSERT(1152, member->parent->type == AliasType2);
+ killmemory(member->parent, NULL);
+ }
+ for (member = alias->parents; member; member = member->nextParent) {
+ CError_ASSERT(1157, member->child->type == AliasType1);
+ killmemory(member->child, NULL);
+ for (member2 = member->child->children; member2; member2 = member2->nextChild) {
+ if (member2->parent != alias) {
+ CError_ASSERT(1163, member2->parent->type == AliasType2);
+ killmemory(member2->parent, NULL);
+ }
+ }
+ }
+ break;
+
+ case AliasType1:
+ killmemory(alias, pcode);
+ for (member = alias->children; member; member = member->nextChild) {
+ killmemory(member->parent, NULL);
+ if (member->parent->type == AliasType0) {
+ for (member2 = member->parent->parents; member2; member2 = member2->nextParent) {
+ if (member2->child != alias && aliases_overlap(alias, member2->child)) {
+ killmemory(member2->child, NULL);
+ }
+ }
+ }
+ }
+ break;
+
+ case AliasType2:
+ killmemory(alias, NULL);
+ for (member = alias->parents; member; member = member->nextParent) {
+ killmemory(member->child, NULL);
+ for (member2 = member->child->children; member2; member2 = member2->nextChild) {
+ if (member2->parent != alias)
+ killmemory(member2->parent, NULL);
+ }
+ for (member3 = member->child->parents; member3; member3 = member3->nextParent) {
+ killmemory(member3->child, NULL);
+ for (member2 = member3->child->children; member2; member2 = member2->nextChild) {
+ if (member2->parent != member->child)
+ killmemory(member2->parent, NULL);
+ }
+ }
+ }
+ break;
+ }
+}
+
+void update_all_alias_values(void) {
+ Alias *alias;
+
+ for (alias = aliases; alias; alias = alias->next)
+ killmemory(alias, NULL);
+}
+
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c
new file mode 100644
index 0000000..8ce2962
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c
@@ -0,0 +1,906 @@
+#include "compiler/CodeMotion.h"
+#include "compiler/Alias.h"
+#include "compiler/BitVectors.h"
+#include "compiler/LoopDetection.h"
+#include "compiler/LoopOptimization.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/PCode.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/RegisterInfo.h"
+
+int movedloopinvariantcode;
+int unswitchedinvariantcode;
+
+static int isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec, int flag1, int flag2) {
+ PCodeArg *op;
+ RegUseOrDef *list;
+ int i;
+
+ if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) {
+ if (pcode->alias) {
+ if (pcode->alias->type == AliasType2 || (pcode->flags & (fIsVolatile | fSideEffects)))
+ return 0;
+
+ if (pcode->flags & fIsRead) {
+ for (list = findobjectusedef(pcode->alias->object)->defs; list; list = list->next) {
+ if (
+ may_alias(pcode, Defs[list->id].pcode) &&
+ bitvectorgetbit(list->id, vec) &&
+ bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 0;
+ }
+ }
+
+ if (pcode->flags & fIsWrite) {
+ for (list = findobjectusedef(pcode->alias->object)->uses; list; list = list->next) {
+ if (
+ may_alias(pcode, Uses[list->id].pcode) &&
+ bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 0;
+ }
+ }
+ } else {
+ return 0;
+ }
+ }
+
+ if ((pcode->flags & fIsWrite) && !bitvectorgetbit(pcode->block->blockIndex, loop->vec2C))
+ return 0;
+
+ op = pcode->args;
+ i = pcode->argCount;
+ while (i--) {
+ switch (op->kind) {
+ case PCOp_MEMORY:
+ if ((pcode->flags & fIsRead) && ((pcode->flags == 0) & 0x40)) {
+ for (list = findobjectusedef(op->data.mem.obj)->defs; list; list = list->next) {
+ if (
+ may_alias(pcode, Defs[list->id].pcode) &&
+ bitvectorgetbit(list->id, vec) &&
+ bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 0;
+ }
+ }
+ if (pcode->flags & fIsWrite) {
+ for (list = findobjectusedef(op->data.mem.obj)->uses; list; list = list->next) {
+ if (
+ may_alias(pcode, Uses[list->id].pcode) &&
+ bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 0;
+ }
+ }
+ break;
+ case PCOp_REGISTER:
+ if (op->data.reg.effect & (EffectRead | EffectWrite)) {
+ if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
+ if (op->data.reg.reg == _FP_)
+ break;
+ if (op->data.reg.reg == _CALLER_SP_)
+ break;
+ if (op->data.reg.reg == 2)
+ break;
+ }
+ if (op->data.reg.reg < n_real_registers[op->arg]) {
+ if (op->arg == RegClass_CRFIELD) {
+ if (!flag2 || (op->data.reg.effect & EffectRead))
+ return 0;
+ } else if (op->arg == RegClass_SPR) {
+ if (!flag1)
+ return 0;
+ } else {
+ return 0;
+ }
+ } else if (op->data.reg.effect & EffectRead) {
+ if (flag1 && op->kind == PCOp_REGISTER && op->arg == RegClass_SPR)
+ break;
+ if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
+ if (op->data.reg.reg == _FP_)
+ break;
+ if (op->data.reg.reg == _CALLER_SP_)
+ break;
+ if (op->data.reg.reg == 2)
+ break;
+ }
+
+ for (list = reg_Defs[op->arg][op->data.reg.reg]; list; list = list->next) {
+ if (
+ bitvectorgetbit(list->id, vec) &&
+ bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 0;
+ }
+ }
+ }
+ break;
+ }
+ op++;
+ }
+
+ return 1;
+}
+
+static int isuniquedefinition(PCode *pcode, Loop *loop) {
+ RegUseOrDef *list;
+ int defID;
+ UseOrDef *def;
+
+ defID = pcode->defID;
+ def = &Defs[defID];
+ if (defID >= number_of_Defs)
+ return 0;
+ if (def->pcode != pcode)
+ return 0;
+ if ((defID + 1) < number_of_Defs && def[1].pcode == pcode)
+ return 0;
+
+ if (def->v.kind == PCOp_REGISTER) {
+ for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) {
+ if (
+ bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) &&
+ list->id != defID
+ )
+ return 0;
+ }
+ } else if (def->v.kind == PCOp_MEMORY) {
+ for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
+ if (
+ may_alias(pcode, Defs[list->id].pcode) &&
+ bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) &&
+ list->id != defID
+ )
+ return 0;
+ }
+ } else {
+ CError_FATAL(292);
+ }
+
+ return 1;
+}
+
+static int uniquelyreachesuse(int defID, int useID) {
+ UseOrDef *def;
+ UseOrDef *use;
+ RegUseOrDef *list;
+ PCode *pcode;
+
+ def = &Defs[defID];
+ use = &Uses[useID];
+ if (def->v.kind == PCOp_REGISTER) {
+ for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) {
+ if (
+ list->id != defID &&
+ bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8)
+ )
+ break;
+ }
+ } else if (def->v.kind == PCOp_MEMORY) {
+ for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
+ if (
+ may_alias(def->pcode, Defs[list->id].pcode) &&
+ list->id != defID &&
+ bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8)
+ )
+ break;
+ }
+ }
+
+ if (!list)
+ return 1;
+
+ if (def->pcode->block == use->pcode->block) {
+ for (pcode = use->pcode->prevPCode; pcode; pcode = pcode->prevPCode) {
+ if (pcode == def->pcode)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int uniquelyreachesalluses(int defID, Loop *loop) {
+ UseOrDef *def;
+ RegUseOrDef *list;
+
+ def = &Defs[defID];
+
+ if (def->v.kind == PCOp_REGISTER) {
+ for (list = reg_Uses[def->v.arg][def->v.u.reg]; list; list = list->next) {
+ if (
+ bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) ||
+ (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id))
+ )
+ return 0;
+ }
+ } else if (def->v.kind == PCOp_MEMORY) {
+ for (list = findobjectusedef(def->v.u.object)->uses; list; list = list->next) {
+ if (may_alias(def->pcode, Uses[list->id].pcode)) {
+ if (
+ bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) ||
+ (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id))
+ )
+ return 0;
+ }
+ }
+ } else {
+ CError_FATAL(382);
+ }
+
+ return 1;
+}
+
+static int isliveonexit(TinyValue *v, Loop *loop) {
+ RegUseOrDef *list;
+ UInt32 *vec;
+
+ vec = usedefinfo[loop->preheader->blockIndex].usevec1C;
+
+ if (v->kind == PCOp_REGISTER) {
+ for (list = reg_Uses[v->arg][v->u.reg]; list; list = list->next) {
+ if (
+ bitvectorgetbit(list->id, vec) &&
+ !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 1;
+ }
+ } else if (v->kind == PCOp_MEMORY) {
+ for (list = findobjectusedef(v->u.object)->uses; list; list = list->next) {
+ if (
+ bitvectorgetbit(list->id, vec) &&
+ !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
+ )
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int dominatesallexits(PCode *pcode, Loop *loop) {
+ return bitvectorgetbit(pcode->block->blockIndex, loop->vec28) != 0;
+}
+
+static int maymove(PCode *pcode, Loop *loop) {
+ short reg;
+
+ if (!isuniquedefinition(pcode, loop))
+ return 0;
+ if (!uniquelyreachesalluses(pcode->defID, loop))
+ return 0;
+ if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop))
+ return 0;
+
+ if (loop->bodySize > 25) {
+ switch (pcode->op) {
+ case PC_LI:
+ if (
+ pcode->nextPCode &&
+ pcode->nextPCode->op == PC_LVX &&
+ (pcode->nextPCode->flags & fIsConst)
+ ) {
+ reg = pcode->args[0].data.reg.reg;
+ if (pcode->nextPCode->args[1].data.reg.reg == reg ||
+ pcode->nextPCode->args[2].data.reg.reg == reg)
+ return 1;
+ }
+ case PC_VSPLTISB:
+ case PC_VSPLTISH:
+ case PC_VSPLTISW:
+ return 0;
+ default:
+ if (!bitvectorgetbit(pcode->block->blockIndex, loop->vec2C))
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static void moveinvariantcomputation(PCode *pcode, Loop *loop) {
+ ObjectUseDef *oud;
+ BlockList *blocklist;
+ RegUseOrDef *list;
+ UseOrDef *def;
+ int defID;
+
+ defID = pcode->defID;
+ def = &Defs[defID];
+ deletepcode(pcode);
+ insertpcodebefore(loop->preheader->lastPCode, pcode);
+ loop->bodySize--;
+ movedloopinvariantcode = 1;
+
+ if (def->v.kind == PCOp_REGISTER) {
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next)
+ bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8);
+ bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8);
+ }
+ } else if (def->v.kind == PCOp_MEMORY) {
+ oud = findobjectusedef(def->v.u.object);
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ for (list = oud->defs; list; list = list->next) {
+ if (uniquely_aliases(pcode, Defs[list->id].pcode))
+ bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8);
+ }
+ bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8);
+ }
+ } else {
+ CError_FATAL(545);
+ }
+}
+
+static int srawi_addze_maymove(PCode *pcode, Loop *loop) {
+ RegUseOrDef *list;
+ UseOrDef *def;
+ int defID;
+ int nextDefID;
+
+ defID = pcode->defID;
+ nextDefID = pcode->nextPCode->defID;
+
+ def = &Defs[defID];
+ if (defID >= number_of_Defs)
+ return 0;
+ if (def->pcode != pcode)
+ return 0;
+ if ((defID + 1) < number_of_Defs && def[1].pcode == pcode)
+ return 0;
+
+ if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) {
+ for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next) {
+ if (
+ bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) &&
+ list->id != defID &&
+ list->id != nextDefID
+ )
+ return 0;
+ }
+ } else {
+ CError_FATAL(582);
+ }
+
+ if (!uniquelyreachesalluses(pcode->defID, loop))
+ return 0;
+ if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop))
+ return 0;
+ if (!dominatesallexits(pcode->nextPCode, loop) && isliveonexit(&Defs[pcode->nextPCode->defID].v, loop))
+ return 0;
+
+ return 1;
+}
+
+static int srawi_addze_isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec) {
+ static PCode *oldNextInstr;
+ PCode *nextInstr;
+
+ nextInstr = pcode->nextPCode;
+ if (
+ pcode->op == PC_ADDZE &&
+ oldNextInstr == pcode
+ ) {
+ oldNextInstr = NULL;
+ return 1;
+ } else if (
+ pcode->op == PC_SRAWI &&
+ nextInstr &&
+ nextInstr->op == PC_ADDZE &&
+ pcode->args[0].data.reg.reg == nextInstr->args[0].data.reg.reg &&
+ nextInstr->args[0].data.reg.reg == nextInstr->args[1].data.reg.reg &&
+ !(pcode->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
+ !(nextInstr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
+ isloopinvariant(pcode, loop, vec, 1, 0) &&
+ srawi_addze_maymove(pcode, loop)
+ ) {
+ oldNextInstr = nextInstr;
+ return 1;
+ } else {
+ oldNextInstr = NULL;
+ return 0;
+ }
+}
+
+static void removeblockfromloop(Loop *loop, PCodeBlock *block) {
+ BlockList *list;
+ BlockList **ptr;
+
+ bitvectorclearbit(block->blockIndex, loop->memberblocks);
+ bitvectorclearbit(block->blockIndex, loop->vec24);
+ bitvectorclearbit(block->blockIndex, loop->vec28);
+ bitvectorclearbit(block->blockIndex, loop->vec2C);
+ loop->bodySize -= block->pcodeCount;
+
+ ptr = &loop->blocks;
+ while ((list = *ptr)) {
+ if (list->block == block)
+ *ptr = list->next;
+ else
+ ptr = &list->next;
+ }
+}
+
+static void changesuccessor(PCodeBlock *block, PCodeBlock *from, PCodeBlock *to) {
+ PCLink **ptr;
+ PCLink *link;
+
+ for (link = block->successors; link; link = link->nextLink) {
+ if (link->block == from)
+ link->block = to;
+ }
+
+ ptr = &from->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == block) {
+ *ptr = link->nextLink;
+ link->nextLink = to->predecessors;
+ to->predecessors = link;
+ } else {
+ ptr = &link->nextLink;
+ }
+ }
+}
+
+static void movesuccessor(PCodeBlock *to, PCodeBlock *from, PCodeBlock *block) {
+ PCLink **ptr;
+ PCLink *link;
+
+ for (link = block->predecessors; link; link = link->nextLink) {
+ if (link->block == from)
+ link->block = to;
+ }
+
+ ptr = &from->successors;
+ while ((link = *ptr)) {
+ if (link->block == block) {
+ *ptr = link->nextLink;
+ link->nextLink = to->successors;
+ to->successors = link;
+ } else {
+ ptr = &link->nextLink;
+ }
+ }
+}
+
+static void movecmptopreheader(Loop *loop, PCodeBlock *block, PCode *pc1, PCode *pc2, PCodeArg *op) {
+ PCodeBlock *preheader;
+ PCode *pc3;
+
+ preheader = loop->preheader;
+ if (PCODE_FLAG_SET_F(pc1) & fRecordBit) {
+ moveinvariantcomputation(pc1, loop);
+ } else {
+ deletepcode(pc1);
+ insertpcodebefore(loop->preheader->lastPCode, pc1);
+ loop->bodySize--;
+ movedloopinvariantcode = 1;
+ }
+ loop->preheader = NULL;
+
+ insertpreheaderblock(loop);
+
+ pc3 = preheader->lastPCode;
+ CError_ASSERT(775, pc3->op == PC_B);
+ deletepcode(pc3);
+ deletepcode(pc2);
+ appendpcode(preheader, pc2);
+ movesuccessor(preheader, block, op->data.label.label->block);
+}
+
+static PCodeBlock *appendheadercopy(Loop *loop, PCodeBlock *block1, PCodeBlock *block2, PCodeBlock *block3) {
+ PCodeBlock *newblock1;
+ PCodeBlock *newblock2;
+ PCLink *link;
+ PCode *scan;
+
+ newblock1 = lalloc(sizeof(PCodeBlock));
+ newblock2 = lalloc(sizeof(PCodeBlock));
+
+ newblock1->labels = NULL;
+ newblock1->predecessors = newblock1->successors = NULL;
+ newblock1->firstPCode = newblock1->lastPCode = NULL;
+ newblock1->pcodeCount = 0;
+ newblock1->loopWeight = loop->body->loopWeight;
+ newblock1->flags = 0;
+ newblock1->blockIndex = pcblockcount++;
+
+ newblock2->labels = NULL;
+ newblock2->predecessors = newblock2->successors = NULL;
+ newblock2->firstPCode = newblock2->lastPCode = NULL;
+ newblock2->pcodeCount = 0;
+ newblock2->loopWeight = loop->body->loopWeight;
+ newblock2->flags = 0;
+ newblock2->blockIndex = pcblockcount++;
+
+ newblock1->nextBlock = newblock2;
+ newblock2->prevBlock = newblock1;
+ newblock1->prevBlock = block1;
+ newblock2->nextBlock = block1->nextBlock;
+ block1->nextBlock = newblock1;
+ newblock2->nextBlock->prevBlock = newblock2;
+
+ pclabel(newblock1, makepclabel());
+ pclabel(newblock2, makepclabel());
+
+ changesuccessor(block1, block1->successors->block, newblock1);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock2;
+ link->nextLink = newblock1->successors;
+ newblock1->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock1;
+ link->nextLink = newblock2->predecessors;
+ newblock2->predecessors = link;
+
+ appendpcode(newblock2, makepcode(PC_B, block2->nextBlock->labels));
+ pcbranch(newblock2, block2->nextBlock->labels);
+ pccomputepredecessors1(newblock2);
+
+ for (scan = block2->firstPCode; scan; scan = scan->nextPCode)
+ appendpcode(newblock1, copypcode(scan));
+
+ pcbranch(newblock1, block3->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock1;
+ link->nextLink = block3->predecessors;
+ block3->predecessors = link;
+
+ addblocktoloop(loop, newblock1);
+ if (bitvectorgetbit(block2->blockIndex, loop->vec28))
+ bitvectorsetbit(newblock1->blockIndex, loop->vec28);
+ if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
+ bitvectorsetbit(newblock1->blockIndex, loop->vec2C);
+
+ for (loop = loop->parent; loop; loop = loop->parent) {
+ addblocktoloop(loop, newblock1);
+ if (bitvectorgetbit(block2->blockIndex, loop->vec28))
+ bitvectorsetbit(newblock1->blockIndex, loop->vec28);
+ if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
+ bitvectorsetbit(newblock1->blockIndex, loop->vec2C);
+
+ addblocktoloop(loop, newblock2);
+ if (bitvectorgetbit(block2->blockIndex, loop->vec28))
+ bitvectorsetbit(newblock2->blockIndex, loop->vec28);
+ if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
+ bitvectorsetbit(newblock2->blockIndex, loop->vec2C);
+ }
+
+ return newblock1;
+}
+
+static BlockList *findswitchpath(Loop *loop, PCodeBlock *block) {
+ BlockList *head;
+ BlockList *tail;
+ BlockList *node;
+ PCodeBlock *scan;
+
+ head = NULL;
+ tail = NULL;
+
+ for (scan = block; scan && scan != loop->body; scan = scan->successors->block) {
+ if (!bitvectorgetbit(scan->blockIndex, loop->memberblocks))
+ return NULL;
+ if (scan->successors && scan->successors->nextLink)
+ return NULL;
+
+ node = oalloc(sizeof(BlockList));
+ node->block = scan;
+ node->next = NULL;
+ if (head) {
+ tail->next = node;
+ tail = node;
+ } else {
+ head = node;
+ tail = node;
+ }
+ }
+
+ return head;
+}
+
+static void simpleunswitchloop(Loop *loop) {
+ PCode *pc29;
+ PCodeArg *op27;
+ UInt32 *myvec;
+ PCodeBlock *block26;
+ PCode *pc25; // r25
+ BlockList *path2_24;
+ PCodeArg *op23;
+ PCode *pc23; // r23
+ BlockList *scanlist; // r23
+ BlockList *bestpath1; // r23
+ BlockList *bestpath2; // r22
+ PCodeBlock *headercopy; // r22
+ Loop *newloop; // r21
+ PCodeBlock *preheader21;
+ BlockList *path20;
+ PCode *scan20;
+ PCode *lastpcode;
+ int i;
+ BlockList *pathiter1;
+ BlockList *pathiter2;
+
+ if (!(lastpcode = loop->body->lastPCode))
+ return;
+ if (lastpcode->op != PC_BT && lastpcode->op != PC_BF)
+ return;
+ if (lastpcode->args[2].kind != PCOp_LABEL)
+ return;
+ if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks))
+ return;
+ if (loop->x57)
+ return;
+ if (loop->x4D)
+ return;
+ if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks))
+ return;
+
+ for (block26 = pcbasicblocks; block26; block26 = block26->nextBlock) {
+ if (bitvectorgetbit(block26->blockIndex, loop->memberblocks))
+ break;
+ }
+
+ if (!block26)
+ return;
+
+ myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
+ bitvectorcopy(myvec, usedefinfo[block26->blockIndex].defvec8, number_of_Defs);
+ for (pc25 = loop->preheader->nextBlock->firstPCode; pc25; pc25 = pc25->nextPCode) {
+ if (!(PCODE_FLAG_SET_F(pc25) & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects | fRecordBit))) {
+ if (isloopinvariant(pc25, loop, myvec, 0, 1))
+ break;
+ }
+ }
+
+ if (!pc25 || pc25->argCount < 1)
+ return;
+
+ if (
+ pc25->argCount < 1 ||
+ pc25->args[0].kind != PCOp_REGISTER ||
+ pc25->args[0].arg != RegClass_CRFIELD
+ )
+ return;
+
+ pc29 = pc25->block->lastPCode;
+ if (
+ !pc29 ||
+ !(pc29->flags & fIsBranch) ||
+ pc29->args[0].kind != PCOp_REGISTER ||
+ pc29->args[0].arg != RegClass_CRFIELD
+ )
+ return;
+
+ if (pc29->args[0].data.reg.reg != pc25->args[0].data.reg.reg)
+ return;
+
+ op27 = NULL;
+ for (i = 0; i < pc29->argCount; i++) {
+ if (pc29->args[i].kind == PCOp_LABEL)
+ op27 = &pc29->args[i];
+ }
+
+ if (op27) {
+ preheader21 = loop->preheader;
+
+ path20 = findswitchpath(loop, block26->nextBlock);
+ if (!path20)
+ return;
+
+ path2_24 = findswitchpath(loop, op27->data.label.label->block);
+ if (!path2_24)
+ return;
+
+ bestpath1 = NULL;
+ bestpath2 = NULL;
+ for (pathiter1 = path20; pathiter1; pathiter1 = pathiter1->next) {
+ for (pathiter2 = path2_24; pathiter2; pathiter2 = pathiter2->next) {
+ if (pathiter1->block == pathiter2->block) {
+ bestpath1 = pathiter1;
+ break;
+ }
+ }
+ if (bestpath1)
+ break;
+ bestpath2 = pathiter1;
+ }
+
+ CError_ASSERT(1192, bestpath2->block);
+
+ if (bestpath2->block->lastPCode && bestpath2->block->lastPCode->op == PC_B)
+ deletepcode(bestpath2->block->lastPCode);
+
+ while (bestpath1) {
+ for (scan20 = bestpath1->block->firstPCode; scan20; scan20 = scan20->nextPCode) {
+ if (scan20->op != PC_B)
+ appendpcode(bestpath2->block, copypcode(scan20));
+ }
+ bestpath1 = bestpath1->next;
+ }
+
+ headercopy = appendheadercopy(loop, bestpath2->block, loop->body, block26);
+ movecmptopreheader(loop, block26, pc25, pc29, op27);
+
+ if (block26->pcodeCount) {
+ if (path2_24->block->firstPCode) {
+ pc23 = path2_24->block->firstPCode;
+ for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) {
+ if (scan20->op != PC_B)
+ insertpcodebefore(pc23, copypcode(scan20));
+ }
+ } else {
+ for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) {
+ if (scan20->op != PC_B)
+ appendpcode(path2_24->block, copypcode(scan20));
+ }
+ }
+ }
+
+ op23 = NULL;
+ for (i = 0; i < loop->body->lastPCode->argCount; i++) {
+ if (loop->body->lastPCode->args[i].kind == PCOp_LABEL)
+ op23 = &loop->body->lastPCode->args[i];
+ }
+
+ CError_ASSERT(1250, op23 != NULL);
+
+ changesuccessor(loop->body, op23->data.label.label->block, path2_24->block);
+ op23->data.label.label = path2_24->block->labels;
+
+ op23 = NULL;
+ for (i = 0; i < preheader21->lastPCode->argCount; i++) {
+ if (preheader21->lastPCode->args[i].kind == PCOp_LABEL)
+ op23 = &preheader21->lastPCode->args[i];
+ }
+
+ CError_ASSERT(1267, op23 != NULL);
+
+ changesuccessor(preheader21, op23->data.label.label->block, loop->body);
+ op23->data.label.label = loop->body->labels;
+
+ op23 = NULL;
+ for (i = 0; i < loop->preheader->lastPCode->argCount; i++) {
+ if (loop->preheader->lastPCode->args[i].kind == PCOp_LABEL)
+ op23 = &loop->preheader->lastPCode->args[i];
+ }
+
+ CError_ASSERT(1284, op23 != NULL);
+
+ changesuccessor(loop->preheader, op23->data.label.label->block, headercopy);
+ op23->data.label.label = headercopy->labels;
+
+ newloop = lalloc(sizeof(Loop));
+ newloop->parent = loop->parent;
+ newloop->children = NULL;
+ newloop->nextSibling = loop->nextSibling;
+ loop->nextSibling = newloop;
+ newloop->body = loop->body;
+ newloop->preheader = NULL;
+ newloop->blocks = NULL;
+ newloop->basicInductionVars = NULL;
+ newloop->footer = NULL;
+ newloop->pc18 = NULL;
+ newloop->loopWeight = loop->loopWeight;
+
+ bitvectorinitialize(newloop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+ bitvectorinitialize(newloop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+ bitvectorinitialize(newloop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+ bitvectorinitialize(newloop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+
+ removeblockfromloop(loop, newloop->body);
+ addblocktoloop(newloop, newloop->body);
+
+ bitvectorsetbit(newloop->body->blockIndex, newloop->vec24);
+ bitvectorsetbit(newloop->body->blockIndex, newloop->vec2C);
+ bitvectorsetbit(newloop->body->blockIndex, newloop->vec28);
+
+ for (scanlist = path2_24; scanlist; scanlist = scanlist->next) {
+ removeblockfromloop(loop, scanlist->block);
+ addblocktoloop(newloop, scanlist->block);
+ bitvectorsetbit(scanlist->block->blockIndex, newloop->vec2C);
+ }
+
+ newloop->preheader = NULL;
+ insertpreheaderblock(newloop);
+ analyzeloop(newloop);
+
+ loop->body = headercopy;
+
+ for (scanlist = loop->blocks; scanlist; scanlist = scanlist->next)
+ bitvectorsetbit(scanlist->block->blockIndex, loop->vec2C);
+
+ bitvectorsetbit(headercopy->blockIndex, loop->vec24);
+ analyzeloop(loop);
+
+ unswitchedinvariantcode = 1;
+ }
+}
+
+static void simpleunswitchloops(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ simpleunswitchloops(loop->children);
+ else if (!loop->x4F)
+ simpleunswitchloop(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+static void moveinvariantsfromloop(Loop *loop) {
+ RegUseOrDef *list;
+ BlockList *blocklist;
+ PCode *instr;
+ PCode *nextInstr;
+ UInt32 *myvec;
+ UseOrDef *def;
+ int defID;
+ int flag;
+ PCodeBlock *block;
+
+ myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
+ do {
+ flag = 0;
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ block = blocklist->block;
+ bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs);
+ for (instr = block->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ if (!(instr->flags & fIsBranch) && instr->argCount) {
+ if (
+ !(instr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
+ isloopinvariant(instr, loop, myvec, 0, 0) &&
+ maymove(instr, loop)
+ ) {
+ moveinvariantcomputation(instr, loop);
+ flag = 1;
+ } else if (srawi_addze_isloopinvariant(instr, loop, myvec)) {
+ moveinvariantcomputation(instr, loop);
+ flag = 1;
+ }
+
+ for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) {
+ if (def->v.kind == PCOp_REGISTER) {
+ for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next)
+ bitvectorclearbit(list->id, myvec);
+ } else if (def->v.kind == PCOp_MEMORY) {
+ if (def->v.arg == PCOpMemory0) {
+ for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
+ if (uniquely_aliases(instr, Defs[list->id].pcode))
+ bitvectorclearbit(list->id, myvec);
+ }
+ }
+ } else {
+ CError_FATAL(1434);
+ }
+
+ bitvectorsetbit(defID, myvec);
+ }
+ }
+ }
+ }
+ } while (flag);
+}
+
+static void moveinvariantsfromloops(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ moveinvariantsfromloops(loop->children);
+ moveinvariantsfromloop(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void moveloopinvariantcode(void) {
+ unswitchedinvariantcode = 0;
+ movedloopinvariantcode = 0;
+ if (loopsinflowgraph) {
+ moveinvariantsfromloops(loopsinflowgraph);
+ simpleunswitchloops(loopsinflowgraph);
+ }
+ freeoheap();
+}
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c
new file mode 100644
index 0000000..2b40453
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c
@@ -0,0 +1,643 @@
+#include "compiler/ConstantPropagation.h"
+#include "compiler/Alias.h"
+#include "compiler/BitVectors.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/PCode.h"
+#include "compiler/PCodeInfo.h"
+#include "compiler/RegisterInfo.h"
+#include "compiler/StackFrame.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/objects.h"
+
+int propagatedconstants;
+static int changed;
+static PCode **defininginstruction;
+static PCode **vrdefininginstruction;
+
+static void computedefininginstructions(PCodeBlock *block) {
+ RegUseOrDef *list;
+ PCode *instr;
+ int i;
+
+ for (i = 0; i < used_virtual_registers[RegClass_GPR]; i++) {
+ instr = NULL;
+ for (list = reg_Defs[RegClass_GPR][i]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) {
+ if (instr == NULL) {
+ instr = Defs[list->id].pcode;
+ } else {
+ instr = NULL;
+ break;
+ }
+ }
+ }
+ defininginstruction[i] = instr;
+ }
+
+ for (i = 0; i < used_virtual_registers[RegClass_VR]; i++) {
+ instr = NULL;
+ for (list = reg_Defs[RegClass_VR][i]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) {
+ if (instr == NULL) {
+ instr = Defs[list->id].pcode;
+ } else {
+ instr = NULL;
+ break;
+ }
+ }
+ }
+ vrdefininginstruction[i] = instr;
+ }
+}
+
+static PCode *isstackoperand(PCodeArg *op, SInt16 *resultValue, SInt16 addend) {
+ PCode *instr;
+
+ if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_ADDI) {
+ if (
+ instr->args[2].kind == PCOp_MEMORY &&
+ (instr->args[1].data.reg.reg == _FP_ || instr->args[1].data.reg.reg == _CALLER_SP_) &&
+ instr->args[2].data.mem.obj->datatype == DLOCAL
+ )
+ {
+ if (can_add_displ_to_local(instr->args[2].data.mem.obj, addend)) {
+ *resultValue = instr->args[2].data.mem.offset;
+ return instr;
+ } else {
+ return NULL;
+ }
+ } else {
+ return NULL;
+ }
+ } else {
+ return NULL;
+ }
+}
+
+static int isconstantoperand(PCodeArg *op, SInt16 *resultValue) {
+ PCode *instr;
+
+ if (
+ (instr = defininginstruction[op->data.reg.reg]) &&
+ instr->op == PC_LI &&
+ instr->args[1].kind == PCOp_IMMEDIATE
+ )
+ {
+ *resultValue = instr->args[1].data.imm.value;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int isuint16constantoperand(PCodeArg *op, SInt16 *resultValue) {
+ PCode *instr;
+
+ if (
+ (instr = defininginstruction[op->data.reg.reg]) &&
+ instr->op == PC_LI &&
+ instr->args[1].kind == PCOp_IMMEDIATE &&
+ FITS_IN_USHORT(instr->args[1].data.imm.value)
+ )
+ {
+ *resultValue = instr->args[1].data.imm.value;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int isvectorconstantoperand(PCodeArg *op, SInt16 *resultValue, Opcode *resultNewOp) {
+ PCode *instr;
+
+ if (
+ (instr = vrdefininginstruction[op->data.reg.reg]) &&
+ (instr->op == PC_VSPLTISB || instr->op == PC_VSPLTISH || instr->op == PC_VSPLTISW) &&
+ instr->args[1].kind == PCOp_IMMEDIATE
+ )
+ {
+ *resultValue = instr->args[1].data.imm.value;
+ *resultNewOp = instr->op;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int isunsignedloadoperand(PCodeArg *op) {
+ PCode *instr;
+
+ if ((instr = defininginstruction[op->data.reg.reg])) {
+ if (instr->flags & fIsRead) {
+ if (instr->op >= PC_LHZ && instr->op <= PC_LHZUX)
+ return 2;
+ if (instr->op >= PC_LBZ && instr->op <= PC_LBZUX)
+ return 1;
+ } else if (instr->op == PC_RLWINM) {
+ int var3 = instr->args[3].data.imm.value;
+ int var4 = instr->args[4].data.imm.value;
+ if (var4 == 31) {
+ if (var3 == 24)
+ return 1;
+ if (var3 == 16)
+ return 2;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int ismaskedoperand(PCodeArg *op, UInt32 *resultMask) {
+ PCode *instr;
+ UInt32 mask;
+
+ if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_RLWINM) {
+ if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) {
+ mask =
+ ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) &
+ ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
+ } else {
+ mask =
+ ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) |
+ ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
+ }
+ *resultMask = mask;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int issignedloadoperand(PCodeArg *op) {
+ PCode *instr;
+
+ if ((instr = defininginstruction[op->data.reg.reg])) {
+ if (instr->flags & fIsRead) {
+ if (instr->op >= PC_LHA && instr->op <= PC_LHAUX)
+ return 2;
+ } else if (instr->op == PC_EXTSB) {
+ return 1;
+ } else if (instr->op == PC_EXTSH) {
+ return 2;
+ }
+ }
+
+ return 0;
+}
+
+static void propagateconstantstoblock(PCodeBlock *block) {
+ PCode *instr;
+ SInt16 immAddend;
+ SInt16 value1;
+ SInt16 valueU16;
+ Opcode newOpcode;
+ SInt16 value2;
+ UInt32 mask;
+ UInt32 mask2;
+ int loadSize;
+ PCodeArg *op;
+ int i;
+
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ switch (instr->op) {
+ case PC_MR:
+ if (isconstantoperand(&instr->args[1], &value1)) {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = value1;
+ instr->args[1].data.imm.obj = NULL;
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ break;
+ case PC_VMR:
+ if (isvectorconstantoperand(&instr->args[1], &value1, &newOpcode)) {
+ change_opcode(instr, newOpcode);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = value1;
+ instr->args[1].data.imm.obj = NULL;
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ break;
+ case PC_RLWINM:
+ if (
+ !(PCODE_FLAG_SET_F(instr) & fRecordBit) &&
+ instr->args[2].data.imm.value == 0 &&
+ instr->args[4].data.imm.value == 31
+ )
+ {
+ if (isconstantoperand(&instr->args[1], &value1)) {
+ if (
+ (instr->args[3].data.imm.value == 16 && value1 == (value1 & 0x7FFF)) ||
+ (instr->args[3].data.imm.value == 24 && value1 == (value1 & 0xFF))
+ )
+ {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ break;
+ }
+ }
+
+ loadSize = isunsignedloadoperand(&instr->args[1]);
+ if (
+ (loadSize == 2 && instr->args[3].data.imm.value <= 16) ||
+ (loadSize == 1 && instr->args[3].data.imm.value <= 24)
+ )
+ {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ break;
+ }
+
+ if (ismaskedoperand(&instr->args[1], &mask)) {
+ if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) {
+ mask2 =
+ ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) &
+ ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
+ } else {
+ mask2 =
+ ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) |
+ ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
+ }
+ if (mask == (mask & mask2)) {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ }
+ }
+ break;
+
+ case PC_EXTSH:
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ break;
+
+ if (isconstantoperand(&instr->args[1], &value1)) {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ break;
+ }
+
+ loadSize = issignedloadoperand(&instr->args[1]);
+ if (loadSize == 1 || loadSize == 2) {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ break;
+
+ case PC_EXTSB:
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ break;
+
+ if (
+ isconstantoperand(&instr->args[1], &value1) &&
+ value1 >= -128 &&
+ value1 <= 127
+ )
+ {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ break;
+ }
+
+ loadSize = issignedloadoperand(&instr->args[1]);
+ if (loadSize == 1) {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ break;
+
+ case PC_ADDI:
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ break;
+
+ immAddend = instr->args[2].data.imm.value;
+ if (
+ isconstantoperand(&instr->args[1], &value1) &&
+ FITS_IN_SHORT(immAddend + value1)
+ )
+ {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = immAddend + value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ break;
+
+ case PC_ADD:
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ break;
+
+ if (isconstantoperand(&instr->args[2], &value1)) {
+ if (value1 == 0) {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ } else {
+ change_opcode(instr, PC_ADDI);
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = value1;
+ instr->args[2].data.imm.obj = NULL;
+ }
+ propagatedconstants = 1;
+ changed = 1;
+ immAddend = value1;
+ }
+
+ if (isconstantoperand(&instr->args[1], &value1)) {
+ if (instr->op == PC_ADDI || instr->op == PC_MR) {
+ if (FITS_IN_SHORT(immAddend + value1)) {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = immAddend + value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ } else {
+ instr->args[1] = instr->args[2];
+ if (value1 == 0) {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ } else {
+ change_opcode(instr, PC_ADDI);
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = value1;
+ instr->args[2].data.imm.obj = NULL;
+ }
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ }
+
+ if (changed) {
+ if (instr->op == PC_MR) {
+ PCode *stackInstr;
+ if ((stackInstr = isstackoperand(&instr->args[1], &value1, 0))) {
+ change_opcode(instr, PC_ADDI);
+ instr->flags = stackInstr->flags;
+ instr->args[1] = stackInstr->args[1];
+ instr->args[2] = stackInstr->args[2];
+ change_num_operands(instr, 3);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ } else if (instr->op == PC_ADDI && instr->args[2].kind == PCOp_IMMEDIATE) {
+ PCode *stackInstr;
+ SInt16 addend = instr->args[2].data.imm.value;
+ if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) {
+ change_opcode(instr, PC_ADDI);
+ instr->flags = stackInstr->flags;
+ instr->args[1] = stackInstr->args[1];
+ instr->args[2] = stackInstr->args[2];
+ instr->args[2].data.imm.value = value1 + addend;
+ if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000))
+ instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, 1);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ }
+ }
+ break;
+
+ case PC_OR:
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ break;
+
+ value1 = 0;
+ immAddend = 0;
+ if (isconstantoperand(&instr->args[2], &value1)) {
+ if (isuint16constantoperand(&instr->args[2], &valueU16)) {
+ if (valueU16 != 0) {
+ change_opcode(instr, PC_ORI);
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = valueU16;
+ instr->args[2].data.imm.obj = NULL;
+ propagatedconstants = 1;
+ changed = 1;
+ } else {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ value1 = valueU16;
+ } else if (value1 == 0) {
+ change_opcode(instr, PC_MR);
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ immAddend = value1;
+ }
+
+ if (isconstantoperand(&instr->args[1], &value1)) {
+ if (instr->op == PC_ORI || instr->op == PC_MR) {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = immAddend | value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ } else if (isuint16constantoperand(&instr->args[1], &valueU16)) {
+ if (valueU16 != 0) {
+ change_opcode(instr, PC_ORI);
+ instr->args[1] = instr->args[2];
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = valueU16;
+ instr->args[2].data.imm.obj = NULL;
+ propagatedconstants = 1;
+ changed = 1;
+ } else {
+ change_opcode(instr, PC_MR);
+ instr->args[1] = instr->args[2];
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ } else if (value1 == 0) {
+ change_opcode(instr, PC_MR);
+ instr->args[1] = instr->args[2];
+ change_num_operands(instr, 2);
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ }
+ break;
+
+ case PC_SUBF:
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ break;
+
+ if (isconstantoperand(&instr->args[1], &value1) && FITS_IN_SHORT(-value1)) {
+ if (isconstantoperand(&instr->args[2], &value2) && FITS_IN_SHORT(value2 - value1)) {
+ change_opcode(instr, PC_LI);
+ instr->args[1].kind = PCOp_IMMEDIATE;
+ instr->args[1].data.imm.value = value2 - value1;
+ instr->args[1].data.imm.obj = NULL;
+ change_num_operands(instr, 2);
+ } else if (value1 == 0) {
+ change_opcode(instr, PC_MR);
+ instr->args[1] = instr->args[2];
+ change_num_operands(instr, 2);
+ } else {
+ change_opcode(instr, PC_ADDI);
+ instr->args[1] = instr->args[2];
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = -value1;
+ instr->args[2].data.imm.obj = NULL;
+ }
+ propagatedconstants = 1;
+ changed = 1;
+ value2 = value1;
+ } else if (isconstantoperand(&instr->args[2], &value1) && FITS_IN_SHORT(-value1)) {
+ if (value1 == 0) {
+ change_opcode(instr, PC_NEG);
+ change_num_operands(instr, 2);
+ } else {
+ instr->flags = opcodeinfo[PC_SUBFIC].flags | (instr->flags & ~opcodeinfo[PC_SUBF].flags);
+ change_opcode(instr, PC_SUBFIC);
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = value1;
+ instr->args[2].data.imm.obj = NULL;
+ instr->args[3].kind = PCOp_REGISTER;
+ instr->args[3].arg = RegClass_SPR;
+ instr->args[3].data.reg.reg = 0;
+ instr->args[3].data.reg.effect = EffectWrite;
+ change_num_operands(instr, 4);
+ }
+ propagatedconstants = 1;
+ changed = 1;
+ }
+
+ break;
+
+ case PC_LBZ:
+ case PC_LHZ:
+ case PC_LHA:
+ case PC_LWZ:
+ case PC_STB:
+ case PC_STH:
+ case PC_STW:
+ case PC_LFS:
+ case PC_LFD:
+ case PC_STFS:
+ case PC_STFD:
+ if (instr->args[2].kind == PCOp_IMMEDIATE) {
+ PCode *stackInstr;
+ SInt16 addend = instr->args[2].data.imm.value;
+
+ if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) {
+ instr->args[1] = stackInstr->args[1];
+ instr->args[2] = stackInstr->args[2];
+ instr->args[2].data.imm.value = value1 + addend;
+ if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000))
+ instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value,
+ nbytes_loaded_or_stored_by(instr));
+ propagatedconstants = 1;
+ changed = 1;
+ }
+ }
+ break;
+
+ case PC_LBZX:
+ case PC_LHZX:
+ case PC_LHAX:
+ case PC_LWZX:
+ case PC_STBX:
+ case PC_STHX:
+ case PC_STWX:
+ case PC_LFSX:
+ case PC_LFDX:
+ case PC_STFSX:
+ case PC_STFDX:
+ if (isconstantoperand(&instr->args[2], &value1)) {
+ instr->op -= 2;
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = value1;
+ instr->args[2].data.imm.obj = NULL;
+ propagatedconstants = 1;
+ changed = 1;
+ } else if (isconstantoperand(&instr->args[1], &value1)) {
+ instr->op -= 2;
+ instr->args[1] = instr->args[2];
+ instr->args[2].kind = PCOp_IMMEDIATE;
+ instr->args[2].data.imm.value = value1;
+ instr->args[2].data.imm.obj = NULL;
+ propagatedconstants = 1;
+ changed = 1;
+ }
+
+ break;
+ }
+
+ for (i = 0, op = instr->args; i < instr->argCount; i++, op++) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ (op->data.reg.effect & EffectWrite)
+ )
+ {
+ defininginstruction[op->data.reg.reg] = instr;
+ }
+ else if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_VR &&
+ (op->data.reg.effect & EffectWrite)
+ )
+ {
+ vrdefininginstruction[op->data.reg.reg] = instr;
+ }
+ }
+ }
+}
+
+void propagateconstants(void) {
+ PCodeBlock *block;
+ int i;
+
+ propagatedconstants = 0;
+ computeusedefchains(0);
+ defininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_GPR]);
+ vrdefininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_VR]);
+
+ do {
+ changed = 0;
+ for (i = 0; i < pcblockcount; i++) {
+ if ((block = depthfirstordering[i])) {
+ computedefininginstructions(block);
+ propagateconstantstoblock(block);
+ }
+ }
+ } while (changed);
+
+ freeoheap();
+}
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c
new file mode 100644
index 0000000..6bb2d51
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c
@@ -0,0 +1,885 @@
+#include "compiler/LoopDetection.h"
+#include "compiler/CFunc.h"
+#include "compiler/PCode.h"
+#include "compiler/TOC.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/BitVectors.h"
+#include "compiler/enode.h"
+#include "compiler/objects.h"
+
+Loop *loopsinflowgraph;
+int loopdetection_nblocks;
+static UInt32 **dominators;
+static BlockList *loopheaders;
+static int nloopheaders;
+static PCodeBlock **loopstack;
+BitVector *LoopTemp;
+struct LoopList *LoopList_First;
+
+static void computedominators(void) {
+ int i;
+ PCodeBlock *block;
+ int blockCount;
+ int flag;
+ UInt32 *myvec;
+ PCLink *link;
+
+ blockCount = pcblockcount;
+ flag = 1;
+
+ dominators = oalloc(sizeof(UInt32 *) * pcblockcount);
+ for (i = 0; i < pcblockcount; i++)
+ dominators[i] = oalloc(4 * ((blockCount + 31) >> 5));
+
+ myvec = oalloc(4 * ((blockCount + 31) >> 5));
+
+ bitvectorinitialize(dominators[pcbasicblocks->blockIndex], blockCount, 0);
+ //dominators[pcbasicblocks->blockIndex][0] |= 1;
+ bitvectorsetbit(0, dominators[pcbasicblocks->blockIndex]);
+
+ for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock)
+ bitvectorinitialize(dominators[block->blockIndex], blockCount, 0xFFFFFFFF);
+
+ computedepthfirstordering();
+
+ while (flag) {
+ flag = 0;
+ for (i = 0; i < pcblockcount; i++) {
+ block = depthfirstordering[i];
+ if (block && block->blockIndex != pcbasicblocks->blockIndex) {
+ bitvectorcopy(myvec, dominators[block->predecessors->block->blockIndex], blockCount);
+ for (link = block->predecessors->nextLink; link; link = link->nextLink)
+ bitvectorintersect(myvec, dominators[link->block->blockIndex], blockCount);
+ //myvec[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31);
+ bitvectorsetbit(block->blockIndex, myvec);
+
+ if (bitvectorchanged(dominators[block->blockIndex], myvec, blockCount))
+ flag = 1;
+ }
+ }
+ }
+}
+
+static BlockList *findloopheaders(void) {
+ PCodeBlock *block;
+ PCLink *link;
+ BlockList *list;
+
+ loopheaders = NULL;
+ nloopheaders = 0;
+
+ for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
+ for (link = block->predecessors; link; link = link->nextLink) {
+ //if ((1 << (block->blockIndex & 31)) & dominators[link->block->blockIndex][block->blockIndex >> 5])
+ if (bitvectorgetbit(block->blockIndex, dominators[link->block->blockIndex]))
+ break;
+ }
+
+ if (link) {
+ list = oalloc(sizeof(BlockList));
+ list->block = block;
+ list->next = loopheaders;
+ loopheaders = list;
+ nloopheaders++;
+ }
+ }
+
+ return loopheaders;
+}
+
+void addblocktoloop(Loop *loop, PCodeBlock *block) {
+ BlockList *list = lalloc(sizeof(BlockList));
+
+ //loop->memberblocks[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31);
+ bitvectorsetbit(block->blockIndex, loop->memberblocks);
+
+ list->block = block;
+ list->next = loop->blocks;
+ loop->blocks = list;
+}
+
+static void findnaturalloop(Loop *loop) {
+ BlockList *list;
+ BlockList *list2;
+ PCLink *link;
+ PCodeBlock *block;
+ int i;
+
+ i = 0;
+ addblocktoloop(loop, loop->body);
+ for (link = loop->body->predecessors; link; link = link->nextLink) {
+ if (bitvectorgetbit(loop->body->blockIndex, dominators[link->block->blockIndex]) && link->block != loop->body) {
+ addblocktoloop(loop, link->block);
+ loopstack[i++] = link->block;
+ }
+ }
+
+ while (i) {
+ link = loopstack[--i]->predecessors;
+ while (link) {
+ if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
+ addblocktoloop(loop, link->block);
+ loopstack[i++] = link->block;
+ }
+ link = link->nextLink;
+ }
+ }
+
+ for (list = loop->blocks; list; list = list->next) {
+ block = list->block;
+ for (link = block->successors; link; link = link->nextLink) {
+ if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
+ bitvectorsetbit(block->blockIndex, loop->vec24);
+ break;
+ }
+ }
+ }
+
+ for (list = loop->blocks; list; list = list->next) {
+ for (list2 = loop->blocks; list2; list2 = list2->next) {
+ if (bitvectorgetbit(list2->block->blockIndex, loop->vec24) &&
+ !bitvectorgetbit(list->block->blockIndex, dominators[list2->block->blockIndex]))
+ break;
+ }
+
+ if (!list2)
+ bitvectorsetbit(list->block->blockIndex, loop->vec28);
+ }
+
+ for (list = loop->blocks; list; list = list->next) {
+ for (link = loop->body->predecessors; link; link = link->nextLink) {
+ if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks) &&
+ !bitvectorgetbit(list->block->blockIndex, dominators[link->block->blockIndex]))
+ break;
+ }
+
+ if (!link)
+ bitvectorsetbit(list->block->blockIndex, loop->vec2C);
+ }
+}
+
+static void addlooptolist(Loop *loop, Loop **list) {
+ Loop **scan;
+ Loop *scanloop;
+
+ scan = list;
+ while ((scanloop = *scan)) {
+ if (bitvectorgetbit(loop->body->blockIndex, scanloop->memberblocks)) {
+ loop->parent = scanloop;
+ addlooptolist(loop, &scanloop->children);
+ return;
+ }
+
+ if (bitvectorgetbit(scanloop->body->blockIndex, loop->memberblocks)) {
+ *scan = scanloop->nextSibling;
+ scanloop->parent = loop;
+ scanloop->nextSibling = loop->children;
+ loop->children = scanloop;
+ } else {
+ scan = &scanloop->nextSibling;
+ }
+ }
+
+ loop->nextSibling = *list;
+ *list = loop;
+}
+
+static void findnaturalloops(void) {
+ Loop *loop;
+ int size;
+
+ loopdetection_nblocks = pcblockcount + 5 * nloopheaders;
+ loopstack = oalloc(sizeof(PCodeBlock *) * pcblockcount);
+ while (loopheaders) {
+ loop = lalloc(sizeof(Loop));
+ loop->parent = loop->nextSibling = loop->children = NULL;
+ loop->body = loopheaders->block;
+ loop->preheader = NULL;
+ loop->blocks = NULL;
+ loop->basicInductionVars = NULL;
+ loop->footer = NULL;
+ loop->pc18 = NULL;
+ loop->loopWeight = loop->body->loopWeight;
+
+ bitvectorinitialize(loop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+ bitvectorinitialize(loop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+ bitvectorinitialize(loop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+ bitvectorinitialize(loop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
+
+ findnaturalloop(loop);
+ addlooptolist(loop, &loopsinflowgraph);
+
+ loopheaders = loopheaders->next;
+ }
+}
+
+static PCodeBlock *makepreheaderblock(void) {
+ PCodeLabel *label;
+ PCodeBlock *block;
+
+ label = makepclabel();
+ block = lalloc(sizeof(PCodeBlock));
+ block->nextBlock = NULL;
+ block->prevBlock = NULL;
+ block->labels = NULL;
+ block->successors = NULL;
+ block->predecessors = NULL;
+ block->firstPCode = block->lastPCode = NULL;
+ block->pcodeCount = 0;
+ block->flags = 0;
+ block->blockIndex = pcblockcount++;
+ pclabel(block, label);
+ return block;
+}
+
+static void insertpreheaderbefore(PCodeBlock *a, PCodeBlock *b) {
+ a->nextBlock = b;
+ a->prevBlock = b->prevBlock;
+ b->prevBlock->nextBlock = a;
+ b->prevBlock = a;
+}
+
+void insertpreheaderblock(Loop *loop) {
+ PCodeBlock *preheader;
+ PCodeBlock *block29;
+ PCodeBlock *block28;
+ PCode *pcode27;
+ PCLink *link; // r26
+ PCLink **linkptr; // r25
+ PCodeLabel *newlabel; // r23
+ PCLink *innerlink;
+ PCodeBlock *block;
+ PCodeArg *arg;
+ int i;
+
+ preheader = loop->preheader = makepreheaderblock();
+ block29 = NULL;
+ block28 = loop->body;
+
+ if (!block28->labels)
+ pclabel(block28, makepclabel());
+
+ appendpcode(preheader, makepcode(PC_B, block28->labels));
+ preheader->loopWeight = loop->parent ? loop->parent->loopWeight : 1;
+
+ linkptr = &block28->predecessors;
+ while ((link = *linkptr)) {
+ if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
+ linkptr = &link->nextLink;
+ } else {
+ if (link->block->pcodeCount) {
+ pcode27 = link->block->lastPCode;
+ if (pcode27->op == PC_B) {
+ CError_ASSERT(462, pcode27->args[0].kind == PCOp_LABEL);
+ if (pcode27->args[0].data.label.label->block == block28)
+ pcode27->args[0].data.label.label = preheader->labels;
+ } else if (pcode27->op == PC_BT || pcode27->op == PC_BF) {
+ CError_ASSERT(474, pcode27->args[2].kind == PCOp_LABEL);
+ if (pcode27->args[2].data.label.label->block == block28)
+ pcode27->args[2].data.label.label = preheader->labels;
+ } else if (pcode27->op == PC_BCTR) {
+ if (pcode27->argCount > 1 && pcode27->args[1].kind == PCOp_MEMORY) {
+ Object *obj = pcode27->args[1].data.mem.obj;
+ UInt32 *array = (UInt32 *) obj->u.data.u.switchtable.data;
+ int i;
+ for (i = 0; i < obj->u.data.u.switchtable.size; i++) {
+ if (((PCodeLabel *) CTool_ResolveIndexToPointer(array[i]))->block == block28)
+ array[i] = CTool_CreateIndexFromPointer(preheader->labels);
+ }
+ } else {
+ CodeLabelList *cll;
+ for (cll = codelabellist; cll; cll = cll->next) {
+ if (cll->label->pclabel->block == block28)
+ cll->label->pclabel = preheader->labels;
+ }
+ }
+ } else {
+ CError_ASSERT(505, link->block->nextBlock == block28);
+ }
+ }
+
+ for (innerlink = link->block->successors; innerlink; innerlink = innerlink->nextLink) {
+ if (innerlink->block == block28)
+ innerlink->block = preheader;
+ }
+
+ *linkptr = link->nextLink;;
+ link->nextLink = preheader->predecessors;
+ preheader->predecessors = link;
+ }
+ }
+
+ if (!bitvectorgetbit(block28->prevBlock->blockIndex, loop->memberblocks)) {
+ insertpreheaderbefore(preheader, block28);
+
+ if (
+ (!block28->nextBlock || !bitvectorgetbit(block28->nextBlock->blockIndex, loop->memberblocks)) &&
+ block28->lastPCode &&
+ (block28->lastPCode->flags & fIsBranch) &&
+ block28->lastPCode->op != PC_BDNZ
+ ) {
+ i = block28->lastPCode->argCount;
+ arg = block28->lastPCode->args;
+ while (i && arg->kind != PCOp_LABEL) {
+ arg++;
+ i--;
+ }
+
+ if (i && arg->kind == PCOp_LABEL && arg->data.label.label->block == block28) {
+ block29 = makepreheaderblock();
+ insertpreheaderbefore(block29, block28);
+ newlabel = makepclabel();
+ pclabel(block29, newlabel);
+ arg->data.label.label = newlabel;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = block28;
+ link->nextLink = block29->predecessors;
+ block29->predecessors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = block28;
+ link->nextLink = block29->successors;
+ block29->successors = link;
+
+ for (link = block28->successors; link; link = link->nextLink) {
+ if (link->block == block28)
+ link->block = block29;
+ }
+ for (link = block28->predecessors; link; link = link->nextLink) {
+ if (link->block == block28)
+ link->block = block29;
+ }
+
+ bitvectorsetbit(block29->blockIndex, loop->vec2C);
+ addblocktoloop(loop, block29);
+ }
+ }
+ } else {
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ if (bitvectorgetbit(block->blockIndex, loop->memberblocks))
+ break;
+ }
+ insertpreheaderbefore(preheader, block);
+ }
+
+ link = lalloc(sizeof(PCLink));
+ link->block = preheader;
+ link->nextLink = block28->predecessors;
+ block28->predecessors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = block28;
+ link->nextLink = preheader->successors;
+ preheader->successors = link;
+
+ for (loop = loop->parent; loop; loop = loop->parent) {
+ addblocktoloop(loop, preheader);
+ if (bitvectorgetbit(block28->blockIndex, loop->vec28)) {
+ bitvectorsetbit(preheader->blockIndex, loop->vec28);
+ if (block29)
+ bitvectorsetbit(block29->blockIndex, loop->vec28);
+ }
+ if (bitvectorgetbit(block28->blockIndex, loop->vec2C)) {
+ bitvectorsetbit(preheader->blockIndex, loop->vec2C);
+ if (block29)
+ bitvectorsetbit(block29->blockIndex, loop->vec2C);
+ }
+ }
+}
+
+static void insertpreheaderblocks(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ insertpreheaderblocks(loop->children);
+ insertpreheaderblock(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void findloopsinflowgraph(void) {
+ loopsinflowgraph = NULL;
+ computedominators();
+ if (findloopheaders()) {
+ findnaturalloops();
+ insertpreheaderblocks(loopsinflowgraph);
+ }
+ freeoheap();
+}
+
+static int checklooplimits(SInt32 opcode, SInt32 condition, SInt32 c, SInt32 d, SInt32 addend, SInt32 *result) {
+ if (opcode == PC_BT) {
+ if (condition == 0) {
+ if (addend <= 0)
+ return 0;
+ if (c < d)
+ *result = (d - c + addend - 1) / addend;
+ else
+ *result = 0;
+ } else if (condition == 1) {
+ if (addend >= 0)
+ return 0;
+ if (c > d)
+ *result = (c - d - addend - 1) / -addend;
+ else
+ *result = 0;
+ } else {
+ return 0;
+ }
+ } else {
+ if (condition == 0) {
+ if (addend >= 0)
+ return 0;
+ if (c >= d)
+ *result = (c - d - addend) / -addend;
+ else
+ *result = 0;
+ } else if (condition == 1) {
+ if (addend <= 0)
+ return 0;
+ if (c <= d)
+ *result = (d - c + addend) / addend;
+ else
+ *result = 0;
+ } else if (c < d) {
+ if (addend <= 0)
+ return 0;
+ if ((d - c) % addend)
+ return 0;
+ *result = (d - c) / addend;
+ } else if (c > d) {
+ if (addend >= 0)
+ return 0;
+ if ((c - d) % -addend)
+ return 0;
+ *result = (c - d) / -addend;
+ } else {
+ *result = 0;
+ }
+ }
+
+ return 1;
+}
+
+static int checkunsignedlooplimits(SInt32 opcode, SInt32 condition, UInt32 c, UInt32 d, SInt32 addend, UInt32 *result) {
+ if (opcode == PC_BT) {
+ if (condition == 0) {
+ if (addend <= 0)
+ return 0;
+ if (c < d)
+ *result = (d - c + addend - 1) / addend;
+ else
+ *result = 0;
+ } else if (condition == 1) {
+ if (addend >= 0)
+ return 0;
+ if (c > d)
+ *result = (c - d - addend - 1) / -addend;
+ else
+ *result = 0;
+ } else {
+ return 0;
+ }
+ } else {
+ if (condition == 0) {
+ if (addend >= 0)
+ return 0;
+ if (c >= d)
+ *result = (c - d - addend) / -addend;
+ else
+ *result = 0;
+ } else if (condition == 1) {
+ if (addend <= 0)
+ return 0;
+ if (c <= d)
+ *result = (d - c + addend) / addend;
+ else
+ *result = 0;
+ } else if (c < d) {
+ if (addend <= 0)
+ return 0;
+ if ((d - c) % addend)
+ return 0;
+ *result = (d - c) / addend;
+ } else if (c > d) {
+ if (addend >= 0)
+ return 0;
+ if ((c - d) % -addend)
+ return 0;
+ *result = (c - d) / -addend;
+ } else {
+ *result = 0;
+ }
+ }
+
+ return (*result & 0x80000000) == 0;
+}
+
+static int checkunknownloop(int a, int b, int c, unsigned char *op) {
+ if (a == PC_BT) {
+ if (b == 0) {
+ if (c <= 0)
+ return 0;
+ *op = ELESS;
+ } else if (b == 1) {
+ if (c >= 0)
+ return 0;
+ *op = EGREATER;
+ } else {
+ return 0;
+ }
+ } else {
+ if (b == 0) {
+ if (c >= 0)
+ return 0;
+ *op = EGREATEREQU;
+ } else if (b == 1) {
+ if (c <= 0)
+ return 0;
+ *op = ELESSEQU;
+ } else if (c == 1) {
+ *op = ENOTEQU;
+ } else if (c == -1) {
+ *op = ENOTEQU;
+ } else {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static void checkcountingloop(Loop *loop) {
+ RegUseOrDef *list;
+ PCode *lastpcode;
+ PCode *prevpcode;
+ PCode *pc8;
+ PCode *check;
+ short op12;
+ short reg11;
+ SInt16 reg4;
+ short reg11b;
+ Loop *child;
+
+ if (!(lastpcode = loop->body->lastPCode))
+ return;
+ if (lastpcode->op != PC_BT && lastpcode->op != PC_BF)
+ return;
+ if (lastpcode->args[2].kind != PCOp_LABEL)
+ return;
+
+ if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks))
+ return;
+ if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks))
+ return;
+
+ reg11 = lastpcode->args[0].data.reg.reg;
+ reg4 = lastpcode->args[1].data.imm.value;
+ prevpcode = lastpcode->prevPCode;
+ if (!prevpcode)
+ return;
+
+ op12 = prevpcode->op;
+ if (op12 == PC_ADDI && prevpcode->args[2].kind == PCOp_IMMEDIATE) {
+ pc8 = prevpcode;
+ prevpcode = prevpcode->prevPCode;
+ if (!prevpcode)
+ return;
+
+ op12 = prevpcode->op;
+ if (pc8->args[0].data.reg.reg != pc8->args[1].data.reg.reg)
+ return;
+ if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI)
+ return;
+ if (prevpcode->args[1].data.reg.reg != pc8->args[0].data.reg.reg)
+ return;
+ if ((loop->step = pc8->args[2].data.imm.value) == 0)
+ return;
+ }
+
+ if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI)
+ return;
+
+ if (prevpcode->args[0].data.reg.reg != reg11)
+ return;
+
+ reg11b = prevpcode->args[1].data.reg.reg;
+ if (reg11b < 32)
+ return;
+
+ if (loop->preheader->nextBlock != lastpcode->args[2].data.label.label->block)
+ return;
+
+ if (op12 == PC_CMPI) {
+ if (prevpcode->prevPCode)
+ return;
+ loop->upper = prevpcode->args[2].data.imm.value;
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else if (op12 == PC_CMPLI) {
+ if (prevpcode->prevPCode)
+ return;
+ loop->upper = prevpcode->args[2].data.imm.value & 0xFFFF;
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else if (op12 == PC_CMP || op12 == PC_CMPL) {
+ if (prevpcode->prevPCode) {
+ if (
+ prevpcode->prevPCode->op == PC_LI &&
+ prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
+ prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
+ !prevpcode->prevPCode->prevPCode
+ ) {
+ loop->upper = prevpcode->prevPCode->args[1].data.imm.value;
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else if (
+ prevpcode->prevPCode->op == PC_LIS &&
+ prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
+ prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
+ !prevpcode->prevPCode->prevPCode
+ ) {
+ loop->upper = prevpcode->prevPCode->args[1].data.imm.value << 16;
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else if (
+ prevpcode->prevPCode->op == PC_ADDI &&
+ prevpcode->prevPCode->args[2].kind == PCOp_IMMEDIATE &&
+ prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
+ prevpcode->prevPCode->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg &&
+ prevpcode->prevPCode->prevPCode &&
+ prevpcode->prevPCode->prevPCode->op == PC_LIS &&
+ prevpcode->prevPCode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
+ prevpcode->prevPCode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
+ !prevpcode->prevPCode->prevPCode->prevPCode
+ ) {
+ loop->upper = prevpcode->prevPCode->args[2].data.imm.value +
+ (prevpcode->prevPCode->prevPCode->args[1].data.imm.value << 16);
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else {
+ return;
+ }
+ } else {
+ pc8 = NULL;
+ for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) {
+ if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks))
+ return;
+ }
+ for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) {
+ if (!pc8) {
+ pc8 = Defs[list->id].pcode;
+ if (
+ pc8->op == PC_LI &&
+ pc8->args[1].kind == PCOp_IMMEDIATE
+ ) {
+ loop->upper = pc8->args[1].data.imm.value;
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else if (
+ pc8->op == PC_LIS &&
+ pc8->args[1].kind == PCOp_IMMEDIATE
+ ) {
+ loop->upper = pc8->args[1].data.imm.value << 16;
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else if (
+ pc8->op == PC_ADDI &&
+ pc8->args[2].kind == PCOp_IMMEDIATE &&
+ pc8->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg &&
+ pc8->prevPCode &&
+ pc8->prevPCode->op == PC_LIS &&
+ pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
+ pc8->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg
+ ) {
+ loop->upper = pc8->args[2].data.imm.value +
+ (pc8->prevPCode->args[1].data.imm.value << 16);
+ loop->upperType = LOOP_BOUND_CONSTANT;
+ } else {
+ loop->upperType = LOOP_BOUND_VARIABLE;
+ break;
+ }
+ } else {
+ loop->upperType = LOOP_BOUND_VARIABLE;
+ break;
+ }
+ }
+ }
+
+ if (loop->upperType == LOOP_BOUND_INDETERMINATE)
+ loop->upperType = LOOP_BOUND_VARIABLE;
+ }
+ }
+
+ pc8 = NULL;
+
+ for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) {
+ check = Defs[list->id].pcode;
+ if (bitvectorgetbit(check->block->blockIndex, loop->memberblocks)) {
+ if (!pc8) {
+ pc8 = check;
+ if (check->op != PC_ADDI)
+ return;
+ if (check->args[1].data.reg.reg != reg11b)
+ return;
+ if (check->args[2].kind != PCOp_IMMEDIATE)
+ return;
+ if ((loop->step = check->args[2].data.imm.value) == 0)
+ return;
+ } else {
+ return;
+ }
+ }
+ }
+
+ if (!pc8)
+ return;
+
+ if (pc8->block != prevpcode->block && !bitvectorgetbit(prevpcode->block->blockIndex, loop->vec2C))
+ return;
+
+ if (loop->children) {
+ for (child = loop->children; child; child = child->nextSibling) {
+ if (bitvectorgetbit(pc8->block->blockIndex, child->memberblocks))
+ return;
+ }
+ }
+
+ loop->pc18 = pc8;
+
+ pc8 = NULL;
+
+ for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) {
+ if (!pc8) {
+ pc8 = Defs[list->id].pcode;
+ if (
+ pc8->op == PC_LI &&
+ pc8->args[1].kind == PCOp_IMMEDIATE
+ ) {
+ loop->lower = pc8->args[1].data.imm.value;
+ loop->lowerType = LOOP_BOUND_CONSTANT;
+ } else if (
+ pc8->op == PC_LIS &&
+ pc8->args[1].kind == PCOp_IMMEDIATE
+ ) {
+ loop->lower = pc8->args[1].data.imm.value << 16;
+ loop->lowerType = LOOP_BOUND_CONSTANT;
+ } else if (
+ pc8->op == PC_ADDI &&
+ pc8->args[2].kind == PCOp_IMMEDIATE &&
+ pc8->args[1].data.reg.reg == reg11b &&
+ pc8->prevPCode &&
+ pc8->prevPCode->op == PC_LIS &&
+ pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
+ pc8->prevPCode->args[0].data.reg.reg == reg11b
+ ) {
+ loop->lower = pc8->args[2].data.imm.value +
+ (pc8->prevPCode->args[1].data.imm.value << 16);
+ loop->lowerType = LOOP_BOUND_CONSTANT;
+ } else {
+ loop->lowerType = LOOP_BOUND_VARIABLE;
+ break;
+ }
+ } else {
+ loop->lowerType = LOOP_BOUND_INDETERMINATE;
+ break;
+ }
+ }
+ }
+
+ if (loop->lowerType == LOOP_BOUND_INDETERMINATE)
+ loop->lowerType = LOOP_BOUND_VARIABLE;
+
+ if (loop->lowerType == LOOP_BOUND_CONSTANT && loop->upperType == LOOP_BOUND_CONSTANT) {
+ if (op12 == PC_CMP || op12 == PC_CMPI) {
+ if (!checklooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, &loop->iterationCount))
+ return;
+ } else {
+ if (!checkunsignedlooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, (UInt32 *) &loop->iterationCount))
+ return;
+ }
+ loop->isKnownCountingLoop = 1;
+ } else if (loop->lowerType != LOOP_BOUND_INDETERMINATE || loop->upperType != LOOP_BOUND_INDETERMINATE) {
+ if (!checkunknownloop(lastpcode->op, reg4, loop->step, &loop->unknownCondition))
+ return;
+ loop->isUnknownCountingLoop = 1;
+ }
+}
+
+void analyzeForCountableLoops(Loop *loop) {
+ if (!loop)
+ return;
+
+ while (loop) {
+ if (loop->children)
+ analyzeForCountableLoops(loop->children);
+ checkcountingloop(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void analyzeloop(Loop *loop) {
+ BlockList *list;
+ PCodeBlock *block;
+ PCode *pcode;
+
+ loop->bodySize = 0;
+ loop->x4D = 0;
+ loop->x4E = 0;
+ loop->x4F = 1;
+ loop->isKnownCountingLoop = 0;
+ loop->isUnknownCountingLoop = 0;
+ loop->lowerType = LOOP_BOUND_INDETERMINATE;
+ loop->upperType = LOOP_BOUND_INDETERMINATE;
+ loop->iterationCount = -1;
+ loop->x57 = 0;
+ loop->x52 = 0;
+
+ for (list = loop->blocks; list; list = list->next) {
+ block = list->block;
+ if (!loop->children)
+ block->flags |= fPCBlockFlag2000;
+ loop->bodySize += block->pcodeCount;
+
+ if (block != loop->body) {
+ if (!block->successors || !block->predecessors || block->successors->nextLink || block->predecessors->nextLink)
+ loop->x4F = 0;
+ }
+
+ if ((block->flags & fPCBlockFlag4000) == fPCBlockFlag4000)
+ loop->x52 = 1;
+
+ for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
+ if (PCODE_FLAG_SET_T(pcode) & fLink)
+ loop->x4D = 1;
+
+ if (pcode->op == PC_BCTRL || pcode->op == PC_BCTR || pcode->op == PC_BCCTR || pcode->op == PC_MTCTR || pcode->op == PC_MFCTR) {
+ loop->x4E = 1;
+ } else if (pcode->flags & fIsRead) {
+ if (pcode->op == PC_LBZX || pcode->op == PC_LHZX || pcode->op == PC_LHAX || pcode->op == PC_LWZX || pcode->op == PC_LFSX || pcode->op == PC_LFDX)
+ loop->x53 = 1;
+ } else if (pcode->flags & fIsWrite) {
+ if (pcode->op == PC_STBX || pcode->op == PC_STHX || pcode->op == PC_STWX || pcode->op == PC_STFSX || pcode->op == PC_STFDX)
+ loop->x54 = 1;
+ } else {
+ if (pcode->op == PC_EIEIO || pcode->op == PC_SYNC || pcode->op == PC_ISYNC)
+ loop->x57 = 1;
+ }
+ }
+ }
+
+ if (!loop->children && !loop->x4D && loop->bodySize < 32) {
+ for (list = loop->blocks; list; list = list->next)
+ list->block->flags |= fPCBlockFlag2000;
+ }
+}
+
+static void analyzeloops(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ analyzeloops(loop->children);
+ analyzeloop(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void analyzeloopsinflowgraph(void) {
+ if (loopsinflowgraph)
+ analyzeloops(loopsinflowgraph);
+}
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c
new file mode 100644
index 0000000..b2aef1e
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c
@@ -0,0 +1,1553 @@
+#include "compiler/LoopOptimization.h"
+#include "compiler/CFunc.h"
+#include "compiler/CParser.h"
+#include "compiler/BitVectors.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/LoopDetection.h"
+#include "compiler/PCode.h"
+#include "compiler/Registers.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/objects.h"
+#include "compiler/types.h"
+
+int optimizedloops;
+int optimizedloop_full_unroll;
+int optimizedloop_trans_regs;
+static UInt32 *liveonexit;
+static UInt32 *inductionvariables;
+static int last_virtual_GPR;
+
+static int ispowerof2(SInt32 value) {
+ int bit = getbit(value);
+ return (bit > 0 && bit < 31) ? bit : 0;
+}
+
+static void insertupdateinstructions(Loop *loop) {
+ // nothing
+}
+
+static void computeliveonexit(Loop *loop) {
+ UInt32 *usevec;
+ UInt32 *defvec;
+ BlockList *blocklist;
+ RegUseOrDef *list;
+ int gpr;
+
+ bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0);
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24))
+ bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses);
+ }
+
+ bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0);
+ if (loop->preheader)
+ bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs);
+
+ bitvectorinitialize(liveonexit, last_virtual_GPR, 0);
+
+ for (gpr = 32; gpr < last_virtual_GPR; gpr++) {
+ for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, defvec)) {
+ if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) {
+ bitvectorsetbit(gpr, liveonexit);
+ break;
+ }
+ }
+ }
+
+ for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, usevec)) {
+ if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) {
+ bitvectorsetbit(gpr, liveonexit);
+ break;
+ }
+ }
+ }
+ }
+}
+
+static void eliminateinductionvariables(Loop *loop) {
+ BlockList *blocklist;
+ PCode *instr;
+ PCode *nextInstr;
+ PCodeArg *op;
+ int i;
+
+ bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF);
+
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) {
+ if (
+ instr->op != PC_ADDI ||
+ instr->args[0].data.reg.reg < 32 ||
+ instr->args[0].data.reg.reg >= last_virtual_GPR ||
+ instr->args[1].data.reg.reg != instr->args[0].data.reg.reg
+ ) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
+ op->data.reg.reg < last_virtual_GPR
+ )
+ bitvectorclearbit(op->data.reg.reg, inductionvariables);
+ op++;
+ }
+ }
+ }
+ }
+
+ if (loop->parent) {
+ for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
+ op->data.reg.reg < last_virtual_GPR
+ )
+ bitvectorsetbit(op->data.reg.reg, liveonexit);
+ op++;
+ }
+ }
+ }
+
+ for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
+ for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ if (
+ instr->op == PC_ADDI &&
+ instr->args[0].data.reg.reg >= 32 &&
+ instr->args[0].data.reg.reg < last_virtual_GPR &&
+ instr->args[1].data.reg.reg == instr->args[0].data.reg.reg &&
+ bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) &&
+ !bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit)
+ ) {
+ deletepcode(instr);
+ optimizedloops = 1;
+ }
+ }
+ }
+}
+
+static void skiplooptest(Loop *loop) {
+ PCodeBlock *preheader;
+ PCodeLabel *label;
+ PCLink **ptr;
+ PCLink *link;
+ PCode *lastInstr;
+ PCode *instr;
+
+ preheader = loop->preheader;
+ lastInstr = loop->body->lastPCode;
+ CError_ASSERT(340, lastInstr->args[2].kind == PCOp_LABEL);
+
+ label = lastInstr->args[2].data.label.label;
+ preheader->lastPCode->args[0].data.label.label = label;
+ preheader->successors->block = label->block;
+
+ ptr = &loop->body->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == preheader) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ link->nextLink = label->block->predecessors;
+ label->block->predecessors = link;
+
+ while (1) {
+ instr = loop->body->firstPCode;
+ CError_ASSERT(369, instr);
+
+ if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
+ break;
+
+ deletepcode(instr);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ loop->bodySize--;
+ }
+
+ for (instr = instr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
+ insertpcodebefore(loop->preheader->lastPCode, copypcode(instr));
+}
+
+static void unrollloop(Loop *loop) {
+ PCodeBlock *newBlock;
+ int i;
+ int factor;
+ PCode *instr;
+ PCodeBlock *block;
+ PCode *firstInstr;
+ PCode *nextInstr;
+
+ for (factor = copts.unroll_factor_limit; factor > 1; factor--) {
+ if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit)
+ break;
+ }
+
+ if (factor == 1)
+ return;
+ if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4)
+ return;
+
+ newBlock = oalloc(sizeof(PCodeBlock));
+ newBlock->firstPCode = newBlock->lastPCode = NULL;
+ for (i = 0; i < factor - 1; i++) {
+ firstInstr = loop->body->firstPCode;
+ CError_ASSERT(448, firstInstr);
+ if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI)
+ CError_FATAL(450);
+
+ for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
+ appendpcode(newBlock, copypcode(instr));
+
+ for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op != PC_B)
+ appendpcode(newBlock, copypcode(instr));
+ }
+ }
+ }
+
+ block = loop->body->predecessors->block;
+ for (instr = newBlock->firstPCode; instr; instr = nextInstr) {
+ nextInstr = instr->nextPCode;
+ appendpcode(block, instr);
+ }
+ loop->iterationCount /= factor;
+}
+
+void pccomputepredecessors1(PCodeBlock *block) {
+ PCLink *succ;
+ PCLink *pred;
+
+ for (succ = block->successors; succ; succ = succ->nextLink) {
+ if (!succ->block) {
+ CError_FATAL(496);
+ } else {
+ for (pred = succ->block->predecessors; pred; pred = pred->nextLink) {
+ if (pred->block == block)
+ break;
+ }
+
+ if (!pred) {
+ pred = lalloc(sizeof(PCLink));
+ pred->block = block;
+ pred->nextLink = succ->block->predecessors;
+ succ->block->predecessors = pred;
+ }
+ }
+ }
+}
+
+static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) {
+ PCodeBlock *newBlock;
+ PCodeBlock *nextBlock;
+ PCodeLabel *label;
+ PCLink *prev;
+ PCLink *link;
+
+ label = makepclabel();
+ newBlock = lalloc(sizeof(PCodeBlock));
+
+ nextBlock = block->nextBlock;
+ newBlock->nextBlock = nextBlock;
+ block->nextBlock = newBlock;
+
+ newBlock->prevBlock = block;
+ nextBlock->prevBlock = newBlock;
+
+ newBlock->labels = NULL;
+ newBlock->predecessors = newBlock->successors = NULL;
+ newBlock->firstPCode = newBlock->lastPCode = NULL;
+ newBlock->pcodeCount = 0;
+ newBlock->loopWeight = loopWeight;
+ newBlock->flags = 0;
+ newBlock->blockIndex = pcblockcount++;
+ pclabel(newBlock, label);
+
+ prev = NULL;
+ for (link = block->successors; link; link = link->nextLink) {
+ if (link->block == nextBlock) {
+ if (!prev)
+ block->successors = link->nextLink;
+ else
+ prev->nextLink = link->nextLink;
+ link->nextLink = NULL;
+ newBlock->successors = link;
+ break;
+ }
+ prev = link;
+ }
+
+ prev = NULL;
+ for (link = nextBlock->predecessors; link; link = link->nextLink) {
+ if (link->block == block) {
+ if (!prev)
+ nextBlock->predecessors = link->nextLink;
+ else
+ prev->nextLink = link->nextLink;
+ link->nextLink = NULL;
+ newBlock->predecessors = link;
+ break;
+ }
+ prev = link;
+ }
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newBlock;
+ link->nextLink = block->successors;
+ block->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newBlock;
+ link->nextLink = nextBlock->predecessors;
+ nextBlock->predecessors = link;
+
+ return newBlock;
+}
+
+static void unrollloopconditional(Loop *loop) {
+ PCodeBlock *lastBlock;
+ PCodeLabel *label24;
+ PCode *instr;
+ PCode *instrCopy;
+ int outputBlockCount;
+ int inputBlockCount;
+ PCodeBlock **blocks;
+ PCodeBlock **blocks2;
+ PCodeBlock **blocks3;
+ PCode *firstInstr;
+ int factor;
+ PCodeBlock *block;
+ PCLink *link;
+ PCLink *prev;
+ int i;
+ int j;
+ int k;
+ int instructionCount;
+
+ label24 = NULL;
+ outputBlockCount = 0;
+ instructionCount = 0;
+
+ for (factor = copts.unroll_factor_limit; factor > 1; factor--) {
+ if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit)
+ break;
+ }
+
+ if (factor == 1)
+ return;
+
+ inputBlockCount = 0;
+ for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) {
+ inputBlockCount++;
+ if (!bitvectorgetbit(block->blockIndex, loop->memberblocks))
+ instructionCount += block->pcodeCount;
+ }
+
+ if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8)
+ return;
+
+ blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *));
+ blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *));
+ blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *)));
+ memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *));
+ memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *));
+ memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *)));
+
+ block = loop->preheader->nextBlock;
+ for (i = 0; i < inputBlockCount; i++) {
+ blocks[i] = block;
+ block = block->nextBlock;
+ }
+
+ lastBlock = blocks[inputBlockCount - 1];
+ for (i = 0; i < factor - 1; i++) {
+ for (j = 0; j < inputBlockCount; j++) {
+ blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight);
+ blocks3[outputBlockCount++] = blocks2[j];
+ lastBlock = blocks2[j];
+ }
+ if (label24) {
+ pclabel(blocks2[0], label24);
+ label24 = NULL;
+ }
+
+ for (j = 0; j < inputBlockCount; j++) {
+ for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->flags & fIsBranch) {
+ PCodeArg *op;
+ int opID;
+ instrCopy = copypcode(instr);
+ op = NULL;
+ for (opID = 0; opID < instr->argCount; opID++) {
+ if (instr->args[opID].kind == PCOp_LABEL) {
+ op = &instr->args[opID];
+ break;
+ }
+ }
+
+ if (op) {
+ if (op->data.label.label->block == loop->body) {
+ if (!label24)
+ label24 = makepclabel();
+ instrCopy->args[opID].data.label.label = label24;
+ } else {
+ for (k = 0; k < inputBlockCount; k++) {
+ if (op->data.label.label->block == blocks[k]) {
+ instrCopy->args[opID].data.label.label = blocks2[k]->labels;
+ break;
+ }
+ }
+ }
+ }
+
+ appendpcode(blocks2[j], instrCopy);
+ if (op)
+ pcbranch(blocks2[j], instrCopy->args[opID].data.label.label);
+ } else {
+ appendpcode(blocks2[j], copypcode(instr));
+ }
+ }
+ }
+
+ firstInstr = loop->body->firstPCode;
+ CError_ASSERT(762, firstInstr != NULL);
+ if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI)
+ CError_FATAL(764);
+
+ for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
+ appendpcode(blocks2[inputBlockCount - 1], copypcode(instr));
+
+ for (j = 0; j < inputBlockCount; j++) {
+ for (link = blocks[j]->successors; link; link = link->nextLink) {
+ if (link->block == blocks[j]->nextBlock)
+ break;
+ }
+
+ if (!link) {
+ for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == blocks2[j]->nextBlock) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ blocks2[j]->successors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+
+ for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == blocks2[j]) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ blocks2[j]->nextBlock->predecessors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+ }
+ }
+ }
+
+ if (label24)
+ pclabel(loop->body, label24);
+
+ for (i = 0; i < inputBlockCount; i++) {
+ for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->flags & fIsBranch) {
+ PCodeArg *op;
+ int opID;
+ op = NULL;
+ for (opID = 0; opID < instr->argCount; opID++) {
+ if (instr->args[opID].kind == PCOp_LABEL) {
+ op = &instr->args[opID];
+ break;
+ }
+ }
+
+ if (op && op->data.label.label->block == loop->body) {
+ instr->args[opID].data.label.label = blocks3[0]->labels;
+
+ for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == loop->body) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ blocks[i]->successors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+
+ for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) {
+ if (link->block == blocks[i]) {
+ if (prev)
+ prev->nextLink = link->nextLink;
+ else
+ loop->body->predecessors = link->nextLink;
+ } else {
+ prev = link;
+ }
+ }
+
+ link = blocks[i]->successors;
+ while (link && link->block != blocks3[0])
+ link = link->nextLink;
+
+ if (!link) {
+ pcbranch(blocks[i], op->data.label.label);
+ pccomputepredecessors1(blocks[i]);
+ }
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < outputBlockCount; i++)
+ pccomputepredecessors1(blocks3[i]);
+
+ loop->iterationCount /= factor;
+}
+
+static void unrollunknownBDNZ(Loop *loop) {
+ int factor; // r29
+ PCodeBlock *preheader; // r17
+ PCodeBlock *blockA; // r23
+ PCodeBlock *postheader; // r22
+ PCodeBlock *newblock1; // r26
+ PCodeBlock *newblock2; // r31
+ PCodeBlock *newblock3; // r19
+ PCodeBlock *newblock4; // r20
+ PCodeBlock *newblock5; // r24
+ PCode *mtctr; // r21
+ int mtctr_reg; // r27
+ PCode *instr28; // r28
+ PCode *instr6; // r6
+ PCodeBlock *block;
+ PCode *instr;
+ PCodeArg *op;
+ int i;
+ int val;
+ short mtctr_shifted_reg; // r16
+ short reg25; // r25
+ PCLink *link;
+
+ if (loop->bodySize < 4)
+ return;
+
+ factor = 128;
+ while (factor > copts.unroll_factor_limit)
+ factor >>= 1;
+ while (factor > 1 && (loop->bodySize - 2) * factor > copts.unroll_instr_limit)
+ factor >>= 1;
+
+ if (factor < 2)
+ return;
+
+ preheader = loop->preheader;
+ blockA = loop->body->nextBlock;
+ postheader = preheader->nextBlock;
+
+ newblock1 = insertnewpcblock(preheader, loop->loopWeight);
+ newblock2 = insertnewpcblock(newblock1, loop->loopWeight);
+ newblock3 = insertnewpcblock(newblock2, loop->loopWeight);
+ newblock4 = insertnewpcblock(newblock3, loop->loopWeight);
+ newblock5 = insertnewpcblock(newblock4, loop->loopWeight);
+ addblocktoloop(loop, newblock1);
+ addblocktoloop(loop, newblock2);
+ addblocktoloop(loop, newblock3);
+ addblocktoloop(loop, newblock4);
+ addblocktoloop(loop, newblock5);
+
+ for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) {
+ if (instr->op == PC_MTCTR) {
+ mtctr = instr;
+ mtctr_reg = instr->args[0].data.reg.reg;
+ }
+ }
+
+ if (!mtctr)
+ return;
+
+ instr28 = NULL;
+ for (block = postheader; block != loop->body; block = block->successors->block) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op != PC_B) {
+ appendpcode(newblock2, copypcode(instr));
+ if (instr == loop->pc18)
+ instr28 = newblock2->lastPCode;
+ }
+ }
+ }
+
+ if (!instr28) {
+ appendpcode(newblock2, copypcode(loop->pc18));
+ instr28 = newblock2->lastPCode;
+ }
+
+ instr6 = NULL;
+ for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr != instr28) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg == loop->pc18->args[0].data.reg.reg &&
+ (op->data.reg.effect & (EffectRead | EffectWrite))
+ ) {
+ instr6 = instr;
+ break;
+ }
+ op++;
+ }
+ }
+ if (instr6)
+ break;
+ }
+
+ if (!instr6) {
+ deletepcode(instr28);
+ deletepcode(loop->pc18);
+ if (loop->footer)
+ blockA = loop->footer;
+ else
+ blockA = insertnewpcblock(loop->body, loop->loopWeight);
+ } else {
+ instr28 = NULL;
+ }
+
+ for (i = 1; i < factor; i++) {
+ for (block = postheader; block != loop->body; block = block->successors->block) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op != PC_B)
+ appendpcode(newblock2, copypcode(instr));
+ }
+ }
+ }
+
+ mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++;
+ appendpcode(newblock1, makepcode(
+ PC_RLWINM,
+ mtctr_shifted_reg,
+ mtctr_reg,
+ 32 - ispowerof2(factor),
+ ispowerof2(factor),
+ 31));
+
+ appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0));
+
+ if (instr28) {
+ reg25 = used_virtual_registers[RegClass_GPR]++;
+ if (loop->step == 1) {
+ instr = makepcode(PC_MR, reg25, mtctr_reg);
+ } else if (loop->step == -1) {
+ instr = makepcode(PC_NEG, reg25, mtctr_reg);
+ } else {
+ val = ispowerof2(abs(loop->step));
+ if (val > 0) {
+ instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val);
+ if (loop->step < 0) {
+ appendpcode(newblock1, instr);
+ instr = makepcode(PC_NEG, reg25, reg25);
+ }
+ } else {
+ instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step);
+ }
+ }
+ appendpcode(newblock1, instr);
+ }
+
+ appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg));
+ appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels));
+ pcbranch(newblock1, newblock5->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock1;
+ link->nextLink = newblock5->predecessors;
+ newblock5->predecessors = link;
+
+ appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels));
+ pcbranch(newblock3, newblock2->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock3;
+ link->nextLink = newblock2->predecessors;
+ newblock2->predecessors = link;
+
+ appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1));
+ appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels));
+ pcbranch(newblock4, blockA->labels);
+
+ link = lalloc(sizeof(PCLink));
+ link->block = newblock4;
+ link->nextLink = blockA->predecessors;
+ blockA->predecessors = link;
+
+ deletepcode(mtctr);
+ appendpcode(newblock5, mtctr);
+
+ if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) {
+ instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25);
+ if (blockA->firstPCode)
+ insertpcodebefore(blockA->firstPCode, instr);
+ else
+ appendpcode(blockA, instr);
+ }
+}
+
+static void deleteloop(Loop *loop) {
+ PCodeBlock *body;
+ PCodeBlock *preheader;
+ PCodeBlock *nextblock;
+ PCodeLabel *label;
+ PCode *instr;
+ PCLink **ptr;
+ PCLink *link;
+ PCodeBlock *block;
+
+ body = loop->body;
+ preheader = loop->preheader;
+ nextblock = body->nextBlock;
+ label = makepclabel();
+
+ while (1) {
+ instr = body->firstPCode;
+ CError_ASSERT(1294, instr != NULL);
+
+ if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
+ break;
+
+ deletepcode(instr);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ loop->bodySize--;
+ }
+
+ pclabel(nextblock, label);
+ preheader->lastPCode->args[0].data.label.label = label;
+ preheader->successors->block = nextblock;
+
+ ptr = &nextblock->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == body) {
+ link->block = preheader;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ block = pcbasicblocks;
+ while ((nextblock = block->nextBlock)) {
+ if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks))
+ block->nextBlock = nextblock->nextBlock;
+ else
+ block = nextblock;
+ }
+}
+
+static void deleteloopheader(Loop *loop) {
+ PCLink *link;
+ PCLink **ptr;
+
+ ptr = &loop->body->successors;
+ while ((link = *ptr)) {
+ if (link->block == loop->body->lastPCode->args[2].data.label.label->block) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == loop->body) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ deletepcode(loop->body->firstPCode);
+ deletepcode(loop->body->lastPCode);
+ optimizedloop_full_unroll = 1;
+}
+
+static void rewriteloopwithBDNZ(Loop *loop) {
+ PCode *instr;
+
+ if (!FITS_IN_SHORT(loop->iterationCount)) {
+ instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount));
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+
+ if (loop->iterationCount != 0)
+ insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount)));
+ } else {
+ instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ }
+
+ insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg));
+
+ instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
+ deletepcode(loop->body->firstPCode);
+ deletepcode(loop->body->lastPCode);
+ appendpcode(loop->body, instr);
+
+ for (loop = loop->parent; loop; loop = loop->parent)
+ loop->x4E = 1;
+}
+
+static void rewriteunknownloopwithBDNZ(Loop *loop) {
+ SInt32 value1; // r24
+ SInt32 value2; // r30
+ Opcode branchOpcode; // r19
+ SInt32 absStep; // r28
+ int branchCondition; // r20
+ int counterReg; // r27
+ int reg1; // r26
+ int reg2; // r22
+ unsigned char mode; // r23
+ PCodeBlock *afterLoop; // r25
+ PCode *addiInstr;
+ PCode *instr;
+ PCLink *link;
+ PCLink **ptr;
+
+ absStep = abs(loop->step);
+ afterLoop = NULL;
+
+ for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) {
+ deletepcode(addiInstr);
+ if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode)
+ insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr);
+ else
+ appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr);
+
+ afterLoop = insertnewpcblock(loop->body, loop->loopWeight);
+ appendpcode(afterLoop, copypcode(addiInstr));
+ loop->footer = afterLoop;
+ }
+
+ if (loop->unknownCondition == ELESS) {
+ branchOpcode = PC_BF;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 - loop->lower;
+ mode = 0;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 + loop->upper;
+ mode = 1;
+ } else {
+ branchCondition = 0; // LT
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 2;
+ }
+ } else if (loop->unknownCondition == ELESSEQU) {
+ branchOpcode = PC_BT;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - loop->lower;
+ mode = 0;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep + loop->upper;
+ mode = 1;
+ } else {
+ branchCondition = 1; // GT
+ value1 = 0;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep;
+ mode = 2;
+ }
+ } else if (loop->unknownCondition == EGREATER) {
+ branchOpcode = PC_BF;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 + loop->lower;
+ mode = 1;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 - loop->upper;
+ mode = 0;
+ } else {
+ branchCondition = 1; // GT
+ value1 = 0;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 3;
+ }
+ } else if (loop->unknownCondition == EGREATEREQU) {
+ branchOpcode = PC_BT;
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 1; // GT
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep + loop->lower;
+ mode = 1;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ branchCondition = 0; // LT
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - loop->upper;
+ mode = 0;
+ } else {
+ branchCondition = 0; // LT
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep;
+ mode = 3;
+ }
+ } else if (loop->unknownCondition == ENOTEQU) {
+ branchOpcode = PC_BT;
+ branchCondition = 2; // EQ
+ if (loop->step > 0) {
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 - loop->lower;
+ mode = 0;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 + loop->upper;
+ mode = 1;
+ } else {
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 2;
+ }
+ } else {
+ if (loop->lowerType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->lower;
+ reg1 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1 + loop->lower;
+ mode = 1;
+ } else if (loop->upperType == LOOP_BOUND_CONSTANT) {
+ value1 = loop->upper;
+ reg1 = addiInstr->args[1].data.reg.reg;
+ value2 = absStep - 1 - loop->upper;
+ mode = 0;
+ } else {
+ reg1 = addiInstr->args[1].data.reg.reg;
+ reg2 = addiInstr->args[2].data.reg.reg;
+ value2 = absStep - 1;
+ mode = 3;
+ }
+ }
+ }
+
+ while (1) {
+ instr = loop->body->firstPCode;
+ CError_ASSERT(1857, instr);
+
+ if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
+ break;
+
+ deletepcode(instr);
+ insertpcodebefore(loop->preheader->lastPCode, instr);
+ loop->bodySize--;
+ }
+
+ // build a value for mtctr
+ counterReg = used_virtual_registers[RegClass_GPR]++;
+
+ if (mode == 1) {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_NEG, counterReg, reg1));
+ } else if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBFIC, counterReg, reg1, value2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, counterReg));
+ }
+ } else if (mode == 0) {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_MR, counterReg, reg1));
+ } else if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, reg1, 0, value2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ }
+ } else if (mode == 2) {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, reg2));
+ } else if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, reg2, 0, value2));
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, counterReg));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg1, counterReg));
+ }
+ } else {
+ if (value2 == 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg2, reg1));
+ } else {
+ if (FITS_IN_SHORT(value2)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, reg1, 0, value2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
+ if (value2 != 0) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
+ }
+ }
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_SUBF, counterReg, reg2, counterReg));
+ }
+ }
+
+ if (absStep > 1) {
+ unsigned char bits;
+ if ((bits = ispowerof2(absStep))) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31));
+ } else {
+ int reg = used_virtual_registers[RegClass_GPR]++;
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_LI, reg, absStep));
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_DIVWU, counterReg, counterReg, reg));
+ }
+ }
+
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_MTCTR, counterReg));
+
+ if (mode < 2) {
+ if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
+ if (FITS_IN_USHORT(value1)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPLI, 0, reg1, value1));
+ } else {
+ PCode *tmp;
+ tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
+ insertpcodebefore(loop->preheader->lastPCode, tmp);
+
+ if (loop->iterationCount != 0)
+ insertpcodeafter(tmp,
+ makepcode(PC_ADDI,
+ tmp->args[0].data.reg.reg,
+ tmp->args[0].data.reg.reg,
+ 0, LOW_PART(value1)));
+
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg));
+ }
+ } else {
+ if (FITS_IN_SHORT(value1)) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPI, 0, reg1, value1));
+ } else {
+ PCode *tmp;
+ tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
+ insertpcodebefore(loop->preheader->lastPCode, tmp);
+
+ if (loop->iterationCount != 0)
+ insertpcodeafter(tmp,
+ makepcode(PC_ADDI,
+ tmp->args[0].data.reg.reg,
+ tmp->args[0].data.reg.reg,
+ 0, LOW_PART(value1)));
+
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg));
+ }
+ }
+ } else {
+ if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMPL, 0, reg1, reg2));
+ } else {
+ insertpcodebefore(loop->preheader->lastPCode,
+ makepcode(PC_CMP, 0, reg1, reg2));
+ }
+ }
+
+ if (!afterLoop)
+ afterLoop = loop->body->nextBlock;
+
+ instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels);
+ deletepcode(loop->preheader->lastPCode);
+ appendpcode(loop->preheader, instr);
+
+ instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
+ deletepcode(loop->body->firstPCode);
+ deletepcode(loop->body->lastPCode);
+ appendpcode(loop->body, instr);
+
+ loop->preheader->successors = NULL;
+ ptr = &loop->body->predecessors;
+ while ((link = *ptr)) {
+ if (link->block == loop->preheader) {
+ *ptr = link->nextLink;
+ break;
+ }
+ ptr = &link->nextLink;
+ }
+
+ link = lalloc(sizeof(PCLink));
+ link->block = loop->preheader->nextBlock;
+ link->nextLink = loop->preheader->successors;
+ loop->preheader->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = loop->preheader;
+ link->nextLink = loop->preheader->nextBlock->predecessors;
+ loop->preheader->nextBlock->predecessors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = afterLoop;
+ link->nextLink = loop->preheader->successors;
+ loop->preheader->successors = link;
+
+ link = lalloc(sizeof(PCLink));
+ link->block = loop->preheader;
+ link->nextLink = afterLoop->predecessors;
+ afterLoop->predecessors = link;
+
+ for (loop = loop->parent; loop; loop = loop->parent)
+ loop->x4E = 1;
+}
+
+static void optimizeloop(Loop *loop) {
+ computeliveonexit(loop);
+
+ if (loop->x53 || loop->x54)
+ insertupdateinstructions(loop);
+
+ if (loop->isKnownCountingLoop) {
+ if (loop->iterationCount > 0) {
+ skiplooptest(loop);
+ if (!copts.optimizesize && !loop->x4D && !loop->x57) {
+ if (loop->x4F)
+ unrollloop(loop);
+ else if (!loop->x4E)
+ unrollloopconditional(loop);
+ }
+ }
+
+ if (loop->iterationCount != 0) {
+ if (loop->iterationCount == 1)
+ deleteloopheader(loop);
+ else if (!loop->x4E && !loop->x4D)
+ rewriteloopwithBDNZ(loop);
+ }
+
+ optimizedloops = 1;
+ } else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) {
+ rewriteunknownloopwithBDNZ(loop);
+ if (copts.unroll_speculative && !copts.optimizesize) {
+ if (loop->x4F && !loop->x57 && !loop->x52)
+ unrollunknownBDNZ(loop);
+ }
+ optimizedloops = 1;
+ }
+
+ eliminateinductionvariables(loop);
+}
+
+typedef struct LocalArray {
+ struct LocalArray *next;
+ Object *object;
+ unsigned int invalid:1;
+ unsigned int isFloat:1;
+ unsigned int isSigned:1;
+ SInt32 elementSize;
+ SInt32 arraySize;
+ SInt32 elementCount;
+ int totalUses;
+ int elements[1];
+} LocalArray;
+
+static LocalArray *scanforlocalarrays(void) {
+ SInt32 elementCount;
+ LocalArray *head;
+ LocalArray *array;
+ ObjectList *list;
+ int i;
+ SInt32 arraySize;
+ SInt32 elementSize;
+
+ head = NULL;
+
+ for (list = locals; list; list = list->next) {
+ if (
+ list->object &&
+ !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
+ list->object->type &&
+ IS_TYPE_ARRAY(list->object->type) &&
+ TPTR_TARGET(list->object->type) &&
+ TPTR_TARGET(list->object->type)->type < TYPESTRUCT
+ ) {
+ arraySize = list->object->type->size;
+ elementSize = TPTR_TARGET(list->object->type)->size;
+ elementCount = arraySize / elementSize;
+ if (elementCount > 0 && elementCount <= 8) {
+ array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray));
+ array->next = head;
+ head = array;
+
+ array->object = list->object;
+ array->elementSize = elementSize;
+ array->arraySize = arraySize;
+ array->elementCount = elementCount;
+ array->totalUses = 0;
+ array->isSigned = 1;
+ array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type));
+ array->invalid = 0;
+ array->isSigned = 1;
+ if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type)))
+ array->isSigned = 0;
+
+ for (i = 0; i < elementCount; i++) {
+ array->elements[i] = 0;
+ }
+ }
+ }
+ }
+
+ return head;
+}
+
+static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) {
+ while (arrays) {
+ if (arrays->object == object)
+ return arrays;
+ arrays = arrays->next;
+ }
+ return NULL;
+}
+
+void changearraytoregisters(void) {
+ LocalArray *arrays;
+ PCodeBlock *block;
+ PCode *instr;
+ int i;
+ PCodeArg *op;
+ LocalArray **ptr;
+ LocalArray *array;
+ int intCounter;
+ int floatCounter;
+ int reg;
+ int reg2;
+ PCode *newInstr;
+
+ arrays = NULL;
+ if ((arrays = scanforlocalarrays())) {
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (!(instr->flags & fIsBranch) && instr->argCount) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) op->arg == PCOpMemory1 &&
+ (array = lookup_array_object(arrays, op->data.mem.obj)) &&
+ !array->invalid
+ ) {
+ if (
+ (instr->flags & (fIsRead | fIsWrite)) &&
+ (op->data.mem.offset % array->elementSize) == 0 &&
+ op->data.mem.offset < array->arraySize
+ ) {
+ switch (instr->op) {
+ case PC_LBZ:
+ case PC_STB:
+ if (array->elementSize != 1)
+ array->invalid = 1;
+ break;
+ case PC_LHZ:
+ case PC_LHA:
+ case PC_STH:
+ if (array->elementSize != 2)
+ array->invalid = 1;
+ break;
+ case PC_LWZ:
+ case PC_STW:
+ if (array->elementSize != 4)
+ array->invalid = 1;
+ break;
+ case PC_LFD:
+ case PC_STFD:
+ if (array->elementSize != 8)
+ array->invalid = 1;
+ break;
+ default:
+ array->invalid = 1;
+ break;
+ }
+
+ if (!array->invalid)
+ array->elements[op->data.mem.offset / array->elementSize]++;
+ } else {
+ array->invalid = 1;
+ }
+ }
+ op++;
+ }
+ }
+ }
+ }
+
+ intCounter = 0;
+ floatCounter = 0;
+ ptr = &arrays;
+ array = *ptr;
+ while (array) {
+ if (array->invalid) {
+ *ptr = array = array->next;
+ continue;
+ }
+
+ if (array->isFloat)
+ floatCounter += array->elementCount;
+ if (!array->isFloat)
+ intCounter += array->elementCount;
+
+ for (i = 0; i < array->elementCount; i++)
+ array->totalUses += array->elements[i];
+
+ array = array->next;
+ }
+
+ if (arrays) {
+ while (intCounter > 8) {
+ LocalArray *best;
+ int score;
+ score = 0;
+ best = NULL;
+ for (array = arrays; array; array = array->next) {
+ if (!array->isFloat) {
+ if (best) {
+ if (array->totalUses < score) {
+ score = array->totalUses;
+ best = array;
+ }
+ } else {
+ best = array;
+ score = array->totalUses;
+ }
+ }
+ }
+
+ if (!best)
+ break;
+
+ if (best == arrays) {
+ arrays = best->next;
+ } else {
+ for (array = arrays; array; array = array->next) {
+ if (array->next == best) {
+ array->next = best->next;
+ break;
+ }
+ }
+ }
+
+ intCounter -= best->elementCount;
+ }
+
+ while (floatCounter > 8) {
+ LocalArray *best;
+ int score;
+ score = 0;
+ best = NULL;
+ for (array = arrays; array; array = array->next) {
+ if (array->isFloat) {
+ if (best) {
+ if (array->totalUses < score) {
+ score = array->totalUses;
+ best = array;
+ }
+ } else {
+ best = array;
+ score = array->totalUses;
+ }
+ }
+ }
+
+ if (!best)
+ break;
+
+ if (best == arrays) {
+ arrays = best->next;
+ } else {
+ for (array = arrays; array; array = array->next) {
+ if (array->next == best) {
+ array->next = best->next;
+ break;
+ }
+ }
+ }
+
+ floatCounter -= best->elementCount;
+ }
+
+ CError_ASSERT(2394, intCounter <= 8 && floatCounter <= 8);
+
+ if (!arrays)
+ return;
+
+ optimizedloop_trans_regs = 1;
+
+ for (array = arrays; array; array = array->next) {
+ for (i = 0; i < array->elementCount; i++) {
+ if (array->isFloat)
+ array->elements[i] = used_virtual_registers[RegClass_FPR]++;
+ else
+ array->elements[i] = used_virtual_registers[RegClass_GPR]++;
+ }
+ }
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (
+ !(instr->flags & fIsBranch) &&
+ instr->argCount &&
+ (instr->flags & (fIsRead | fIsWrite)) &&
+ instr->args[2].kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
+ (array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) &&
+ !(array->invalid)
+ )
+ {
+ reg2 = instr->args[0].data.reg.reg;
+ reg = array->elements[instr->args[2].data.mem.offset / array->elementSize];
+ newInstr = NULL;
+ switch (instr->op) {
+ case PC_LBZ:
+ if (array->isSigned)
+ newInstr = makepcode(PC_MR, reg2, reg);
+ else
+ newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31);
+ break;
+ case PC_STB:
+ if (array->isSigned)
+ newInstr = makepcode(PC_EXTSB, reg, reg2);
+ else
+ newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31);
+ break;
+ case PC_LHZ:
+ newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31);
+ break;
+ case PC_LHA:
+ newInstr = makepcode(PC_EXTSH, reg2, reg);
+ break;
+ case PC_STH:
+ if (array->isSigned)
+ newInstr = makepcode(PC_EXTSH, reg, reg2);
+ else
+ newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31);
+ break;
+ case PC_LWZ:
+ newInstr = makepcode(PC_MR, reg2, reg);
+ break;
+ case PC_STW:
+ newInstr = makepcode(PC_MR, reg, reg2);
+ break;
+ case PC_LFD:
+ newInstr = makepcode(PC_FMR, reg2, reg);
+ break;
+ case PC_STFD:
+ newInstr = makepcode(PC_FMR, reg, reg2);
+ break;
+ default:
+ CError_FATAL(2494);
+ break;
+ }
+
+ if (newInstr) {
+ insertpcodebefore(instr, newInstr);
+ deletepcode(instr);
+ instr = newInstr;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ freeoheap();
+}
+
+static void optimizenestedloops(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ optimizenestedloops(loop->children);
+ optimizeloop(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void optimizeloops(void) {
+ optimizedloops = 0;
+ optimizedloop_full_unroll = 0;
+ optimizedloop_trans_regs = 0;
+ if (loopsinflowgraph) {
+ computeusedefchains(0);
+ last_virtual_GPR = used_virtual_registers[RegClass_GPR];
+ liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5));
+ inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5));
+ optimizenestedloops(loopsinflowgraph);
+ freeoheap();
+ }
+}
+
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c
new file mode 100644
index 0000000..2b68dca
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c
@@ -0,0 +1,751 @@
+#include "compiler/StrengthReduction.h"
+#include "compiler/BitVectors.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/LoopDetection.h"
+#include "compiler/PCode.h"
+#include "compiler/PCodeInfo.h"
+#include "compiler/Registers.h"
+#include "compiler/UseDefChains.h"
+
+int strengthreducedloops;
+
+static PCode *findinitializer(Loop *loop, short reg) {
+ UInt32 *vec;
+ PCode *best;
+ RegUseOrDef *list;
+
+ vec = usedefinfo[loop->body->blockIndex].defvec8;
+ best = NULL;
+
+ for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
+ if (
+ !(bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) &&
+ bitvectorgetbit(list->id, vec)
+ )
+ {
+ if (best)
+ return NULL;
+ best = Defs[list->id].pcode;
+ }
+ }
+
+ if (best) {
+ if (best->op == PC_LI || best->op == PC_ADDI || best->op == PC_ADD)
+ return best;
+ }
+
+ return NULL;
+}
+
+static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) {
+ RegUseOrDef *list;
+ PCode *instr;
+
+ for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
+ instr = Defs[list->id].pcode;
+ if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) {
+ if (instr->op != PC_ADDI)
+ return 0;
+ if (instr->args[1].data.reg.reg != reg)
+ return 0;
+ if (instr->args[2].data.imm.value != step)
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) {
+ BasicInductionVar *biv;
+ RegUseOrDef *list;
+ PCode *instr;
+ InstrList *instrList;
+
+ for (biv = loop->basicInductionVars; biv; biv = biv->next) {
+ if (biv->reg == reg)
+ return;
+ }
+
+ biv = oalloc(sizeof(BasicInductionVar));
+ biv->next = loop->basicInductionVars;
+ loop->basicInductionVars = biv;
+
+ biv->loop = loop;
+ biv->inductionVars = NULL;
+ biv->instrsC = NULL;
+ biv->step = step;
+ biv->reg = reg;
+
+ for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
+ instr = Defs[list->id].pcode;
+ if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) {
+ instrList = oalloc(sizeof(InstrList));
+ instrList->next = biv->instrsC;
+ biv->instrsC = instrList;
+ instrList->instr = instr;
+ }
+ }
+
+ biv->initializer = findinitializer(loop, reg);
+}
+
+static void findbasicinductionvariables(Loop *loop) {
+ SInt16 step;
+ BlockList *block;
+ PCode *instr;
+ short reg;
+
+ for (block = loop->blocks; block; block = block->next) {
+ for (instr = block->block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr->op == PC_ADDI) {
+ if (
+ (reg = instr->args[0].data.reg.reg) >= 32 &&
+ instr->args[1].data.reg.reg == reg &&
+ isbasicinductionvariable(loop, reg, step = instr->args[2].data.imm.value)
+ )
+ addbasicinductionvariable(loop, reg, step);
+ }
+ }
+ }
+}
+
+static void findallbasicinductionvariables(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ findallbasicinductionvariables(loop->children);
+ findbasicinductionvariables(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) {
+ RegUseOrDef *list;
+ int counter;
+ Loop *loop;
+ Loop *scanloop;
+ PCode *instr;
+
+ instr = Uses[useID].pcode;
+ *result2 = 0;
+ *result3 = 0;
+ *result4 = NULL;
+
+ switch (instr->op) {
+ case PC_MULLI:
+ *result1 = instr->args[2].data.imm.value;
+ break;
+
+ case PC_RLWINM:
+ if (instr->args[3].data.imm.value)
+ return 0;
+ if (instr->args[2].data.imm.value > 15)
+ return 0;
+ if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value))
+ return 0;
+ if (PCODE_FLAG_SET_F(instr) & fRecordBit)
+ return 0;
+ *result1 = 1 << instr->args[2].data.imm.value;
+ break;
+
+ case PC_LBZX:
+ case PC_LHZX:
+ case PC_LHAX:
+ case PC_LWZX:
+ case PC_STBX:
+ case PC_STHX:
+ case PC_STWX:
+ case PC_LFSX:
+ case PC_LFDX:
+ case PC_STFSX:
+ case PC_STFDX:
+ *result2 = 0;
+ *result3 = 0;
+ if (instr->args[1].data.reg.reg == biv->reg) {
+ *result2 = 1;
+ *result3 = 2;
+ } else if (instr->args[2].data.reg.reg == biv->reg) {
+ *result2 = 2;
+ *result3 = 1;
+ }
+
+ counter = 0;
+ for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) {
+ if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks))
+ counter++;
+ }
+ if (counter)
+ return 0;
+
+ loop = biv->loop;
+ for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) {
+ counter = 0;
+ for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) {
+ if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks))
+ counter++;
+ }
+ if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks))
+ counter++;
+ if (counter)
+ break;
+ loop = scanloop;
+ }
+
+ *result4 = loop;
+ *result1 = 1;
+ return 1;
+
+ default:
+ return 0;
+ }
+
+ counter = 0;
+ for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) {
+ if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks))
+ counter++;
+ }
+
+ return counter == 1;
+}
+
+static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) {
+ InductionVar *iv;
+
+ iv = oalloc(sizeof(InductionVar));
+ iv->next = biv->inductionVars;
+ biv->inductionVars = iv;
+
+ iv->basicVar = biv;
+ iv->instr = instr;
+ iv->instrC = NULL;
+ iv->step = val1;
+ iv->x18 = val2;
+ iv->x1A = val3;
+ iv->someloop = val4;
+ if (instr->flags & (fIsRead | fIsWrite))
+ iv->x1C = -1;
+ else
+ iv->x1C = instr->args[0].data.reg.reg;
+ iv->x1E = -1;
+}
+
+static void findnonbasicinductionvariables(Loop *loop) {
+ BasicInductionVar *biv;
+ RegUseOrDef *list;
+ SInt32 result1;
+ short result2;
+ short result3;
+ Loop *result4;
+
+ for (biv = loop->basicInductionVars; biv; biv = biv->next) {
+ for (list = reg_Uses[RegClass_GPR][biv->reg]; list; list = list->next) {
+ if (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) {
+ if (isinductionvariable(biv, list->id, &result1, &result2, &result3, &result4))
+ addinductionvariable(biv, Uses[list->id].pcode, result1, result2, result3, result4);
+ }
+ }
+ }
+}
+
+static void findallnonbasicinductionvariables(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ findallnonbasicinductionvariables(loop->children);
+ if (loop->basicInductionVars)
+ findnonbasicinductionvariables(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+static void initializeinductionvariable(InductionVar *iv) {
+ BasicInductionVar *biv; // r31
+ PCode *instr; // r27
+ PCodeBlock *preheader; // r30
+ SInt32 value30; // r30
+ short reg29; // r29
+ short reg26; // r26
+
+ biv = iv->basicVar;
+ preheader = biv->loop->preheader;
+
+ if (iv->x1A) {
+ reg29 = iv->instr->args[iv->x1A].data.reg.reg;
+ reg26 = iv->instr->args[iv->x18].data.reg.reg;
+ instr = NULL;
+
+ if (
+ biv->initializer &&
+ biv->initializer->op == PC_LI &&
+ biv->initializer->block == preheader
+ )
+ {
+ if (biv->initializer->args[1].data.imm.value == 0)
+ instr = makepcode(PC_MR, iv->x1E, reg29);
+ else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value))
+ instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value);
+ }
+
+ if (!instr)
+ instr = makepcode(PC_ADD, iv->x1E, reg29, reg26);
+
+ if (biv->initializer && instr->op != PC_ADD)
+ insertpcodeafter(biv->initializer, instr);
+ else if (iv->someloop && iv->someloop->preheader->lastPCode)
+ insertpcodebefore(iv->someloop->preheader->lastPCode, instr);
+ else
+ insertpcodebefore(preheader->lastPCode, instr);
+
+ iv->instrC = instr;
+ iv->x1C = reg29;
+ return;
+ }
+
+ if (!biv->initializer || biv->initializer->op != PC_LI) {
+ instr = copypcode(iv->instr);
+ instr->args[0].data.reg.reg = iv->x1E;
+ insertpcodebefore(preheader->lastPCode, instr);
+ } else {
+ value30 = biv->initializer->args[1].data.imm.value * iv->step;
+ if (!FITS_IN_SHORT(value30)) {
+ instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30));
+ insertpcodeafter(biv->initializer, instr);
+ if (value30 != 0)
+ insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30)));
+ } else {
+ instr = makepcode(PC_LI, iv->x1E, value30);
+ insertpcodeafter(biv->initializer, instr);
+ }
+ }
+}
+
+static void incrementinductionvariable(InductionVar *iv) {
+ SInt32 value;
+ BasicInductionVar *biv;
+ PCode *instr;
+ InstrList *list;
+
+ biv = iv->basicVar;
+ value = iv->step * biv->step;
+ for (list = biv->instrsC; list; list = list->next) {
+ if (!FITS_IN_SHORT(value)) {
+ instr = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value));
+ insertpcodeafter(list->instr, instr);
+
+ if (value != 0) {
+ instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value));
+ insertpcodeafter(list->instr->nextPCode, instr);
+ }
+ } else {
+ instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value);
+ insertpcodeafter(list->instr, instr);
+ }
+ }
+}
+
+static void copyinductionvariable(InductionVar *iv) {
+ if (iv->instr->flags & (fIsRead | fIsWrite)) {
+ iv->instr->op -= 2;
+ iv->instr->args[1].data.reg.reg = iv->x1E;
+ iv->instr->args[2].kind = PCOp_IMMEDIATE;
+ iv->instr->args[2].data.imm.value = 0;
+ iv->instr->args[2].data.imm.obj = NULL;
+ } else {
+ insertpcodeafter(iv->instr, makepcode(PC_MR, iv->x1C, iv->x1E));
+ deletepcode(iv->instr);
+ }
+}
+
+static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) {
+ SInt32 addend;
+ BlockList *list;
+ PCode *instr;
+ PCodeArg *op;
+ int i;
+
+ if (iv->instrC && iv->x1C == reg) {
+ if (iv->instrC->op == PC_MR)
+ addend = 0;
+ else if (iv->instrC->op == PC_ADDI)
+ addend = iv->instrC->args[2].data.imm.value;
+ else
+ return 0;
+
+ if (step2 == (addend + (step1 * iv->step * loop2->iterationCount))) {
+ for (list = loop1->blocks; list && list->block != loop2->blocks->block; list = list->next) {
+ for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg == reg
+ )
+ return 0;
+ op++;
+ }
+ }
+ }
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) {
+ Loop *scanloop;
+ BasicInductionVar *biv;
+ InductionVar *iv;
+ PCode *instr;
+ PCodeArg *op;
+ int i;
+
+ for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) {
+ if (
+ scanloop->isKnownCountingLoop &&
+ scanloop->x4F &&
+ bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C)
+ )
+ {
+ for (biv = scanloop->basicInductionVars; biv; biv = biv->next) {
+ for (iv = biv->inductionVars; iv; iv = iv->next) {
+ if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) {
+ deletepcode(iv->instrC);
+ if (initializer) {
+ insertpcodeafter(initializer, iv->instrC);
+ } else if (loop->body->lastPCode) {
+ for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ (op->data.reg.effect & EffectWrite) &&
+ op->data.reg.reg == reg
+ )
+ break;
+ op++;
+ }
+ }
+
+ if (instr)
+ insertpcodeafter(instr, iv->instrC);
+ else
+ insertpcodebefore(loop->body->firstPCode, iv->instrC);
+ } else {
+ appendpcode(loop->body, iv->instrC);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void strengthreducenestedbiv(BasicInductionVar *biv) {
+ Loop *loop;
+ InductionVar *iv;
+
+ loop = biv->loop;
+ for (iv = biv->inductionVars; iv; iv = iv->next)
+ strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, loop);
+
+ strengthreducenestediv(biv->reg, biv->step, biv->initializer, loop);
+}
+
+static void strengthreduceinductionvariable(BasicInductionVar *biv) {
+ int counter;
+ InductionVar *iv;
+ InductionVar *otherIv;
+ short reg;
+
+ counter = 0;
+ for (iv = biv->inductionVars; iv; iv = iv->next) {
+ if (iv->step == 1)
+ counter++;
+ }
+
+ for (iv = biv->inductionVars; iv; iv = iv->next) {
+ if (
+ (counter <= 4 || iv->step != 1) &&
+ iv->instr->block &&
+ (iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE)
+ )
+ {
+ if (iv->x1E == -1) {
+ iv->x1E = used_virtual_registers[RegClass_GPR]++;
+ initializeinductionvariable(iv);
+ incrementinductionvariable(iv);
+ if (iv->step == 1) {
+ reg = iv->instr->args[iv->x1A].data.reg.reg;
+ for (otherIv = iv->next; otherIv; otherIv = otherIv->next) {
+ if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg)
+ otherIv->x1E = iv->x1E;
+ }
+ } else {
+ for (otherIv = iv->next; otherIv; otherIv = otherIv->next) {
+ if (otherIv->step == iv->step)
+ otherIv->x1E = iv->x1E;
+ }
+ }
+ }
+
+ copyinductionvariable(iv);
+ strengthreducedloops = 1;
+ }
+ }
+}
+
+#ifdef __MWERKS__
+#pragma options align=mac68k
+#endif
+typedef struct BivInit {
+ SInt32 x0;
+ short x4;
+ short x6;
+ short x8;
+ Object *xA;
+} BivInit;
+#ifdef __MWERKS__
+#pragma options align=reset
+#endif
+
+static void calc_biv_init(BasicInductionVar *biv, BivInit *init) {
+ PCode *instr;
+ PCode *scan;
+ PCodeArg *op;
+ int i;
+
+ instr = biv->initializer;
+ init->x0 = 0;
+ init->x4 = -1;
+ init->x6 = -1;
+ init->x8 = 0;
+ init->xA = NULL;
+
+ if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD))
+ return;
+
+ if (instr->op == PC_ADDI) {
+ if (instr->args[1].data.reg.reg == biv->reg) {
+ init->x0 = instr->args[2].data.imm.value;
+ for (scan = instr->prevPCode; scan; scan = scan->prevPCode) {
+ op = scan->args;
+ i = scan->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg == biv->reg &&
+ (op->data.reg.effect & EffectWrite)
+ )
+ {
+ if (scan->op == PC_ADD) {
+ init->x4 = scan->args[1].data.reg.reg;
+ init->x6 = scan->args[2].data.reg.reg;
+ } else if (scan->op == PC_ADDI) {
+ if (scan->args[2].kind == PCOp_IMMEDIATE) {
+ init->x4 = scan->args[1].data.reg.reg;
+ init->x8 = scan->args[2].data.imm.value;
+ } else if (scan->args[2].kind == PCOp_MEMORY) {
+ init->x4 = scan->args[1].data.reg.reg;
+ init->x8 = scan->args[2].data.mem.offset;
+ init->xA = scan->args[2].data.mem.obj;
+ }
+ }
+ return;
+ }
+ op++;
+ }
+ }
+ } else {
+ if (instr->args[2].kind == PCOp_IMMEDIATE) {
+ init->x4 = instr->args[1].data.reg.reg;
+ init->x8 = instr->args[2].data.imm.value;
+ } else if (instr->args[2].kind == PCOp_MEMORY) {
+ init->x4 = instr->args[1].data.reg.reg;
+ init->x8 = instr->args[2].data.mem.offset;
+ init->xA = instr->args[2].data.mem.obj;
+ }
+ }
+ } else if (instr->op == PC_ADD) {
+ if (instr->args[1].data.reg.reg == biv->reg) {
+ init->x6 = instr->args[2].data.reg.reg;
+ for (scan = instr->prevPCode; scan; scan = scan->prevPCode) {
+ op = scan->args;
+ i = scan->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg == biv->reg &&
+ (op->data.reg.effect & EffectWrite) &&
+ scan->op == PC_ADDI
+ )
+ {
+ if (scan->args[2].kind == PCOp_IMMEDIATE) {
+ init->x4 = scan->args[1].data.reg.reg;
+ init->x8 = scan->args[2].data.imm.value;
+ } else if (scan->args[2].kind == PCOp_MEMORY) {
+ init->x4 = scan->args[1].data.reg.reg;
+ init->x8 = scan->args[2].data.mem.offset;
+ init->xA = scan->args[2].data.mem.obj;
+ }
+ return;
+ }
+ op++;
+ }
+ }
+ } else {
+ init->x4 = instr->args[1].data.reg.reg;
+ init->x6 = instr->args[2].data.reg.reg;
+ }
+ }
+}
+
+static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) {
+ PCode *instr1; // r31
+ int reg1; // r30
+ int reg2; // r29
+ PCode *instr2; // r24
+ PCodeBlock *nextBlock; // r24
+ BlockList *list;
+ PCodeArg *op;
+ int i;
+ PCode *instr;
+
+ instr1 = NULL;
+ instr2 = NULL;
+
+ reg1 = biv1->reg;
+ CError_ASSERT(930, reg1 >= 0);
+
+ reg2 = biv2->reg;
+ CError_ASSERT(934, reg2 >= 0);
+
+ if (!FITS_IN_SHORT(difference))
+ return;
+
+ for (list = loop->blocks; list; list = list->next) {
+ for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) {
+ if (instr1) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ op->arg == RegClass_GPR &&
+ op->data.reg.reg == reg1
+ )
+ return;
+ op++;
+ }
+ }
+
+ if (instr->op == PC_ADDI) {
+ if (instr->args[0].data.reg.reg == reg1) {
+ if (instr1)
+ return;
+ instr1 = instr;
+ } else if (instr->args[0].data.reg.reg == reg2) {
+ if (instr2)
+ return;
+ instr2 = instr;
+ }
+ }
+ }
+ }
+
+ if (loop->body->lastPCode->flags & fIsBranch) {
+ nextBlock = NULL;
+
+ for (i = 0; i < loop->body->lastPCode->argCount; i++) {
+ if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) {
+ nextBlock = loop->body->lastPCode->args[i].data.label.label->block;
+ break;
+ }
+ }
+
+ if (!nextBlock)
+ return;
+ } else {
+ nextBlock = loop->body->nextBlock;
+ }
+
+ deletepcode(instr1);
+ instr1->args[1].data.reg.reg = reg2;
+ instr1->args[2].data.imm.value = difference;
+
+ if (nextBlock->firstPCode)
+ insertpcodebefore(nextBlock->firstPCode, instr1);
+ else
+ appendpcode(nextBlock, instr1);
+
+ biv1->reg = -1;
+ strengthreducedloops = 1;
+}
+
+static void strengthreduceinductionvariables(Loop *loop) {
+ BasicInductionVar *biv1;
+ BasicInductionVar *biv2;
+ BivInit init1;
+ BivInit init2;
+
+ for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) {
+ if (biv1->inductionVars)
+ strengthreduceinductionvariable(biv1);
+ strengthreducenestedbiv(biv1);
+ }
+
+ for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) {
+ if (biv1->reg != -1) {
+ calc_biv_init(biv1, &init1);
+ if (init1.x4 != -1) {
+ for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) {
+ if (biv2->reg != -1 && biv2 != biv1) {
+ calc_biv_init(biv2, &init2);
+ if (
+ init2.x4 != -1 &&
+ init1.x4 == init2.x4 &&
+ init1.x6 == init2.x6 &&
+ init1.x8 == init2.x8 &&
+ init1.xA == init2.xA &&
+ biv1->step == biv2->step
+ )
+ {
+ if (init1.x0 < init2.x0) {
+ combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0);
+ } else {
+ combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void strengthreduceallinductionvariables(Loop *loop) {
+ while (loop) {
+ if (loop->children)
+ strengthreduceallinductionvariables(loop->children);
+ if (loop->basicInductionVars)
+ strengthreduceinductionvariables(loop);
+ loop = loop->nextSibling;
+ }
+}
+
+void strengthreduceloops(void) {
+ strengthreducedloops = 0;
+ if (loopsinflowgraph) {
+ computeusedefchains(0);
+ findallbasicinductionvariables(loopsinflowgraph);
+ findallnonbasicinductionvariables(loopsinflowgraph);
+ strengthreduceallinductionvariables(loopsinflowgraph);
+ freeoheap();
+ }
+}
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c
new file mode 100644
index 0000000..0907fa1
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c
@@ -0,0 +1,661 @@
+#include "compiler/ValueNumbering.h"
+#include "compiler/Alias.h"
+#include "compiler/PCode.h"
+#include "compiler/Registers.h"
+#include "compiler/RegisterInfo.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/CError.h"
+
+typedef struct ValueLabel {
+ struct ValueLabel *next;
+ PCodeArg op;
+} ValueLabel;
+
+typedef struct AvailableValue {
+ struct AvailableValue *next;
+ ValueLabel *labelled;
+ PCode *pcode;
+ int killedregister;
+ int aliasnumber;
+ int opnumbers[0];
+} AvailableValue;
+
+typedef struct RegValue {
+ int number;
+ int x4;
+ AvailableValue *available;
+} RegValue;
+
+typedef struct State {
+ void *stackedvalues;
+ int valueceiling;
+} State;
+
+typedef struct StackedValue {
+ struct StackedValue *next;
+ PCodeArg op;
+ RegValue value;
+ Alias *alias;
+ PCode *valuepcode;
+} StackedValue;
+
+int removedcommonsubexpressions;
+int nextvaluenumber;
+static AvailableValue *opvalue[428];
+static RegValue *regvalue[RegClassMax];
+static StackedValue *stackedvalues;
+static int valueceiling;
+static int moreaggressiveoptimization;
+
+static void allocatecsedatastructures(void) {
+ char rclass;
+
+ for (rclass = 0; rclass < RegClassMax; rclass++)
+ regvalue[(char) rclass] = oalloc(sizeof(RegValue) * used_virtual_registers[(char) rclass]);
+}
+
+static void initializecsedatastructures(void) {
+ RegValue *rv;
+ char rclass;
+ int i;
+
+ nextvaluenumber = 0;
+
+ for (i = 0; i < 428; i++)
+ opvalue[i] = NULL;
+
+ for (rclass = 0; rclass < RegClassMax; rclass++) {
+ rv = regvalue[(char) rclass];
+ for (i = 0; i < used_virtual_registers[(char) rclass]; i++, rv++) {
+ rv->number = nextvaluenumber++;
+ rv->x4 = 0;
+ rv->available = NULL;
+ }
+ }
+
+ initialize_alias_values();
+ stackedvalues = NULL;
+ valueceiling = 0x7FFFFFFF;
+}
+
+static void labelvalue(AvailableValue *av, PCodeArg *op) {
+ ValueLabel *label = oalloc(sizeof(ValueLabel));
+ label->op = *op;
+ label->next = av->labelled;
+ av->labelled = label;
+}
+
+static void unlabelvalue(AvailableValue *av, PCodeArg *op) {
+ ValueLabel *labelled;
+ ValueLabel **ptr;
+
+ ptr = &av->labelled;
+ while ((labelled = *ptr)) {
+ if (labelled->op.data.reg.reg == op->data.reg.reg)
+ *ptr = labelled->next;
+ else
+ ptr = &labelled->next;
+ }
+}
+
+static void stackregistervalue(PCodeArg *op, RegValue *value) {
+ StackedValue *stacked = oalloc(sizeof(StackedValue));
+ stacked->next = stackedvalues;
+ stackedvalues = stacked;
+
+ stacked->op = *op;
+ stacked->value = *value;
+}
+
+static void stackmemoryvalue(Alias *alias) {
+ StackedValue *stacked = oalloc(sizeof(StackedValue));
+ stacked->next = stackedvalues;
+ stackedvalues = stacked;
+
+ stacked->op.kind = PCOp_MEMORY;
+ stacked->alias = alias;
+ stacked->value.number = alias->valuenumber;
+ stacked->valuepcode = alias->valuepcode;
+}
+
+static void unstackvalue(StackedValue *stacked) {
+ PCodeArg *op = &stacked->op;
+ RegValue *value;
+
+ if (stacked->op.kind == PCOp_MEMORY) {
+ stacked->alias->valuenumber = stacked->value.number;
+ stacked->alias->valuepcode = stacked->valuepcode;
+ } else {
+ value = &regvalue[op->arg][op->data.reg.reg];
+ if (value->available)
+ unlabelvalue(value->available, op);
+ value->number = stacked->value.number;
+ value->x4 = stacked->value.x4;
+ value->available = stacked->value.available;
+ if (value->available)
+ labelvalue(value->available, op);
+ }
+}
+
+static int samevalue(PCodeArg *op1, PCodeArg *op2) {
+ return regvalue[op1->arg][op1->data.reg.reg].number == regvalue[op2->arg][op2->data.reg.reg].number;
+}
+
+static int killregister(PCodeArg *op) {
+ RegValue *value;
+
+ value = &regvalue[op->arg][op->data.reg.reg];
+ if (value->number < valueceiling && nextvaluenumber >= valueceiling)
+ stackregistervalue(op, value);
+
+ if (value->available)
+ unlabelvalue(value->available, op);
+ value->available = NULL;
+ value->x4 = 0;
+ return value->number = nextvaluenumber++;
+}
+
+void killmemory(Alias *alias, PCode *newValue) {
+ if (alias->valuenumber < valueceiling && nextvaluenumber >= valueceiling)
+ stackmemoryvalue(alias);
+
+ if (newValue) {
+ alias->valuenumber = regvalue[newValue->args[0].arg][newValue->args[0].data.reg.reg].number;
+ alias->valuepcode = newValue;
+ } else {
+ alias->valuenumber = nextvaluenumber++;
+ alias->valuepcode = NULL;
+ }
+}
+
+static void killspecificCSEs(short op) {
+ AvailableValue *av;
+ ValueLabel *labelled;
+
+ for (av = opvalue[op]; av; av = av->next) {
+ for (labelled = av->labelled; labelled; labelled = labelled->next)
+ killregister(&labelled->op);
+ }
+}
+
+static void killallCSEs(void) {
+ AvailableValue *av;
+ ValueLabel *labelled;
+ int i;
+
+ for (i = 0; i < 428; i++) {
+ for (av = opvalue[i]; av; av = av->next) {
+ for (labelled = av->labelled; labelled; labelled = labelled->next)
+ killregister(&labelled->op);
+ }
+ }
+}
+
+static void killregisters(PCode *pcode) {
+ PCodeArg *op;
+ int i;
+
+ for (i = 0, op = pcode->args; i < pcode->argCount; i++, op++) {
+ if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite))
+ killregister(op);
+ }
+}
+
+static void copyregister(PCodeArg *src, PCodeArg *dest) {
+ RegValue *srcvalue;
+ RegValue *destvalue;
+
+ srcvalue = &regvalue[src->arg][src->data.reg.reg];
+ destvalue = &regvalue[dest->arg][dest->data.reg.reg];
+
+ if (destvalue->number < valueceiling && nextvaluenumber >= valueceiling)
+ stackregistervalue(dest, destvalue);
+
+ if (destvalue->available)
+ unlabelvalue(destvalue->available, dest);
+ destvalue->available = srcvalue->available;
+ if (destvalue->available)
+ labelvalue(destvalue->available, dest);
+
+ destvalue->number = srcvalue->number;
+
+ if (srcvalue->x4 && srcvalue->number == regvalue[src->arg][srcvalue->x4].number)
+ destvalue->x4 = srcvalue->x4;
+ else
+ destvalue->x4 = src->data.reg.reg;
+}
+
+static int matchvalues(AvailableValue *av, PCode *match) {
+ PCodeArg *avOp;
+ PCodeArg *matchOp;
+ int i;
+
+ for (avOp = &av->pcode->args[0], matchOp = &match->args[0], i = 0; i < match->argCount; i++, avOp++, matchOp++) {
+ if (i != 0) {
+ switch (avOp->kind) {
+ case PCOp_REGISTER:
+ if (av->opnumbers[i] != regvalue[matchOp->arg][matchOp->data.reg.reg].number)
+ return 0;
+ break;
+ case PCOp_MEMORY:
+ if (matchOp->kind != PCOp_MEMORY)
+ return 0;
+ if (matchOp->data.mem.obj != avOp->data.mem.obj)
+ return 0;
+ if (matchOp->data.mem.offset != avOp->data.mem.offset)
+ return 0;
+ if ((unsigned char) matchOp->arg != (unsigned char) avOp->arg)
+ return 0;
+ break;
+ case PCOp_IMMEDIATE:
+ if (matchOp->kind != PCOp_IMMEDIATE)
+ return 0;
+ if (matchOp->data.imm.value != avOp->data.imm.value)
+ return 0;
+ break;
+ case PCOp_LABEL:
+ if (matchOp->kind != PCOp_LABEL)
+ return 0;
+ if (matchOp->data.label.label != avOp->data.label.label)
+ return 0;
+ break;
+ case PCOp_SYSREG:
+ CError_FATAL(572);
+ }
+ }
+ }
+
+ if ((match->flags & (fIsRead | fPCodeFlag20000)) && match->alias->valuenumber != av->aliasnumber)
+ return 0;
+
+ return 1;
+}
+
+static void chooselocation(AvailableValue *av, PCodeArg *op) {
+ ValueLabel *labelled;
+ PCodeArg *baseop;
+
+ baseop = &av->pcode->args[0];
+ labelled = av->labelled;
+ while (labelled) {
+ if (labelled->op.data.reg.reg == baseop->data.reg.reg) {
+ *op = labelled->op;
+ return;
+ }
+ labelled = labelled->next;
+ }
+
+ *op = av->labelled[0].op;
+}
+
+static int findavailablevalue(PCode *pcode, PCodeArg *op) {
+ AvailableValue *av;
+ PCodeArg tmp1;
+ PCodeArg tmp2;
+
+ for (av = opvalue[pcode->op]; av; av = av->next) {
+ if (av->labelled && av->pcode->flags == pcode->flags && av->pcode->argCount == pcode->argCount) {
+ if (!matchvalues(av, pcode)) {
+ if (!(pcode->flags & fCommutative))
+ continue;
+
+ tmp1 = pcode->args[1];
+ pcode->args[1] = pcode->args[2];
+ pcode->args[2] = tmp1;
+
+ if (!matchvalues(av, pcode)) {
+ tmp2 = pcode->args[1];
+ pcode->args[1] = pcode->args[2];
+ pcode->args[2] = tmp2;
+ continue;
+ }
+ }
+ chooselocation(av, op);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static void addavailablevalue(PCode *pcode) {
+ AvailableValue *av;
+ PCodeArg *op;
+ int i;
+
+ av = oalloc(sizeof(AvailableValue) + sizeof(int) * pcode->argCount);
+ av->labelled = NULL;
+ av->pcode = pcode;
+ for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
+ if (op->kind == PCOp_REGISTER)
+ av->opnumbers[i] = regvalue[op->arg][op->data.reg.reg].number;
+ }
+
+ if (pcode->flags & (fIsRead | fPCodeFlag20000))
+ av->aliasnumber = pcode->alias->valuenumber;
+
+ op = &pcode->args[0];
+ av->killedregister = killregister(op);
+ labelvalue(av, op);
+ regvalue[op->arg][op->data.reg.reg].available = av;
+ av->next = opvalue[pcode->op];
+ opvalue[pcode->op] = av;
+}
+
+static int isCSEop(PCode *pcode) {
+ PCodeArg *baseOp;
+ PCodeArg *op;
+ int i;
+
+ baseOp = &pcode->args[0];
+
+ switch (pcode->op) {
+ case PC_CMPI:
+ case PC_CMP:
+ case PC_CMPLI:
+ case PC_CMPL:
+ case PC_FCMPU:
+ case PC_FCMPO:
+ if (!moreaggressiveoptimization)
+ return 0;
+ break;
+ case PC_LI:
+ case PC_LIS:
+ if (!moreaggressiveoptimization)
+ return 0;
+ if (pcode->args[0].data.reg.reg < first_fe_temporary_register[RegClass_GPR] || pcode->args[0].data.reg.reg > last_temporary_register[RegClass_GPR])
+ return 0;
+ break;
+ }
+
+ if (PCODE_FLAG_SET_F(pcode) & (fIsVolatile | fSideEffects | fOverflow | fSetsCarry | fRecordBit))
+ return 0;
+
+ for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
+ if (op != baseOp &&
+ op->kind == baseOp->kind &&
+ op->arg == baseOp->arg &&
+ op->data.reg.reg == baseOp->data.reg.reg)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int isCSEload(PCode *pcode) {
+ PCodeArg *op;
+ int i;
+ int count;
+
+ count = 0;
+ for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
+ if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite))
+ count++;
+ }
+
+ return count == 1;
+}
+
+static void registercopy(PCode *pcode) {
+ PCodeArg *op1;
+ PCodeArg *op2;
+
+ op1 = &pcode->args[0];
+ op2 = &pcode->args[1];
+ if (samevalue(op2, op1))
+ deletepcode(pcode);
+ else
+ copyregister(op2, op1);
+}
+
+static PCode *recentlystored(Alias *alias, PCodeArg *op) {
+ PCode *pc;
+ if ((pc = alias->valuepcode) && alias->valuenumber == regvalue[pc->args[0].arg][pc->args[0].data.reg.reg].number) {
+ *op = pc->args[0];
+ return pc;
+ } else {
+ return NULL;
+ }
+}
+
+static void simpleload(PCode *pcode) {
+ PCodeArg *origOp;
+ PCodeArg op;
+ PCode *rs;
+
+ origOp = &pcode->args[0];
+ if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) {
+ killregisters(pcode);
+ return;
+ }
+
+ if (findavailablevalue(pcode, &op)) {
+ if (!samevalue(origOp, &op)) {
+ insertpcodebefore(pcode, makecopyinstruction(&op, origOp));
+ copyregister(&op, origOp);
+ }
+ deletepcode(pcode);
+ removedcommonsubexpressions = 1;
+ } else if ((rs = recentlystored(pcode->alias, &op)) && can_reuse_stored_value(rs, pcode)) {
+ if (!samevalue(origOp, &op)) {
+ insertpcodebefore(pcode, makecopyinstruction(&op, origOp));
+ copyregister(&op, origOp);
+ }
+ deletepcode(pcode);
+ removedcommonsubexpressions = 1;
+ } else {
+ addavailablevalue(pcode);
+ }
+}
+
+static void simplestore(PCode *pcode) {
+ update_alias_value(pcode->alias, pcode);
+ killregisters(pcode);
+}
+
+static void pointerload(PCode *pcode) {
+ PCodeArg *op;
+ PCodeArg buf;
+
+ op = &pcode->args[0];
+
+ if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) {
+ killregisters(pcode);
+ return;
+ }
+
+ if (findavailablevalue(pcode, &buf)) {
+ if (!samevalue(op, &buf)) {
+ insertpcodebefore(pcode, makecopyinstruction(&buf, op));
+ copyregister(&buf, op);
+ }
+ deletepcode(pcode);
+ removedcommonsubexpressions = 1;
+ } else {
+ addavailablevalue(pcode);
+ }
+}
+
+static void pointerstore(PCode *pcode) {
+ update_alias_value(pcode->alias, NULL);
+ killregisters(pcode);
+}
+
+static void arithmeticop(PCode *pcode) {
+ PCodeArg *op;
+ PCodeArg buf;
+
+ op = &pcode->args[0];
+
+ if (findavailablevalue(pcode, &buf)) {
+ if (!samevalue(op, &buf)) {
+ insertpcodebefore(pcode, makecopyinstruction(&buf, op));
+ copyregister(&buf, op);
+ }
+ deletepcode(pcode);
+ removedcommonsubexpressions = 1;
+ } else {
+ addavailablevalue(pcode);
+ }
+}
+
+static void functioncall(PCode *pcode) {
+ killregisters(pcode);
+ if (coloring) {
+ update_all_alias_values();
+ killallCSEs();
+ } else {
+ update_alias_value(pcode->alias, NULL);
+ }
+}
+
+static void operatefrommemory(PCode *pcode) {
+ CError_FATAL(980);
+}
+
+static void operatetomemory(PCode *pcode) {
+ CError_FATAL(1011);
+}
+
+static void propagatecopiesto(PCode *pcode) {
+ PCodeArg *op;
+ int i;
+
+ for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
+ if (
+ op->kind == PCOp_REGISTER &&
+ (op->data.reg.effect & (EffectRead | EffectWrite | Effect8)) == EffectRead &&
+ op->data.reg.reg >= n_real_registers[op->arg] &&
+ regvalue[op->arg][op->data.reg.reg].x4 &&
+ regvalue[op->arg][op->data.reg.reg].x4 >= n_real_registers[op->arg] &&
+ regvalue[op->arg][op->data.reg.reg].number == regvalue[op->arg][regvalue[op->arg][op->data.reg.reg].x4].number
+ ) {
+ op->data.reg.reg = regvalue[op->arg][op->data.reg.reg].x4;
+ }
+ }
+}
+
+static void removecsesfrombasicblock(PCodeBlock *block) {
+ PCode *pcode;
+ PCode *next;
+
+ for (pcode = block->firstPCode; pcode; pcode = next) {
+ next = pcode->nextPCode;
+ propagatecopiesto(pcode);
+ if (pcode->flags & fIsMove) {
+ registercopy(pcode);
+ } else if ((pcode->flags & fIsCall) && (pcode->flags & (fLink | fSideEffects))) {
+ functioncall(pcode);
+ } else if (pcode->flags & fIsRead) {
+ if (pcode->flags & fIsPtrOp)
+ pointerload(pcode);
+ else
+ simpleload(pcode);
+ } else if (pcode->flags & fIsWrite) {
+ if (pcode->flags & fIsPtrOp)
+ pointerstore(pcode);
+ else
+ simplestore(pcode);
+ } else if (pcode->flags & fPCodeFlag20000) {
+ operatefrommemory(pcode);
+ } else if (pcode->flags & fPCodeFlag40000) {
+ operatetomemory(pcode);
+ } else if ((pcode->flags & fIsCSE) && isCSEop(pcode)) {
+ arithmeticop(pcode);
+ } else {
+ killregisters(pcode);
+ }
+ }
+
+ block->flags |= fVisited;
+}
+
+static void getvaluestate(State *state) {
+ state->stackedvalues = stackedvalues;
+ state->valueceiling = valueceiling;
+}
+
+static void setvaluestate(State *state) {
+ stackedvalues = state->stackedvalues;
+ valueceiling = state->valueceiling;
+}
+
+static void forkvaluestate(int number) {
+ stackedvalues = NULL;
+ valueceiling = number;
+}
+
+static void regressvaluestate(void) {
+ AvailableValue *av;
+ AvailableValue **ptr;
+ int i;
+ StackedValue *stacked;
+
+ for (i = 0; i < 428; i++) {
+ ptr = &opvalue[i];
+ while ((av = *ptr)) {
+ if (av->killedregister >= valueceiling)
+ *ptr = av->next;
+ else
+ ptr = &av->next;
+ }
+ }
+
+ for (stacked = stackedvalues; stacked; stacked = stacked->next)
+ unstackvalue(stacked);
+}
+
+static void removecsesfromextendedbasicblock(PCodeBlock *block) {
+ PCLink *succ;
+ int counter;
+ State state;
+
+ removecsesfrombasicblock(block);
+ while (block->successors &&
+ !block->successors->nextLink &&
+ block->successors->block->predecessors &&
+ !block->successors->block->predecessors->nextLink) {
+ block = block->successors->block;
+ removecsesfrombasicblock(block);
+ }
+
+ counter = 0;
+ for (succ = block->successors; succ; succ = succ->nextLink) {
+ if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink)
+ counter++;
+ }
+
+ if (counter) {
+ getvaluestate(&state);
+ forkvaluestate(nextvaluenumber);
+ for (succ = block->successors; succ; succ = succ->nextLink) {
+ if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) {
+ removecsesfromextendedbasicblock(succ->block);
+ regressvaluestate();
+ }
+ }
+ setvaluestate(&state);
+ }
+}
+
+void removecommonsubexpressions(Object *proc, int flag) {
+ PCodeBlock *block;
+
+ moreaggressiveoptimization = flag;
+ removedcommonsubexpressions = 0;
+ gather_alias_info();
+ allocatecsedatastructures();
+
+ for (block = pcbasicblocks; block; block = block->nextBlock)
+ block->flags &= ~fVisited;
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ if (!(block->flags & fVisited)) {
+ initializecsedatastructures();
+ removecsesfromextendedbasicblock(block);
+ }
+ }
+
+ freeoheap();
+}
+
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c
new file mode 100644
index 0000000..fde27f1
--- /dev/null
+++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c
@@ -0,0 +1,548 @@
+#include "compiler/VectorArraysToRegs.h"
+#include "compiler/CError.h"
+#include "compiler/CFunc.h"
+#include "compiler/BitVectors.h"
+#include "compiler/CompilerTools.h"
+#include "compiler/PCode.h"
+#include "compiler/PCodeInfo.h"
+#include "compiler/Registers.h"
+#include "compiler/UseDefChains.h"
+#include "compiler/objects.h"
+#include "compiler/types.h"
+
+typedef struct LocalVectorArray {
+ struct LocalVectorArray *next;
+ Object *object;
+ unsigned int invalid:1;
+ SInt32 arraySize;
+ SInt32 elementCount;
+ int totalUses;
+ int elements[1];
+} LocalVectorArray;
+
+typedef struct VectorPropInfo {
+ UInt32 *use;
+ UInt32 *def;
+ UInt32 *in;
+ UInt32 *out;
+} VectorPropInfo;
+
+typedef struct ADDI {
+ PCode *instr;
+ RegUseOrDef *list;
+} ADDI;
+
+static int number_of_ADDIs;
+static ADDI *ADDIs;
+static VectorPropInfo *vectorpropinfo;
+static int *naddsinblock;
+static int *firstaddinblock;
+static Boolean converted_arrays;
+
+static LocalVectorArray *scanforlocalvectorarrays(void) {
+ SInt32 elementCount;
+ LocalVectorArray *head;
+ LocalVectorArray *array;
+ ObjectList *list;
+ int i;
+ SInt32 arraySize;
+
+ head = NULL;
+
+ for (list = locals; list; list = list->next) {
+ if (
+ list->object &&
+ !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
+ list->object->type &&
+ IS_TYPE_ARRAY(list->object->type) &&
+ IS_TYPE_VECTOR(TPTR_TARGET(list->object->type))
+ ) {
+ arraySize = list->object->type->size;
+ elementCount = arraySize / 16;
+ if (elementCount > 0 && elementCount <= 8) {
+ array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray));
+ array->next = head;
+ head = array;
+
+ array->object = list->object;
+ array->arraySize = arraySize;
+ array->elementCount = elementCount;
+ array->totalUses = 0;
+ array->invalid = 0;
+
+ for (i = 0; i < elementCount; i++) {
+ array->elements[i] = 0;
+ }
+ }
+ }
+ }
+
+ return head;
+}
+
+static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) {
+ while (arrays) {
+ if (arrays->object == object)
+ return arrays;
+ arrays = arrays->next;
+ }
+ return NULL;
+}
+
+static void scaninstructions(LocalVectorArray *arrays) {
+ PCodeBlock *block;
+ PCode *instr;
+ int counter;
+ int i;
+ PCodeArg *op;
+ LocalVectorArray *array;
+ int element;
+
+ naddsinblock = oalloc(sizeof(int) * pcblockcount);
+ memclrw(naddsinblock, sizeof(int) * pcblockcount);
+
+ firstaddinblock = oalloc(sizeof(int) * pcblockcount);
+ memclrw(firstaddinblock, sizeof(int) * pcblockcount);
+
+ number_of_ADDIs = 0;
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ firstaddinblock[block->blockIndex] = number_of_ADDIs;
+ counter = 0;
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (!(instr->flags & fIsBranch) && instr->argCount) {
+ op = instr->args;
+ i = instr->argCount;
+ while (i--) {
+ if (
+ op->kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) op->arg == PCOpMemory1 &&
+ (array = lookup_vector_array_object(arrays, op->data.mem.obj)) &&
+ !array->invalid
+ )
+ {
+ if (instr->op != PC_ADDI) {
+ array->invalid = 1;
+ } else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) {
+ array->invalid = 1;
+ } else {
+ number_of_ADDIs++;
+ counter++;
+ }
+
+ if (!array->invalid) {
+ element = op->data.mem.offset / 16;
+ if (element < array->elementCount)
+ array->elements[element]++;
+ else
+ array->invalid = 1;
+ }
+ }
+ op++;
+ }
+ }
+ }
+ naddsinblock[block->blockIndex] = counter;
+ }
+}
+
+static void computeaddilist(LocalVectorArray *arrays) {
+ PCodeBlock *block;
+ PCode *instr;
+ RegUseOrDef *list;
+ ADDI *addi;
+ UInt32 *vec;
+ LocalVectorArray *array;
+ UseOrDef *def;
+ int defID;
+ UseOrDef *use;
+ int useID;
+
+ ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs);
+ memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs);
+
+ vec = oalloc(4 * ((number_of_Uses + 31) >> 5));
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ if (naddsinblock[block->blockIndex]) {
+ bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses);
+ addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1];
+ for (instr = block->lastPCode; instr; instr = instr->prevPCode) {
+ if (!(instr->flags & fIsBranch) && instr->argCount) {
+ int reg; // r18
+ if (
+ instr->op == PC_ADDI &&
+ (reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] &&
+ instr->args[2].kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
+ (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
+ !array->invalid
+ )
+ {
+ addi->instr = instr;
+ addi->list = NULL;
+ for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) {
+ if (bitvectorgetbit(list->id, vec)) {
+ RegUseOrDef *node = oalloc(sizeof(RegUseOrDef));
+ node->id = list->id;
+ node->next = addi->list;
+ addi->list = node;
+ }
+ }
+ addi--;
+ }
+
+ for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) {
+ if (def->v.kind == PCOp_REGISTER) {
+ RegUseOrDef *l;
+ for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next)
+ bitvectorclearbit(l->id, vec);
+ }
+ }
+
+ for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) {
+ if (use->v.kind == PCOp_REGISTER)
+ bitvectorsetbit(useID, vec);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void allocatevectorpropinfo(void) {
+ VectorPropInfo *info;
+ int i;
+
+ vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount);
+ for (i = 0, info = vectorpropinfo; i < pcblockcount; i++, info++) {
+ info->use = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
+ info->def = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
+ info->in = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
+ info->out = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
+ }
+}
+
+static void computelocalvectorpropinfo(LocalVectorArray *arrays) {
+ VectorPropInfo *info;
+ PCodeBlock *block;
+ PCode *instr;
+ UInt32 *vec0;
+ UInt32 *vec4;
+ int index;
+ PCodeArg *op;
+ int i;
+ int addi_i;
+ ADDI *addi;
+ LocalVectorArray *array;
+
+ for (block = pcbasicblocks; block; block = block->nextBlock) {
+ info = &vectorpropinfo[block->blockIndex];
+ vec0 = info->use;
+ vec4 = info->def;
+ bitvectorinitialize(vec0, number_of_ADDIs, 0);
+ bitvectorinitialize(vec4, number_of_ADDIs, 0);
+ index = firstaddinblock[block->blockIndex];
+
+ for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
+ if (!(instr->flags & fIsBranch) && instr->argCount) {
+ i = instr->argCount;
+ op = instr->args;
+ while (i--) {
+ if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) {
+ for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) {
+ if (
+ addi->instr &&
+ addi->instr->args[0].arg == op->arg &&
+ addi->instr->args[0].data.reg.reg == op->data.reg.reg
+ )
+ {
+ if (addi->instr->block == block)
+ bitvectorclearbit(addi_i, vec0);
+ else
+ bitvectorsetbit(addi_i, vec4);
+ }
+ }
+ }
+ op++;
+ }
+
+ if (
+ instr->op == PC_ADDI &&
+ instr->args[2].kind == PCOp_MEMORY &&
+ (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
+ (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
+ !array->invalid
+ )
+ {
+ bitvectorsetbit(index, vec0);
+ index++;
+ }
+ }
+ }
+ }
+}
+
+static void computeglobalvectorpropinfo(void) {
+ VectorPropInfo *info;
+ PCodeBlock *block;
+ UInt32 *vec0;
+ UInt32 *vec4;
+ UInt32 *vec8;
+ UInt32 *vecC;
+ int bitvecsize;
+ int blockIndex;
+ int i;
+ int j;
+ int flag;
+ PCLink *preds;
+ UInt32 val;
+
+ bitvecsize = (number_of_ADDIs + 31) >> 5;
+ flag = 1;
+ info = &vectorpropinfo[pcbasicblocks->blockIndex];
+ bitvectorinitialize(info->in, number_of_ADDIs, 0);
+ bitvectorcopy(info->out, info->use, number_of_ADDIs);
+
+ for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
+ info = &vectorpropinfo[block->blockIndex];
+ vecC = info->out;
+ vec4 = info->def;
+ for (i = 0; i < bitvecsize; vecC++, vec4++, i++)
+ *vecC = ~*vec4;
+ }
+
+ while (flag) {
+ flag = 0;
+ for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) {
+ if (depthfirstordering[blockIndex]) {
+ info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex];
+ if ((preds = depthfirstordering[blockIndex]->predecessors)) {
+ vec8 = info->in;
+ bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
+ for (preds = preds->nextLink; preds; preds = preds->nextLink)
+ bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
+ }
+
+ vecC = info->out;
+ vec8 = info->in;
+ vec0 = info->use;
+ vec4 = info->def;
+ for (j = 0; j < bitvecsize; j++) {
+ val = *vec0 | (*vec8 & ~*vec4);
+ if (val != *vecC) {
+ *vecC = val;
+ flag = 1;
+ }
+ vec8++;
+ vecC++;
+ vec4++;
+ vec0++;
+ }
+ }
+ }
+ }
+}
+
+static int precedes(PCode *a, PCode *b) {
+ PCode *scan;
+
+ for (scan = a->nextPCode; scan; scan = scan->nextPCode) {
+ if (scan == b)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int checkvectorstoreorload(int addiID, int useID) {
+ PCode *addiInstr;
+ UseOrDef *use;
+
+ addiInstr = ADDIs[addiID].instr;
+ use = Uses + useID;
+ if (!addiInstr)
+ return 0;
+
+ if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR])
+ return 0;
+
+ if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX)
+ return 0;
+
+ if (
+ use->pcode->args[1].kind != PCOp_REGISTER ||
+ use->pcode->args[1].arg != RegClass_GPR ||
+ use->pcode->args[1].data.reg.reg != 0
+ )
+ return 0;
+
+ return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg;
+}
+
+static int checkalluses(LocalVectorArray *arrays, int addiID) {
+ RegUseOrDef *list;
+ PCode *instr;
+ LocalVectorArray *array;
+
+ instr = ADDIs[addiID].instr;
+ for (list = ADDIs[addiID].list; list; list = list->next) {
+ if (list && !checkvectorstoreorload(addiID, list->id)) {
+ array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj);
+ array->invalid = 1;
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static void convert_array_to_register(LocalVectorArray *arrays, int addiID) {
+ ADDI *addi;
+ int newReg;
+ RegUseOrDef *list;
+ PCode *instr;
+ PCode *useInstr;
+ LocalVectorArray *array;
+ int element;
+
+ addi = ADDIs + addiID;
+
+ if (!(instr = addi->instr))
+ return;
+
+ if (
+ !(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) ||
+ array->invalid
+ )
+ return;
+
+ element = instr->args[2].data.mem.offset / 16;
+ if (element > array->elementCount)
+ return;
+
+ newReg = array->elements[element];
+ for (list = addi->list; list; list = list->next) {
+ useInstr = Uses[list->id].pcode;
+ if (useInstr->op == PC_LVX) {
+ converted_arrays = 1;
+ change_opcode(useInstr, PC_VMR);
+ change_num_operands(useInstr, 2);
+ useInstr->args[1].kind = PCOp_REGISTER;
+ useInstr->args[1].arg = RegClass_VR;
+ useInstr->args[1].data.reg.reg = newReg;
+ useInstr->args[1].data.reg.effect = EffectRead;
+ } else if (useInstr->op == PC_STVX) {
+ converted_arrays = 1;
+ change_opcode(useInstr, PC_VMR);
+ change_num_operands(useInstr, 2);
+ useInstr->args[1] = useInstr->args[0];
+ useInstr->args[0].kind = PCOp_REGISTER;
+ useInstr->args[0].arg = RegClass_VR;
+ useInstr->args[0].data.reg.reg = newReg;
+ useInstr->args[0].data.reg.effect = EffectWrite;
+ } else {
+ CError_FATAL(661);
+ }
+ }
+ deletepcode(addi->instr);
+}
+
+static void convert_arrays_to_registers(LocalVectorArray *arrays) {
+ int i;
+ int counter;
+ LocalVectorArray **ptr;
+ LocalVectorArray *array;
+
+ for (i = 0; i < number_of_ADDIs; i++)
+ checkalluses(arrays, i);
+
+ counter = 0;
+ ptr = &arrays;
+ array = *ptr;
+ while (array) {
+ if (array->invalid) {
+ *ptr = array->next;
+ array = *ptr;
+ continue;
+ }
+
+ counter += array->elementCount;
+
+ for (i = 0; i < array->elementCount; i++)
+ array->totalUses += array->elements[i];
+
+ array = array->next;
+ }
+
+ if (arrays) {
+ while (counter > 32) {
+ LocalVectorArray *best;
+ int score;
+ score = 0;
+ best = NULL;
+ for (array = arrays; array; array = array->next) {
+ if (best) {
+ if (array->totalUses < score) {
+ score = array->totalUses;
+ best = array;
+ }
+ } else {
+ best = array;
+ score = array->totalUses;
+ }
+ }
+
+ if (!best)
+ break;
+
+ if (best == arrays) {
+ arrays = best->next;
+ } else {
+ for (array = arrays; array; array = array->next) {
+ if (array->next == best) {
+ array->next = best->next;
+ break;
+ }
+ }
+ }
+
+ counter -= best->elementCount;
+ }
+
+ if (!(array = arrays))
+ return;
+
+ while (array) {
+ for (i = 0; i < array->elementCount; i++)
+ array->elements[i] = used_virtual_registers[RegClass_VR]++;
+ array = array->next;
+ }
+
+ if (arrays) {
+ for (i = 0; i < number_of_ADDIs; i++)
+ convert_array_to_register(arrays, i);
+ }
+ }
+}
+
+int vectorarraystoregs(void) {
+ LocalVectorArray *arrays;
+
+ converted_arrays = 0;
+ if ((arrays = scanforlocalvectorarrays())) {
+ scaninstructions(arrays);
+ if (number_of_ADDIs > 0) {
+ computeusedefchains(0);
+ computeaddilist(arrays);
+ allocatevectorpropinfo();
+ computelocalvectorpropinfo(arrays);
+ computedepthfirstordering();
+ computeglobalvectorpropinfo();
+ convert_arrays_to_registers(arrays);
+ }
+ }
+
+ freeoheap();
+ return converted_arrays;
+}