diff options
author | Ash Wolf <ninji@wuffs.org> | 2023-01-26 11:30:47 +0000 |
---|---|---|
committer | Ash Wolf <ninji@wuffs.org> | 2023-01-26 11:30:47 +0000 |
commit | 094b96ca1df4a035b5f93c351f773306c0241f3f (patch) | |
tree | 95ce05e3ebe816c7ee7996206bb37ea17d8ca33c /compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer | |
parent | fc0c4c0df7b583b55a08317cf1ef6a71d27c0440 (diff) | |
download | MWCC-094b96ca1df4a035b5f93c351f773306c0241f3f.tar.gz MWCC-094b96ca1df4a035b5f93c351f773306c0241f3f.zip |
move lots of source files around to match their actual placement in the original treemain
Diffstat (limited to 'compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer')
8 files changed, 6694 insertions, 0 deletions
diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c new file mode 100644 index 0000000..8223bf4 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/Alias.c @@ -0,0 +1,747 @@ +#include "compiler/Alias.h" +#include "compiler/CClass.h" +#include "compiler/CError.h" +#include "compiler/CParser.h" +#include "compiler/CMachine.h" +#include "compiler/CodeGen.h" +#include "compiler/CopyPropagation.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/RegisterInfo.h" +#include "compiler/UseDefChains.h" +#include "compiler/ValueNumbering.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +static Alias *aliases; +static int n_aliases; +static int n_gathered_aliases; +static Alias *alias_hash[997]; +Alias *worst_case; +Object worst_case_obj; + +static TypePointer worst_case_memory_type = { + TYPEARRAY, + 0xFFFFFF, + TYPE(&stchar) +}; + +static Boolean is_safe_const(Object *obj) { + Type *type; + + type = obj->type; + while (IS_TYPE_ARRAY(type)) + type = TPTR_TARGET(type); + + if (TYPE_FITS_IN_REGISTER(type) || IS_TYPE_VECTOR(type) || IS_TYPE_FLOAT(type) || IS_TYPE_STRUCT(type)) + return is_const_object(obj); + + if (IS_TYPE_CLASS(type)) + return is_const_object(obj) && CClass_IsPODClass(TYPE_CLASS(type)); + + return 0; +} + +void initialize_aliases(void) { + int i; + + memclrw(&worst_case_obj, sizeof(Object)); + worst_case_obj.otype = OT_OBJECT; + worst_case_obj.type = TYPE(&worst_case_memory_type); + worst_case_obj.datatype = DDATA; + worst_case_obj.name = GetHashNameNodeExport("@worst_case@"); + + aliases = NULL; + n_aliases = 0; + n_gathered_aliases = 0; + for (i = 0; i < 997; i++) + alias_hash[i] = NULL; + + worst_case = make_alias_set(); + add_alias_member(worst_case, make_alias(&worst_case_obj, 0, 0)); +} + +static UInt32 hash_alias(Object *object, SInt32 offset, SInt32 size) { + return (UInt32) (object->name->hashval * offset * size) % 997; +} + +static Alias *create_alias(AliasType type, Object *object, SInt32 offset, SInt32 size, Boolean addToHash) { + Alias *alias; + UInt32 hash; + + alias = lalloc(sizeof(Alias)); + memclrw(alias, sizeof(Alias)); + alias->type = type; + alias->index = n_aliases++; + alias->next = aliases; + aliases = alias; + alias->object = object; + alias->offset = offset; + alias->size = size; + + if (addToHash) { + hash = hash_alias(object, offset, size); + alias->hashNext = alias_hash[hash]; + alias_hash[hash] = alias; + } + + return alias; +} + +static Alias *lookup_alias(Object *object, SInt32 offset, SInt32 size) { + Alias *scan; + + for (scan = alias_hash[hash_alias(object, offset, size)]; scan; scan = scan->hashNext) { + if (scan->object == object && scan->offset == offset && scan->size == size) + return scan; + } + + return NULL; +} + +Alias *make_alias(Object *object, SInt32 offset, SInt32 size) { + Alias *alias; + Alias *alias2; + + if (!offset && !size) + size = object->type->size; + + alias = lookup_alias(object, offset, size); + if (!alias) { + if (offset > 0 || size != object->type->size) { + alias2 = make_alias(object, 0, object->type->size); + alias = create_alias(AliasType1, object, offset, size, 1); + add_alias_member(alias2, alias); + } else { + alias = create_alias(AliasType0, object, offset, size, 1); + } + + switch (object->datatype) { + case DLOCAL: + case DNONLAZYPTR: + break; + default: + if (!is_safe_const(object)) + add_alias_member(worst_case, make_alias(object, 0, 0)); + } + } + + if (offset > object->type->size) + return NULL; + else + return alias; +} + +Alias *make_alias_set(void) { + return create_alias(AliasType2, NULL, 0, 0, 0); +} + +void add_alias_member(Alias *parent, Alias *child) { + AliasMember *member; + + if (child->type == AliasType2) { + for (member = child->parents; member; member = member->nextParent) + add_alias_member(parent, member->child); + } else { + if (parent == worst_case && child->type == AliasType1) + child = make_alias(child->object, 0, 0); + + for (member = parent->parents; member; member = member->nextParent) { + if (member->child == child) + return; + } + + member = lalloc(sizeof(AliasMember)); + member->parent = parent; + member->child = child; + member->nextParent = parent->parents; + parent->parents = member; + member->nextChild = child->children; + child->children = member; + } +} + +Alias *make_alias_set_from_IR(void) { + CError_FATAL(333); + return NULL; +} + +static Boolean aliases_overlap(Alias *a, Alias *b) { + return ( + a->offset == b->offset || + (a->offset > b->offset && a->offset < (b->offset + b->size)) || + (b->offset > a->offset && b->offset < (a->offset + a->size)) + ); +} + +static int is_address_load(PCode *pcode) { + Object *obj; + + switch (pcode->op) { + case PC_LWZ: + if (pcode->args[2].kind == PCOp_MEMORY && pcode->args[2].data.mem.obj->datatype == DNONLAZYPTR) + return 1; + break; + case PC_LBZU: + case PC_LBZUX: + case PC_LHZU: + case PC_LHZUX: + case PC_LHAU: + case PC_LHAUX: + case PC_LWZU: + case PC_LWZUX: + case PC_STBU: + case PC_STBUX: + case PC_STHU: + case PC_STHUX: + case PC_STWU: + case PC_STWUX: + return 1; + case PC_ADDI: + case PC_ADDIS: + if (pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) { + if (pcode->args[2].kind == PCOp_MEMORY) { + obj = pcode->args[2].data.mem.obj; + if (obj->datatype == DLOCAL && !is_safe_const(obj)) + add_alias_member(worst_case, make_alias(obj, 0, 0)); + return 0; + } + } else { + return 1; + } + break; + case PC_ADD: + return 1; + } + + return 0; +} + +static int addresspropagatestouse(int candidateID, int useID) { + PCode *candidate_pcode; // r30 + PCode *use_pcode; // r29 + int reg; // r28 + short reg2; + Object *object; // r27 + SInt32 offset; // r26 + Alias *alias; // r25 + Boolean flag24; // r24 + SInt32 size; // r23 + Alias *aliasSet; // r22 + int i; + PCode *scan; + PCodeArg *op; + + candidate_pcode = Candidates[candidateID].pcode; + use_pcode = Uses[useID].pcode; + flag24 = 0; + size = 1; + reg = candidate_pcode->args[0].data.reg.reg; + + if (candidate_pcode->alias && (candidate_pcode->alias->type == AliasType0 || candidate_pcode->alias->type == AliasType1)) { + object = candidate_pcode->alias->object; + offset = candidate_pcode->alias->offset; + if (offset == 0 && candidate_pcode->alias->size == object->type->size) + flag24 = 1; + } else if (candidate_pcode->args[2].kind == PCOp_MEMORY) { + object = candidate_pcode->args[2].data.mem.obj; + if (candidate_pcode->op == PC_ADDIS) + offset = candidate_pcode->args[2].data.mem.offset << 16; + else + offset = candidate_pcode->args[2].data.mem.offset; + } else { + return 0; + } + + CError_ASSERT(478, object->otype == OT_OBJECT); + + if ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) { + reg = candidate_pcode->args[1].data.reg.reg; + offset = 0; + flag24 = 1; + } else if (candidate_pcode->op == PC_LWZ) { + if (object->datatype != DNONLAZYPTR) + return 0; + + object = object->u.var.realObj; + CError_ASSERT(495, object->otype == OT_OBJECT); + offset = 0; + } else if (candidate_pcode->op == PC_ADDI) { + if (!candidate_pcode->alias && object) + candidate_pcode->alias = make_alias(object, offset, 1); + } else if (candidate_pcode->op == PC_ADDIS) { + if (!candidate_pcode->alias && object) + candidate_pcode->alias = make_alias(object, offset, 1); + } else if (candidate_pcode->op == PC_ADD) { + offset = 0; + flag24 = 1; + } else { + CError_FATAL(509); + } + + if ( + !(use_pcode->flags & (fIsRead | fIsWrite)) && + use_pcode->op != PC_ADDI && + use_pcode->op != PC_ADD && + use_pcode->op != PC_ADDIS + ) { + if (object->datatype == DLOCAL && !is_safe_const(object)) + add_alias_member(worst_case, make_alias(object, 0, 0)); + return 1; + } + + if ( + (use_pcode->flags & (fIsWrite | fPCodeFlag40000)) && + use_pcode->args[0].kind == PCOp_REGISTER && + use_pcode->args[0].arg == RegClass_GPR && + use_pcode->args[0].data.reg.reg == reg && + object->datatype == DLOCAL && + !is_safe_const(object) + ) + add_alias_member(worst_case, make_alias(object, 0, 0)); + + if (use_pcode->argCount < 3) + return 1; + + CError_ASSERT(543, use_pcode->args[1].kind == PCOp_REGISTER); + + if (candidate_pcode->block == use_pcode->block && precedes(candidate_pcode, use_pcode)) { + for (scan = candidate_pcode->nextPCode; scan && scan != use_pcode; scan = scan->nextPCode) { + for (op = scan->args, i = scan->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg) + return 1; + } + } + } else { + if (!bitvectorgetbit(candidateID, propinfo[use_pcode->block->blockIndex].vec8)) { + if (bitvectorgetbit(candidate_pcode->defID, usedefinfo[use_pcode->block->blockIndex].defvec8)) { + for (scan = use_pcode->block->firstPCode; scan && scan != use_pcode; scan = scan->nextPCode) { + for (op = scan->args, i = scan->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg) + return 1; + } + } + } else { + return 1; + } + } + + for (scan = use_pcode->block->firstPCode; scan; scan = scan->nextPCode) { + if (scan == use_pcode) + break; + for (op = scan->args, i = scan->argCount; i--; op++) { + if (op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg) + return 1; + } + } + } + + CError_ASSERT(598, object != NULL); + + if (use_pcode->op == PC_ADDI || use_pcode->op == PC_ADD || use_pcode->op == PC_ADDIS) { + if (use_pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR] && !is_safe_const(object)) + add_alias_member(worst_case, make_alias(object, 0, 0)); + } + + if (use_pcode->flags & (fIsRead | fIsWrite)) + size = nbytes_loaded_or_stored_by(use_pcode); + + if (use_pcode->args[2].kind == PCOp_REGISTER) { + if (use_pcode->args[1].data.reg.reg == 0) { + if (use_pcode->args[2].data.reg.reg == reg) + alias = make_alias(object, offset, size); + } else { + if (use_pcode->args[1].data.reg.reg == reg) + reg2 = use_pcode->args[2].data.reg.reg; + else if (use_pcode->args[2].data.reg.reg == reg) + reg2 = use_pcode->args[1].data.reg.reg; + else + return 1; + + for (scan = use_pcode->prevPCode; scan; scan = scan->prevPCode) { + if (scan->op == PC_LI && scan->args[0].data.reg.reg == reg2) + break; + + for (i = 0; i < scan->argCount; i++) { + if (scan->args[i].kind == PCOp_REGISTER && + scan->args[i].arg == RegClass_GPR && + scan->args[i].data.reg.reg == reg2 && + (scan->args[i].data.reg.effect & EffectWrite)) { + scan = NULL; + break; + } + } + + if (!scan) + break; + } + + if (scan) { + offset += scan->args[1].data.mem.offset; + alias = make_alias(object, offset, size); + } else { + alias = make_alias(object, 0, 0); + } + } + } else { + if (use_pcode->args[1].kind != PCOp_REGISTER || + use_pcode->args[1].arg != RegClass_GPR || + use_pcode->args[1].data.reg.reg != reg) + return 1; + + if (use_pcode->args[1].data.reg.effect & EffectWrite) { + alias = make_alias(object, 0, 0); + } else if (use_pcode->args[2].kind == PCOp_IMMEDIATE) { + if (use_pcode->op == PC_ADDIS) { + offset += use_pcode->args[2].data.imm.value << 16; + alias = make_alias(object, offset, 1); + } else { + offset += use_pcode->args[2].data.imm.value; + alias = make_alias(object, offset, size); + } + } else { + return 1; + } + } + + if (flag24) + alias = make_alias(object, 0, 0); + + if (!alias) + return 1; + + if (!use_pcode->alias) { + if ( + use_pcode->op == PC_ADDI || + use_pcode->op == PC_ADD || + use_pcode->op == PC_ADDIS || + ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) + ) + recursive_propagation = 1; + } + + if (use_pcode->alias) { + if (use_pcode->alias == worst_case) { + add_alias_member(worst_case, make_alias(object, 0, 0)); + } else if (use_pcode->alias == alias) { + return 1; + } else if (use_pcode->alias->type == AliasType0 || use_pcode->alias->type == AliasType1) { + if (object == use_pcode->alias->object) { + use_pcode->alias = make_alias(object, 0, 0); + } else { + aliasSet = make_alias_set(); + if ( + use_pcode->op == PC_ADDI || + use_pcode->op == PC_ADD || + use_pcode->op == PC_ADDIS || + ((use_pcode->flags & (fIsRead | fIsWrite)) && (use_pcode->flags & fUpdatesPtr)) + ) { + if (alias->type == AliasType2) + add_alias_member(worst_case, alias); + else + add_alias_member(worst_case, make_alias(use_pcode->alias->object, 0, 0)); + } + add_alias_member(aliasSet, use_pcode->alias); + add_alias_member(aliasSet, alias); + use_pcode->alias = aliasSet; + } + } else { + add_alias_member(use_pcode->alias, alias); + } + } else { + use_pcode->alias = alias; + } + + propagated_instructions = 1; + return 1; +} + +static void finishpropagatealiases(int id) { + propagated_instructions = 1; +} + +static Propagation alias_prop = { + &is_address_load, + &addresspropagatestouse, + &finishpropagatealiases, + "ALIAS", + "ALIASES", + "A%" PRId32, + 1 +}; + +static void propagatealiasinfo(Object *proc) { + propagateinstructions(proc, &alias_prop, (copts.optimizationlevel >= 4) ? 4 : 1, 1); +} + +void gather_alias_info(void) { + UInt32 *myvec; // r31 + Alias *alias; // r22 + AliasMember *member; + AliasMember *member2; + PCodeBlock *block; // r21 + PCode *pcode; // r20 + PCodeArg *op; // r19 + RegUseOrDef *list; // r18 + int i; // r17 + Alias *alias_choice; // r16 + int aliases_idx; // r15 (helper in r23) + PCode *defpcode; // r14 + Alias *alias_array[3]; + UseOrDef *def; + int defID; + + if (coloring) { + propagatealiasinfo(gFunction); + myvec = oalloc(4 * ((number_of_Defs + 31) >> 5)); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs); + + for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) { + if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + if (!pcode->alias) { + pcode->alias = worst_case; + } else { + if ((pcode->alias->type == AliasType0 || pcode->alias->type == AliasType1) && + pcode->alias->size == nbytes_loaded_or_stored_by(pcode)) { + pcode->flags &= ~fIsPtrOp; + } else { + pcode->flags |= fIsPtrOp; + } + + if (pcode->alias != worst_case) { + aliases_idx = 0; + alias_choice = NULL; + op = pcode->args; + for (i = 0; i < pcode->argCount; i++, op++) { + if ( + (!(pcode->flags & (fIsWrite | fPCodeFlag40000)) || op != pcode->args) && + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectRead) + ) { + alias_array[aliases_idx] = NULL; + if (aliases_idx >= 2) { + alias_choice = worst_case; + break; + } + alias_array[aliases_idx] = pcode->alias; + + for (list = reg_Defs[RegClass_GPR][op->data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(list->id, myvec)) { + defpcode = Defs[list->id].pcode; + if (!defpcode->alias || !is_address_load(defpcode) || defpcode->alias == worst_case) { + alias_array[aliases_idx] = worst_case; + break; + } + } + } + + aliases_idx++; + } + } + + if (!alias_choice) { + if (aliases_idx > 0) { + alias_choice = alias_array[0]; + if (aliases_idx == 2) { + if (alias_array[0] != worst_case) { + if (alias_array[1] != worst_case) + alias_choice = worst_case; + } else if (alias_array[1] != worst_case) { + alias_choice = alias_array[1]; + } + } + } + + if (alias_choice == worst_case) { + pcode->flags |= fIsPtrOp; + if (pcode->alias->type == AliasType2) + add_alias_member(worst_case, pcode->alias); + else + add_alias_member(worst_case, make_alias(pcode->alias->object, 0, 0)); + } + + if (alias_choice) + pcode->alias = alias_choice; + } + } + } + } else { + if ((pcode->flags & fIsCall) && !pcode->alias) + pcode->alias = worst_case; + } + + for (def = &Defs[defID = pcode->defID]; defID < number_of_Defs && def->pcode == pcode; defID++, def++) { + if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) { + for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next) + bitvectorclearbit(list->id, myvec); + } + bitvectorsetbit(defID, myvec); + } + } + } + + freeoheap(); + } else { + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) { + if ((pcode->flags & (fIsRead | fIsWrite | fIsCall | fPCodeFlag20000 | fPCodeFlag40000)) && !pcode->alias) + pcode->alias = worst_case; + } + } + } + + if (n_gathered_aliases != n_aliases) { + for (alias = aliases; alias; alias = alias->next) { + if (alias->type == AliasType2) { + alias->vec24 = lalloc(4 * ((n_aliases + 31) >> 5)); + bitvectorinitialize(alias->vec24, n_aliases, 0); + for (member = alias->parents; member; member = member->nextParent) { + bitvectorsetbit(member->child->index, alias->vec24); + for (member2 = member->child->parents; member2; member2 = member2->nextParent) + bitvectorsetbit(member2->child->index, alias->vec24); + } + } + } + n_gathered_aliases = n_aliases; + } +} + +static Boolean may_alias_alias(Alias *a, Alias *b) { + switch ((a->type * 3) + b->type) { + case (AliasType0 * 3) + AliasType0: + return a == b; + case (AliasType0 * 3) + AliasType1: + case (AliasType1 * 3) + AliasType0: + return a->object == b->object; + case (AliasType1 * 3) + AliasType1: + return (a->object == b->object) && aliases_overlap(a, b); + case (AliasType0 * 3) + AliasType2: + case (AliasType1 * 3) + AliasType2: + return bitvectorgetbit(a->index, b->vec24) != 0; + case (AliasType2 * 3) + AliasType0: + case (AliasType2 * 3) + AliasType1: + return bitvectorgetbit(b->index, a->vec24) != 0; + case (AliasType2 * 3) + AliasType2: + return (a == b) || !bitvectorintersectionisempty(a->vec24, b->vec24, n_aliases); + default: + CError_FATAL(1054); + return 1; + } +} + +Boolean may_alias(PCode *a, PCode *b) { + return may_alias_alias(a->alias, b->alias); +} + +Boolean uniquely_aliases(PCode *a, PCode *b) { + if (may_alias_alias(a->alias, b->alias)) { + if ( + a->alias->type != AliasType2 && + b->alias->type != AliasType2 && + a->alias && + b->alias && + a->alias->size == nbytes_loaded_or_stored_by(a) && + b->alias->size == nbytes_loaded_or_stored_by(b) + ) + return 1; + } + + return 0; +} + +Boolean may_alias_worst_case(PCode *pcode) { + return may_alias_alias(pcode->alias, worst_case); +} + +Boolean may_alias_object(PCode *pcode, Object *object) { + return may_alias_alias(pcode->alias, make_alias(object, 0, 0)); +} + +void initialize_alias_values(void) { + Alias *alias; + + for (alias = aliases; alias; alias = alias->next) { + alias->valuenumber = nextvaluenumber++; + alias->valuepcode = NULL; + } +} + +void update_alias_value(Alias *alias, PCode *pcode) { + AliasMember *member; + AliasMember *member2; + AliasMember *member3; + + switch (alias->type) { + case AliasType0: + killmemory(alias, pcode); + for (member = alias->children; member; member = member->nextChild) { + CError_ASSERT(1152, member->parent->type == AliasType2); + killmemory(member->parent, NULL); + } + for (member = alias->parents; member; member = member->nextParent) { + CError_ASSERT(1157, member->child->type == AliasType1); + killmemory(member->child, NULL); + for (member2 = member->child->children; member2; member2 = member2->nextChild) { + if (member2->parent != alias) { + CError_ASSERT(1163, member2->parent->type == AliasType2); + killmemory(member2->parent, NULL); + } + } + } + break; + + case AliasType1: + killmemory(alias, pcode); + for (member = alias->children; member; member = member->nextChild) { + killmemory(member->parent, NULL); + if (member->parent->type == AliasType0) { + for (member2 = member->parent->parents; member2; member2 = member2->nextParent) { + if (member2->child != alias && aliases_overlap(alias, member2->child)) { + killmemory(member2->child, NULL); + } + } + } + } + break; + + case AliasType2: + killmemory(alias, NULL); + for (member = alias->parents; member; member = member->nextParent) { + killmemory(member->child, NULL); + for (member2 = member->child->children; member2; member2 = member2->nextChild) { + if (member2->parent != alias) + killmemory(member2->parent, NULL); + } + for (member3 = member->child->parents; member3; member3 = member3->nextParent) { + killmemory(member3->child, NULL); + for (member2 = member3->child->children; member2; member2 = member2->nextChild) { + if (member2->parent != member->child) + killmemory(member2->parent, NULL); + } + } + } + break; + } +} + +void update_all_alias_values(void) { + Alias *alias; + + for (alias = aliases; alias; alias = alias->next) + killmemory(alias, NULL); +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c new file mode 100644 index 0000000..8ce2962 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/CodeMotion.c @@ -0,0 +1,906 @@ +#include "compiler/CodeMotion.h" +#include "compiler/Alias.h" +#include "compiler/BitVectors.h" +#include "compiler/LoopDetection.h" +#include "compiler/LoopOptimization.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/UseDefChains.h" +#include "compiler/RegisterInfo.h" + +int movedloopinvariantcode; +int unswitchedinvariantcode; + +static int isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec, int flag1, int flag2) { + PCodeArg *op; + RegUseOrDef *list; + int i; + + if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) { + if (pcode->alias) { + if (pcode->alias->type == AliasType2 || (pcode->flags & (fIsVolatile | fSideEffects))) + return 0; + + if (pcode->flags & fIsRead) { + for (list = findobjectusedef(pcode->alias->object)->defs; list; list = list->next) { + if ( + may_alias(pcode, Defs[list->id].pcode) && + bitvectorgetbit(list->id, vec) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + + if (pcode->flags & fIsWrite) { + for (list = findobjectusedef(pcode->alias->object)->uses; list; list = list->next) { + if ( + may_alias(pcode, Uses[list->id].pcode) && + bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + } else { + return 0; + } + } + + if ((pcode->flags & fIsWrite) && !bitvectorgetbit(pcode->block->blockIndex, loop->vec2C)) + return 0; + + op = pcode->args; + i = pcode->argCount; + while (i--) { + switch (op->kind) { + case PCOp_MEMORY: + if ((pcode->flags & fIsRead) && ((pcode->flags == 0) & 0x40)) { + for (list = findobjectusedef(op->data.mem.obj)->defs; list; list = list->next) { + if ( + may_alias(pcode, Defs[list->id].pcode) && + bitvectorgetbit(list->id, vec) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + if (pcode->flags & fIsWrite) { + for (list = findobjectusedef(op->data.mem.obj)->uses; list; list = list->next) { + if ( + may_alias(pcode, Uses[list->id].pcode) && + bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + break; + case PCOp_REGISTER: + if (op->data.reg.effect & (EffectRead | EffectWrite)) { + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) { + if (op->data.reg.reg == _FP_) + break; + if (op->data.reg.reg == _CALLER_SP_) + break; + if (op->data.reg.reg == 2) + break; + } + if (op->data.reg.reg < n_real_registers[op->arg]) { + if (op->arg == RegClass_CRFIELD) { + if (!flag2 || (op->data.reg.effect & EffectRead)) + return 0; + } else if (op->arg == RegClass_SPR) { + if (!flag1) + return 0; + } else { + return 0; + } + } else if (op->data.reg.effect & EffectRead) { + if (flag1 && op->kind == PCOp_REGISTER && op->arg == RegClass_SPR) + break; + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) { + if (op->data.reg.reg == _FP_) + break; + if (op->data.reg.reg == _CALLER_SP_) + break; + if (op->data.reg.reg == 2) + break; + } + + for (list = reg_Defs[op->arg][op->data.reg.reg]; list; list = list->next) { + if ( + bitvectorgetbit(list->id, vec) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 0; + } + } + } + break; + } + op++; + } + + return 1; +} + +static int isuniquedefinition(PCode *pcode, Loop *loop) { + RegUseOrDef *list; + int defID; + UseOrDef *def; + + defID = pcode->defID; + def = &Defs[defID]; + if (defID >= number_of_Defs) + return 0; + if (def->pcode != pcode) + return 0; + if ((defID + 1) < number_of_Defs && def[1].pcode == pcode) + return 0; + + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) && + list->id != defID + ) + return 0; + } + } else if (def->v.kind == PCOp_MEMORY) { + for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) { + if ( + may_alias(pcode, Defs[list->id].pcode) && + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) && + list->id != defID + ) + return 0; + } + } else { + CError_FATAL(292); + } + + return 1; +} + +static int uniquelyreachesuse(int defID, int useID) { + UseOrDef *def; + UseOrDef *use; + RegUseOrDef *list; + PCode *pcode; + + def = &Defs[defID]; + use = &Uses[useID]; + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) { + if ( + list->id != defID && + bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8) + ) + break; + } + } else if (def->v.kind == PCOp_MEMORY) { + for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) { + if ( + may_alias(def->pcode, Defs[list->id].pcode) && + list->id != defID && + bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8) + ) + break; + } + } + + if (!list) + return 1; + + if (def->pcode->block == use->pcode->block) { + for (pcode = use->pcode->prevPCode; pcode; pcode = pcode->prevPCode) { + if (pcode == def->pcode) + return 1; + } + } + + return 0; +} + +static int uniquelyreachesalluses(int defID, Loop *loop) { + UseOrDef *def; + RegUseOrDef *list; + + def = &Defs[defID]; + + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Uses[def->v.arg][def->v.u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) || + (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id)) + ) + return 0; + } + } else if (def->v.kind == PCOp_MEMORY) { + for (list = findobjectusedef(def->v.u.object)->uses; list; list = list->next) { + if (may_alias(def->pcode, Uses[list->id].pcode)) { + if ( + bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) || + (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id)) + ) + return 0; + } + } + } else { + CError_FATAL(382); + } + + return 1; +} + +static int isliveonexit(TinyValue *v, Loop *loop) { + RegUseOrDef *list; + UInt32 *vec; + + vec = usedefinfo[loop->preheader->blockIndex].usevec1C; + + if (v->kind == PCOp_REGISTER) { + for (list = reg_Uses[v->arg][v->u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(list->id, vec) && + !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 1; + } + } else if (v->kind == PCOp_MEMORY) { + for (list = findobjectusedef(v->u.object)->uses; list; list = list->next) { + if ( + bitvectorgetbit(list->id, vec) && + !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) + ) + return 1; + } + } + + return 0; +} + +static int dominatesallexits(PCode *pcode, Loop *loop) { + return bitvectorgetbit(pcode->block->blockIndex, loop->vec28) != 0; +} + +static int maymove(PCode *pcode, Loop *loop) { + short reg; + + if (!isuniquedefinition(pcode, loop)) + return 0; + if (!uniquelyreachesalluses(pcode->defID, loop)) + return 0; + if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop)) + return 0; + + if (loop->bodySize > 25) { + switch (pcode->op) { + case PC_LI: + if ( + pcode->nextPCode && + pcode->nextPCode->op == PC_LVX && + (pcode->nextPCode->flags & fIsConst) + ) { + reg = pcode->args[0].data.reg.reg; + if (pcode->nextPCode->args[1].data.reg.reg == reg || + pcode->nextPCode->args[2].data.reg.reg == reg) + return 1; + } + case PC_VSPLTISB: + case PC_VSPLTISH: + case PC_VSPLTISW: + return 0; + default: + if (!bitvectorgetbit(pcode->block->blockIndex, loop->vec2C)) + return 0; + } + } + + return 1; +} + +static void moveinvariantcomputation(PCode *pcode, Loop *loop) { + ObjectUseDef *oud; + BlockList *blocklist; + RegUseOrDef *list; + UseOrDef *def; + int defID; + + defID = pcode->defID; + def = &Defs[defID]; + deletepcode(pcode); + insertpcodebefore(loop->preheader->lastPCode, pcode); + loop->bodySize--; + movedloopinvariantcode = 1; + + if (def->v.kind == PCOp_REGISTER) { + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) + bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8); + bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8); + } + } else if (def->v.kind == PCOp_MEMORY) { + oud = findobjectusedef(def->v.u.object); + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (list = oud->defs; list; list = list->next) { + if (uniquely_aliases(pcode, Defs[list->id].pcode)) + bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8); + } + bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8); + } + } else { + CError_FATAL(545); + } +} + +static int srawi_addze_maymove(PCode *pcode, Loop *loop) { + RegUseOrDef *list; + UseOrDef *def; + int defID; + int nextDefID; + + defID = pcode->defID; + nextDefID = pcode->nextPCode->defID; + + def = &Defs[defID]; + if (defID >= number_of_Defs) + return 0; + if (def->pcode != pcode) + return 0; + if ((defID + 1) < number_of_Defs && def[1].pcode == pcode) + return 0; + + if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) { + for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next) { + if ( + bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) && + list->id != defID && + list->id != nextDefID + ) + return 0; + } + } else { + CError_FATAL(582); + } + + if (!uniquelyreachesalluses(pcode->defID, loop)) + return 0; + if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop)) + return 0; + if (!dominatesallexits(pcode->nextPCode, loop) && isliveonexit(&Defs[pcode->nextPCode->defID].v, loop)) + return 0; + + return 1; +} + +static int srawi_addze_isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec) { + static PCode *oldNextInstr; + PCode *nextInstr; + + nextInstr = pcode->nextPCode; + if ( + pcode->op == PC_ADDZE && + oldNextInstr == pcode + ) { + oldNextInstr = NULL; + return 1; + } else if ( + pcode->op == PC_SRAWI && + nextInstr && + nextInstr->op == PC_ADDZE && + pcode->args[0].data.reg.reg == nextInstr->args[0].data.reg.reg && + nextInstr->args[0].data.reg.reg == nextInstr->args[1].data.reg.reg && + !(pcode->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) && + !(nextInstr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) && + isloopinvariant(pcode, loop, vec, 1, 0) && + srawi_addze_maymove(pcode, loop) + ) { + oldNextInstr = nextInstr; + return 1; + } else { + oldNextInstr = NULL; + return 0; + } +} + +static void removeblockfromloop(Loop *loop, PCodeBlock *block) { + BlockList *list; + BlockList **ptr; + + bitvectorclearbit(block->blockIndex, loop->memberblocks); + bitvectorclearbit(block->blockIndex, loop->vec24); + bitvectorclearbit(block->blockIndex, loop->vec28); + bitvectorclearbit(block->blockIndex, loop->vec2C); + loop->bodySize -= block->pcodeCount; + + ptr = &loop->blocks; + while ((list = *ptr)) { + if (list->block == block) + *ptr = list->next; + else + ptr = &list->next; + } +} + +static void changesuccessor(PCodeBlock *block, PCodeBlock *from, PCodeBlock *to) { + PCLink **ptr; + PCLink *link; + + for (link = block->successors; link; link = link->nextLink) { + if (link->block == from) + link->block = to; + } + + ptr = &from->predecessors; + while ((link = *ptr)) { + if (link->block == block) { + *ptr = link->nextLink; + link->nextLink = to->predecessors; + to->predecessors = link; + } else { + ptr = &link->nextLink; + } + } +} + +static void movesuccessor(PCodeBlock *to, PCodeBlock *from, PCodeBlock *block) { + PCLink **ptr; + PCLink *link; + + for (link = block->predecessors; link; link = link->nextLink) { + if (link->block == from) + link->block = to; + } + + ptr = &from->successors; + while ((link = *ptr)) { + if (link->block == block) { + *ptr = link->nextLink; + link->nextLink = to->successors; + to->successors = link; + } else { + ptr = &link->nextLink; + } + } +} + +static void movecmptopreheader(Loop *loop, PCodeBlock *block, PCode *pc1, PCode *pc2, PCodeArg *op) { + PCodeBlock *preheader; + PCode *pc3; + + preheader = loop->preheader; + if (PCODE_FLAG_SET_F(pc1) & fRecordBit) { + moveinvariantcomputation(pc1, loop); + } else { + deletepcode(pc1); + insertpcodebefore(loop->preheader->lastPCode, pc1); + loop->bodySize--; + movedloopinvariantcode = 1; + } + loop->preheader = NULL; + + insertpreheaderblock(loop); + + pc3 = preheader->lastPCode; + CError_ASSERT(775, pc3->op == PC_B); + deletepcode(pc3); + deletepcode(pc2); + appendpcode(preheader, pc2); + movesuccessor(preheader, block, op->data.label.label->block); +} + +static PCodeBlock *appendheadercopy(Loop *loop, PCodeBlock *block1, PCodeBlock *block2, PCodeBlock *block3) { + PCodeBlock *newblock1; + PCodeBlock *newblock2; + PCLink *link; + PCode *scan; + + newblock1 = lalloc(sizeof(PCodeBlock)); + newblock2 = lalloc(sizeof(PCodeBlock)); + + newblock1->labels = NULL; + newblock1->predecessors = newblock1->successors = NULL; + newblock1->firstPCode = newblock1->lastPCode = NULL; + newblock1->pcodeCount = 0; + newblock1->loopWeight = loop->body->loopWeight; + newblock1->flags = 0; + newblock1->blockIndex = pcblockcount++; + + newblock2->labels = NULL; + newblock2->predecessors = newblock2->successors = NULL; + newblock2->firstPCode = newblock2->lastPCode = NULL; + newblock2->pcodeCount = 0; + newblock2->loopWeight = loop->body->loopWeight; + newblock2->flags = 0; + newblock2->blockIndex = pcblockcount++; + + newblock1->nextBlock = newblock2; + newblock2->prevBlock = newblock1; + newblock1->prevBlock = block1; + newblock2->nextBlock = block1->nextBlock; + block1->nextBlock = newblock1; + newblock2->nextBlock->prevBlock = newblock2; + + pclabel(newblock1, makepclabel()); + pclabel(newblock2, makepclabel()); + + changesuccessor(block1, block1->successors->block, newblock1); + + link = lalloc(sizeof(PCLink)); + link->block = newblock2; + link->nextLink = newblock1->successors; + newblock1->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = newblock2->predecessors; + newblock2->predecessors = link; + + appendpcode(newblock2, makepcode(PC_B, block2->nextBlock->labels)); + pcbranch(newblock2, block2->nextBlock->labels); + pccomputepredecessors1(newblock2); + + for (scan = block2->firstPCode; scan; scan = scan->nextPCode) + appendpcode(newblock1, copypcode(scan)); + + pcbranch(newblock1, block3->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = block3->predecessors; + block3->predecessors = link; + + addblocktoloop(loop, newblock1); + if (bitvectorgetbit(block2->blockIndex, loop->vec28)) + bitvectorsetbit(newblock1->blockIndex, loop->vec28); + if (bitvectorgetbit(block2->blockIndex, loop->vec2C)) + bitvectorsetbit(newblock1->blockIndex, loop->vec2C); + + for (loop = loop->parent; loop; loop = loop->parent) { + addblocktoloop(loop, newblock1); + if (bitvectorgetbit(block2->blockIndex, loop->vec28)) + bitvectorsetbit(newblock1->blockIndex, loop->vec28); + if (bitvectorgetbit(block2->blockIndex, loop->vec2C)) + bitvectorsetbit(newblock1->blockIndex, loop->vec2C); + + addblocktoloop(loop, newblock2); + if (bitvectorgetbit(block2->blockIndex, loop->vec28)) + bitvectorsetbit(newblock2->blockIndex, loop->vec28); + if (bitvectorgetbit(block2->blockIndex, loop->vec2C)) + bitvectorsetbit(newblock2->blockIndex, loop->vec2C); + } + + return newblock1; +} + +static BlockList *findswitchpath(Loop *loop, PCodeBlock *block) { + BlockList *head; + BlockList *tail; + BlockList *node; + PCodeBlock *scan; + + head = NULL; + tail = NULL; + + for (scan = block; scan && scan != loop->body; scan = scan->successors->block) { + if (!bitvectorgetbit(scan->blockIndex, loop->memberblocks)) + return NULL; + if (scan->successors && scan->successors->nextLink) + return NULL; + + node = oalloc(sizeof(BlockList)); + node->block = scan; + node->next = NULL; + if (head) { + tail->next = node; + tail = node; + } else { + head = node; + tail = node; + } + } + + return head; +} + +static void simpleunswitchloop(Loop *loop) { + PCode *pc29; + PCodeArg *op27; + UInt32 *myvec; + PCodeBlock *block26; + PCode *pc25; // r25 + BlockList *path2_24; + PCodeArg *op23; + PCode *pc23; // r23 + BlockList *scanlist; // r23 + BlockList *bestpath1; // r23 + BlockList *bestpath2; // r22 + PCodeBlock *headercopy; // r22 + Loop *newloop; // r21 + PCodeBlock *preheader21; + BlockList *path20; + PCode *scan20; + PCode *lastpcode; + int i; + BlockList *pathiter1; + BlockList *pathiter2; + + if (!(lastpcode = loop->body->lastPCode)) + return; + if (lastpcode->op != PC_BT && lastpcode->op != PC_BF) + return; + if (lastpcode->args[2].kind != PCOp_LABEL) + return; + if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks)) + return; + if (loop->x57) + return; + if (loop->x4D) + return; + if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks)) + return; + + for (block26 = pcbasicblocks; block26; block26 = block26->nextBlock) { + if (bitvectorgetbit(block26->blockIndex, loop->memberblocks)) + break; + } + + if (!block26) + return; + + myvec = oalloc(4 * ((number_of_Defs + 31) >> 5)); + bitvectorcopy(myvec, usedefinfo[block26->blockIndex].defvec8, number_of_Defs); + for (pc25 = loop->preheader->nextBlock->firstPCode; pc25; pc25 = pc25->nextPCode) { + if (!(PCODE_FLAG_SET_F(pc25) & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects | fRecordBit))) { + if (isloopinvariant(pc25, loop, myvec, 0, 1)) + break; + } + } + + if (!pc25 || pc25->argCount < 1) + return; + + if ( + pc25->argCount < 1 || + pc25->args[0].kind != PCOp_REGISTER || + pc25->args[0].arg != RegClass_CRFIELD + ) + return; + + pc29 = pc25->block->lastPCode; + if ( + !pc29 || + !(pc29->flags & fIsBranch) || + pc29->args[0].kind != PCOp_REGISTER || + pc29->args[0].arg != RegClass_CRFIELD + ) + return; + + if (pc29->args[0].data.reg.reg != pc25->args[0].data.reg.reg) + return; + + op27 = NULL; + for (i = 0; i < pc29->argCount; i++) { + if (pc29->args[i].kind == PCOp_LABEL) + op27 = &pc29->args[i]; + } + + if (op27) { + preheader21 = loop->preheader; + + path20 = findswitchpath(loop, block26->nextBlock); + if (!path20) + return; + + path2_24 = findswitchpath(loop, op27->data.label.label->block); + if (!path2_24) + return; + + bestpath1 = NULL; + bestpath2 = NULL; + for (pathiter1 = path20; pathiter1; pathiter1 = pathiter1->next) { + for (pathiter2 = path2_24; pathiter2; pathiter2 = pathiter2->next) { + if (pathiter1->block == pathiter2->block) { + bestpath1 = pathiter1; + break; + } + } + if (bestpath1) + break; + bestpath2 = pathiter1; + } + + CError_ASSERT(1192, bestpath2->block); + + if (bestpath2->block->lastPCode && bestpath2->block->lastPCode->op == PC_B) + deletepcode(bestpath2->block->lastPCode); + + while (bestpath1) { + for (scan20 = bestpath1->block->firstPCode; scan20; scan20 = scan20->nextPCode) { + if (scan20->op != PC_B) + appendpcode(bestpath2->block, copypcode(scan20)); + } + bestpath1 = bestpath1->next; + } + + headercopy = appendheadercopy(loop, bestpath2->block, loop->body, block26); + movecmptopreheader(loop, block26, pc25, pc29, op27); + + if (block26->pcodeCount) { + if (path2_24->block->firstPCode) { + pc23 = path2_24->block->firstPCode; + for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) { + if (scan20->op != PC_B) + insertpcodebefore(pc23, copypcode(scan20)); + } + } else { + for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) { + if (scan20->op != PC_B) + appendpcode(path2_24->block, copypcode(scan20)); + } + } + } + + op23 = NULL; + for (i = 0; i < loop->body->lastPCode->argCount; i++) { + if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) + op23 = &loop->body->lastPCode->args[i]; + } + + CError_ASSERT(1250, op23 != NULL); + + changesuccessor(loop->body, op23->data.label.label->block, path2_24->block); + op23->data.label.label = path2_24->block->labels; + + op23 = NULL; + for (i = 0; i < preheader21->lastPCode->argCount; i++) { + if (preheader21->lastPCode->args[i].kind == PCOp_LABEL) + op23 = &preheader21->lastPCode->args[i]; + } + + CError_ASSERT(1267, op23 != NULL); + + changesuccessor(preheader21, op23->data.label.label->block, loop->body); + op23->data.label.label = loop->body->labels; + + op23 = NULL; + for (i = 0; i < loop->preheader->lastPCode->argCount; i++) { + if (loop->preheader->lastPCode->args[i].kind == PCOp_LABEL) + op23 = &loop->preheader->lastPCode->args[i]; + } + + CError_ASSERT(1284, op23 != NULL); + + changesuccessor(loop->preheader, op23->data.label.label->block, headercopy); + op23->data.label.label = headercopy->labels; + + newloop = lalloc(sizeof(Loop)); + newloop->parent = loop->parent; + newloop->children = NULL; + newloop->nextSibling = loop->nextSibling; + loop->nextSibling = newloop; + newloop->body = loop->body; + newloop->preheader = NULL; + newloop->blocks = NULL; + newloop->basicInductionVars = NULL; + newloop->footer = NULL; + newloop->pc18 = NULL; + newloop->loopWeight = loop->loopWeight; + + bitvectorinitialize(newloop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(newloop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(newloop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(newloop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + + removeblockfromloop(loop, newloop->body); + addblocktoloop(newloop, newloop->body); + + bitvectorsetbit(newloop->body->blockIndex, newloop->vec24); + bitvectorsetbit(newloop->body->blockIndex, newloop->vec2C); + bitvectorsetbit(newloop->body->blockIndex, newloop->vec28); + + for (scanlist = path2_24; scanlist; scanlist = scanlist->next) { + removeblockfromloop(loop, scanlist->block); + addblocktoloop(newloop, scanlist->block); + bitvectorsetbit(scanlist->block->blockIndex, newloop->vec2C); + } + + newloop->preheader = NULL; + insertpreheaderblock(newloop); + analyzeloop(newloop); + + loop->body = headercopy; + + for (scanlist = loop->blocks; scanlist; scanlist = scanlist->next) + bitvectorsetbit(scanlist->block->blockIndex, loop->vec2C); + + bitvectorsetbit(headercopy->blockIndex, loop->vec24); + analyzeloop(loop); + + unswitchedinvariantcode = 1; + } +} + +static void simpleunswitchloops(Loop *loop) { + while (loop) { + if (loop->children) + simpleunswitchloops(loop->children); + else if (!loop->x4F) + simpleunswitchloop(loop); + loop = loop->nextSibling; + } +} + +static void moveinvariantsfromloop(Loop *loop) { + RegUseOrDef *list; + BlockList *blocklist; + PCode *instr; + PCode *nextInstr; + UInt32 *myvec; + UseOrDef *def; + int defID; + int flag; + PCodeBlock *block; + + myvec = oalloc(4 * ((number_of_Defs + 31) >> 5)); + do { + flag = 0; + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + block = blocklist->block; + bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs); + for (instr = block->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + if (!(instr->flags & fIsBranch) && instr->argCount) { + if ( + !(instr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) && + isloopinvariant(instr, loop, myvec, 0, 0) && + maymove(instr, loop) + ) { + moveinvariantcomputation(instr, loop); + flag = 1; + } else if (srawi_addze_isloopinvariant(instr, loop, myvec)) { + moveinvariantcomputation(instr, loop); + flag = 1; + } + + for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) { + if (def->v.kind == PCOp_REGISTER) { + for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) + bitvectorclearbit(list->id, myvec); + } else if (def->v.kind == PCOp_MEMORY) { + if (def->v.arg == PCOpMemory0) { + for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) { + if (uniquely_aliases(instr, Defs[list->id].pcode)) + bitvectorclearbit(list->id, myvec); + } + } + } else { + CError_FATAL(1434); + } + + bitvectorsetbit(defID, myvec); + } + } + } + } + } while (flag); +} + +static void moveinvariantsfromloops(Loop *loop) { + while (loop) { + if (loop->children) + moveinvariantsfromloops(loop->children); + moveinvariantsfromloop(loop); + loop = loop->nextSibling; + } +} + +void moveloopinvariantcode(void) { + unswitchedinvariantcode = 0; + movedloopinvariantcode = 0; + if (loopsinflowgraph) { + moveinvariantsfromloops(loopsinflowgraph); + simpleunswitchloops(loopsinflowgraph); + } + freeoheap(); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c new file mode 100644 index 0000000..2b40453 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ConstantPropagation.c @@ -0,0 +1,643 @@ +#include "compiler/ConstantPropagation.h" +#include "compiler/Alias.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/RegisterInfo.h" +#include "compiler/StackFrame.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" + +int propagatedconstants; +static int changed; +static PCode **defininginstruction; +static PCode **vrdefininginstruction; + +static void computedefininginstructions(PCodeBlock *block) { + RegUseOrDef *list; + PCode *instr; + int i; + + for (i = 0; i < used_virtual_registers[RegClass_GPR]; i++) { + instr = NULL; + for (list = reg_Defs[RegClass_GPR][i]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) { + if (instr == NULL) { + instr = Defs[list->id].pcode; + } else { + instr = NULL; + break; + } + } + } + defininginstruction[i] = instr; + } + + for (i = 0; i < used_virtual_registers[RegClass_VR]; i++) { + instr = NULL; + for (list = reg_Defs[RegClass_VR][i]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) { + if (instr == NULL) { + instr = Defs[list->id].pcode; + } else { + instr = NULL; + break; + } + } + } + vrdefininginstruction[i] = instr; + } +} + +static PCode *isstackoperand(PCodeArg *op, SInt16 *resultValue, SInt16 addend) { + PCode *instr; + + if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_ADDI) { + if ( + instr->args[2].kind == PCOp_MEMORY && + (instr->args[1].data.reg.reg == _FP_ || instr->args[1].data.reg.reg == _CALLER_SP_) && + instr->args[2].data.mem.obj->datatype == DLOCAL + ) + { + if (can_add_displ_to_local(instr->args[2].data.mem.obj, addend)) { + *resultValue = instr->args[2].data.mem.offset; + return instr; + } else { + return NULL; + } + } else { + return NULL; + } + } else { + return NULL; + } +} + +static int isconstantoperand(PCodeArg *op, SInt16 *resultValue) { + PCode *instr; + + if ( + (instr = defininginstruction[op->data.reg.reg]) && + instr->op == PC_LI && + instr->args[1].kind == PCOp_IMMEDIATE + ) + { + *resultValue = instr->args[1].data.imm.value; + return 1; + } else { + return 0; + } +} + +static int isuint16constantoperand(PCodeArg *op, SInt16 *resultValue) { + PCode *instr; + + if ( + (instr = defininginstruction[op->data.reg.reg]) && + instr->op == PC_LI && + instr->args[1].kind == PCOp_IMMEDIATE && + FITS_IN_USHORT(instr->args[1].data.imm.value) + ) + { + *resultValue = instr->args[1].data.imm.value; + return 1; + } else { + return 0; + } +} + +static int isvectorconstantoperand(PCodeArg *op, SInt16 *resultValue, Opcode *resultNewOp) { + PCode *instr; + + if ( + (instr = vrdefininginstruction[op->data.reg.reg]) && + (instr->op == PC_VSPLTISB || instr->op == PC_VSPLTISH || instr->op == PC_VSPLTISW) && + instr->args[1].kind == PCOp_IMMEDIATE + ) + { + *resultValue = instr->args[1].data.imm.value; + *resultNewOp = instr->op; + return 1; + } else { + return 0; + } +} + +static int isunsignedloadoperand(PCodeArg *op) { + PCode *instr; + + if ((instr = defininginstruction[op->data.reg.reg])) { + if (instr->flags & fIsRead) { + if (instr->op >= PC_LHZ && instr->op <= PC_LHZUX) + return 2; + if (instr->op >= PC_LBZ && instr->op <= PC_LBZUX) + return 1; + } else if (instr->op == PC_RLWINM) { + int var3 = instr->args[3].data.imm.value; + int var4 = instr->args[4].data.imm.value; + if (var4 == 31) { + if (var3 == 24) + return 1; + if (var3 == 16) + return 2; + } + } + } + + return 0; +} + +static int ismaskedoperand(PCodeArg *op, UInt32 *resultMask) { + PCode *instr; + UInt32 mask; + + if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_RLWINM) { + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) { + mask = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) & + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } else { + mask = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) | + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } + *resultMask = mask; + return 1; + } + + return 0; +} + +static int issignedloadoperand(PCodeArg *op) { + PCode *instr; + + if ((instr = defininginstruction[op->data.reg.reg])) { + if (instr->flags & fIsRead) { + if (instr->op >= PC_LHA && instr->op <= PC_LHAUX) + return 2; + } else if (instr->op == PC_EXTSB) { + return 1; + } else if (instr->op == PC_EXTSH) { + return 2; + } + } + + return 0; +} + +static void propagateconstantstoblock(PCodeBlock *block) { + PCode *instr; + SInt16 immAddend; + SInt16 value1; + SInt16 valueU16; + Opcode newOpcode; + SInt16 value2; + UInt32 mask; + UInt32 mask2; + int loadSize; + PCodeArg *op; + int i; + + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + switch (instr->op) { + case PC_MR: + if (isconstantoperand(&instr->args[1], &value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } + break; + case PC_VMR: + if (isvectorconstantoperand(&instr->args[1], &value1, &newOpcode)) { + change_opcode(instr, newOpcode); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } + break; + case PC_RLWINM: + if ( + !(PCODE_FLAG_SET_F(instr) & fRecordBit) && + instr->args[2].data.imm.value == 0 && + instr->args[4].data.imm.value == 31 + ) + { + if (isconstantoperand(&instr->args[1], &value1)) { + if ( + (instr->args[3].data.imm.value == 16 && value1 == (value1 & 0x7FFF)) || + (instr->args[3].data.imm.value == 24 && value1 == (value1 & 0xFF)) + ) + { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + } + + loadSize = isunsignedloadoperand(&instr->args[1]); + if ( + (loadSize == 2 && instr->args[3].data.imm.value <= 16) || + (loadSize == 1 && instr->args[3].data.imm.value <= 24) + ) + { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + + if (ismaskedoperand(&instr->args[1], &mask)) { + if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) { + mask2 = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) & + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } else { + mask2 = + ((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) | + ~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1))); + } + if (mask == (mask & mask2)) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } + } + break; + + case PC_EXTSH: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if (isconstantoperand(&instr->args[1], &value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + + loadSize = issignedloadoperand(&instr->args[1]); + if (loadSize == 1 || loadSize == 2) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + break; + + case PC_EXTSB: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if ( + isconstantoperand(&instr->args[1], &value1) && + value1 >= -128 && + value1 <= 127 + ) + { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + break; + } + + loadSize = issignedloadoperand(&instr->args[1]); + if (loadSize == 1) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + break; + + case PC_ADDI: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + immAddend = instr->args[2].data.imm.value; + if ( + isconstantoperand(&instr->args[1], &value1) && + FITS_IN_SHORT(immAddend + value1) + ) + { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = immAddend + value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + break; + + case PC_ADD: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if (isconstantoperand(&instr->args[2], &value1)) { + if (value1 == 0) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + } else { + change_opcode(instr, PC_ADDI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + } + propagatedconstants = 1; + changed = 1; + immAddend = value1; + } + + if (isconstantoperand(&instr->args[1], &value1)) { + if (instr->op == PC_ADDI || instr->op == PC_MR) { + if (FITS_IN_SHORT(immAddend + value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = immAddend + value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } else { + instr->args[1] = instr->args[2]; + if (value1 == 0) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + } else { + change_opcode(instr, PC_ADDI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + } + propagatedconstants = 1; + changed = 1; + } + } + + if (changed) { + if (instr->op == PC_MR) { + PCode *stackInstr; + if ((stackInstr = isstackoperand(&instr->args[1], &value1, 0))) { + change_opcode(instr, PC_ADDI); + instr->flags = stackInstr->flags; + instr->args[1] = stackInstr->args[1]; + instr->args[2] = stackInstr->args[2]; + change_num_operands(instr, 3); + propagatedconstants = 1; + changed = 1; + } + } else if (instr->op == PC_ADDI && instr->args[2].kind == PCOp_IMMEDIATE) { + PCode *stackInstr; + SInt16 addend = instr->args[2].data.imm.value; + if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) { + change_opcode(instr, PC_ADDI); + instr->flags = stackInstr->flags; + instr->args[1] = stackInstr->args[1]; + instr->args[2] = stackInstr->args[2]; + instr->args[2].data.imm.value = value1 + addend; + if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) + instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, 1); + propagatedconstants = 1; + changed = 1; + } + } + } + break; + + case PC_OR: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + value1 = 0; + immAddend = 0; + if (isconstantoperand(&instr->args[2], &value1)) { + if (isuint16constantoperand(&instr->args[2], &valueU16)) { + if (valueU16 != 0) { + change_opcode(instr, PC_ORI); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = valueU16; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } else { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + value1 = valueU16; + } else if (value1 == 0) { + change_opcode(instr, PC_MR); + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + immAddend = value1; + } + + if (isconstantoperand(&instr->args[1], &value1)) { + if (instr->op == PC_ORI || instr->op == PC_MR) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = immAddend | value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } else if (isuint16constantoperand(&instr->args[1], &valueU16)) { + if (valueU16 != 0) { + change_opcode(instr, PC_ORI); + instr->args[1] = instr->args[2]; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = valueU16; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } else { + change_opcode(instr, PC_MR); + instr->args[1] = instr->args[2]; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } else if (value1 == 0) { + change_opcode(instr, PC_MR); + instr->args[1] = instr->args[2]; + change_num_operands(instr, 2); + propagatedconstants = 1; + changed = 1; + } + } + break; + + case PC_SUBF: + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + break; + + if (isconstantoperand(&instr->args[1], &value1) && FITS_IN_SHORT(-value1)) { + if (isconstantoperand(&instr->args[2], &value2) && FITS_IN_SHORT(value2 - value1)) { + change_opcode(instr, PC_LI); + instr->args[1].kind = PCOp_IMMEDIATE; + instr->args[1].data.imm.value = value2 - value1; + instr->args[1].data.imm.obj = NULL; + change_num_operands(instr, 2); + } else if (value1 == 0) { + change_opcode(instr, PC_MR); + instr->args[1] = instr->args[2]; + change_num_operands(instr, 2); + } else { + change_opcode(instr, PC_ADDI); + instr->args[1] = instr->args[2]; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = -value1; + instr->args[2].data.imm.obj = NULL; + } + propagatedconstants = 1; + changed = 1; + value2 = value1; + } else if (isconstantoperand(&instr->args[2], &value1) && FITS_IN_SHORT(-value1)) { + if (value1 == 0) { + change_opcode(instr, PC_NEG); + change_num_operands(instr, 2); + } else { + instr->flags = opcodeinfo[PC_SUBFIC].flags | (instr->flags & ~opcodeinfo[PC_SUBF].flags); + change_opcode(instr, PC_SUBFIC); + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + instr->args[3].kind = PCOp_REGISTER; + instr->args[3].arg = RegClass_SPR; + instr->args[3].data.reg.reg = 0; + instr->args[3].data.reg.effect = EffectWrite; + change_num_operands(instr, 4); + } + propagatedconstants = 1; + changed = 1; + } + + break; + + case PC_LBZ: + case PC_LHZ: + case PC_LHA: + case PC_LWZ: + case PC_STB: + case PC_STH: + case PC_STW: + case PC_LFS: + case PC_LFD: + case PC_STFS: + case PC_STFD: + if (instr->args[2].kind == PCOp_IMMEDIATE) { + PCode *stackInstr; + SInt16 addend = instr->args[2].data.imm.value; + + if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) { + instr->args[1] = stackInstr->args[1]; + instr->args[2] = stackInstr->args[2]; + instr->args[2].data.imm.value = value1 + addend; + if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) + instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, + nbytes_loaded_or_stored_by(instr)); + propagatedconstants = 1; + changed = 1; + } + } + break; + + case PC_LBZX: + case PC_LHZX: + case PC_LHAX: + case PC_LWZX: + case PC_STBX: + case PC_STHX: + case PC_STWX: + case PC_LFSX: + case PC_LFDX: + case PC_STFSX: + case PC_STFDX: + if (isconstantoperand(&instr->args[2], &value1)) { + instr->op -= 2; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } else if (isconstantoperand(&instr->args[1], &value1)) { + instr->op -= 2; + instr->args[1] = instr->args[2]; + instr->args[2].kind = PCOp_IMMEDIATE; + instr->args[2].data.imm.value = value1; + instr->args[2].data.imm.obj = NULL; + propagatedconstants = 1; + changed = 1; + } + + break; + } + + for (i = 0, op = instr->args; i < instr->argCount; i++, op++) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) + ) + { + defininginstruction[op->data.reg.reg] = instr; + } + else if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_VR && + (op->data.reg.effect & EffectWrite) + ) + { + vrdefininginstruction[op->data.reg.reg] = instr; + } + } + } +} + +void propagateconstants(void) { + PCodeBlock *block; + int i; + + propagatedconstants = 0; + computeusedefchains(0); + defininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_GPR]); + vrdefininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_VR]); + + do { + changed = 0; + for (i = 0; i < pcblockcount; i++) { + if ((block = depthfirstordering[i])) { + computedefininginstructions(block); + propagateconstantstoblock(block); + } + } + } while (changed); + + freeoheap(); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c new file mode 100644 index 0000000..6bb2d51 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopDetection.c @@ -0,0 +1,885 @@ +#include "compiler/LoopDetection.h" +#include "compiler/CFunc.h" +#include "compiler/PCode.h" +#include "compiler/TOC.h" +#include "compiler/UseDefChains.h" +#include "compiler/CompilerTools.h" +#include "compiler/BitVectors.h" +#include "compiler/enode.h" +#include "compiler/objects.h" + +Loop *loopsinflowgraph; +int loopdetection_nblocks; +static UInt32 **dominators; +static BlockList *loopheaders; +static int nloopheaders; +static PCodeBlock **loopstack; +BitVector *LoopTemp; +struct LoopList *LoopList_First; + +static void computedominators(void) { + int i; + PCodeBlock *block; + int blockCount; + int flag; + UInt32 *myvec; + PCLink *link; + + blockCount = pcblockcount; + flag = 1; + + dominators = oalloc(sizeof(UInt32 *) * pcblockcount); + for (i = 0; i < pcblockcount; i++) + dominators[i] = oalloc(4 * ((blockCount + 31) >> 5)); + + myvec = oalloc(4 * ((blockCount + 31) >> 5)); + + bitvectorinitialize(dominators[pcbasicblocks->blockIndex], blockCount, 0); + //dominators[pcbasicblocks->blockIndex][0] |= 1; + bitvectorsetbit(0, dominators[pcbasicblocks->blockIndex]); + + for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) + bitvectorinitialize(dominators[block->blockIndex], blockCount, 0xFFFFFFFF); + + computedepthfirstordering(); + + while (flag) { + flag = 0; + for (i = 0; i < pcblockcount; i++) { + block = depthfirstordering[i]; + if (block && block->blockIndex != pcbasicblocks->blockIndex) { + bitvectorcopy(myvec, dominators[block->predecessors->block->blockIndex], blockCount); + for (link = block->predecessors->nextLink; link; link = link->nextLink) + bitvectorintersect(myvec, dominators[link->block->blockIndex], blockCount); + //myvec[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31); + bitvectorsetbit(block->blockIndex, myvec); + + if (bitvectorchanged(dominators[block->blockIndex], myvec, blockCount)) + flag = 1; + } + } + } +} + +static BlockList *findloopheaders(void) { + PCodeBlock *block; + PCLink *link; + BlockList *list; + + loopheaders = NULL; + nloopheaders = 0; + + for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) { + for (link = block->predecessors; link; link = link->nextLink) { + //if ((1 << (block->blockIndex & 31)) & dominators[link->block->blockIndex][block->blockIndex >> 5]) + if (bitvectorgetbit(block->blockIndex, dominators[link->block->blockIndex])) + break; + } + + if (link) { + list = oalloc(sizeof(BlockList)); + list->block = block; + list->next = loopheaders; + loopheaders = list; + nloopheaders++; + } + } + + return loopheaders; +} + +void addblocktoloop(Loop *loop, PCodeBlock *block) { + BlockList *list = lalloc(sizeof(BlockList)); + + //loop->memberblocks[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31); + bitvectorsetbit(block->blockIndex, loop->memberblocks); + + list->block = block; + list->next = loop->blocks; + loop->blocks = list; +} + +static void findnaturalloop(Loop *loop) { + BlockList *list; + BlockList *list2; + PCLink *link; + PCodeBlock *block; + int i; + + i = 0; + addblocktoloop(loop, loop->body); + for (link = loop->body->predecessors; link; link = link->nextLink) { + if (bitvectorgetbit(loop->body->blockIndex, dominators[link->block->blockIndex]) && link->block != loop->body) { + addblocktoloop(loop, link->block); + loopstack[i++] = link->block; + } + } + + while (i) { + link = loopstack[--i]->predecessors; + while (link) { + if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) { + addblocktoloop(loop, link->block); + loopstack[i++] = link->block; + } + link = link->nextLink; + } + } + + for (list = loop->blocks; list; list = list->next) { + block = list->block; + for (link = block->successors; link; link = link->nextLink) { + if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(block->blockIndex, loop->vec24); + break; + } + } + } + + for (list = loop->blocks; list; list = list->next) { + for (list2 = loop->blocks; list2; list2 = list2->next) { + if (bitvectorgetbit(list2->block->blockIndex, loop->vec24) && + !bitvectorgetbit(list->block->blockIndex, dominators[list2->block->blockIndex])) + break; + } + + if (!list2) + bitvectorsetbit(list->block->blockIndex, loop->vec28); + } + + for (list = loop->blocks; list; list = list->next) { + for (link = loop->body->predecessors; link; link = link->nextLink) { + if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks) && + !bitvectorgetbit(list->block->blockIndex, dominators[link->block->blockIndex])) + break; + } + + if (!link) + bitvectorsetbit(list->block->blockIndex, loop->vec2C); + } +} + +static void addlooptolist(Loop *loop, Loop **list) { + Loop **scan; + Loop *scanloop; + + scan = list; + while ((scanloop = *scan)) { + if (bitvectorgetbit(loop->body->blockIndex, scanloop->memberblocks)) { + loop->parent = scanloop; + addlooptolist(loop, &scanloop->children); + return; + } + + if (bitvectorgetbit(scanloop->body->blockIndex, loop->memberblocks)) { + *scan = scanloop->nextSibling; + scanloop->parent = loop; + scanloop->nextSibling = loop->children; + loop->children = scanloop; + } else { + scan = &scanloop->nextSibling; + } + } + + loop->nextSibling = *list; + *list = loop; +} + +static void findnaturalloops(void) { + Loop *loop; + int size; + + loopdetection_nblocks = pcblockcount + 5 * nloopheaders; + loopstack = oalloc(sizeof(PCodeBlock *) * pcblockcount); + while (loopheaders) { + loop = lalloc(sizeof(Loop)); + loop->parent = loop->nextSibling = loop->children = NULL; + loop->body = loopheaders->block; + loop->preheader = NULL; + loop->blocks = NULL; + loop->basicInductionVars = NULL; + loop->footer = NULL; + loop->pc18 = NULL; + loop->loopWeight = loop->body->loopWeight; + + bitvectorinitialize(loop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(loop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(loop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + bitvectorinitialize(loop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0); + + findnaturalloop(loop); + addlooptolist(loop, &loopsinflowgraph); + + loopheaders = loopheaders->next; + } +} + +static PCodeBlock *makepreheaderblock(void) { + PCodeLabel *label; + PCodeBlock *block; + + label = makepclabel(); + block = lalloc(sizeof(PCodeBlock)); + block->nextBlock = NULL; + block->prevBlock = NULL; + block->labels = NULL; + block->successors = NULL; + block->predecessors = NULL; + block->firstPCode = block->lastPCode = NULL; + block->pcodeCount = 0; + block->flags = 0; + block->blockIndex = pcblockcount++; + pclabel(block, label); + return block; +} + +static void insertpreheaderbefore(PCodeBlock *a, PCodeBlock *b) { + a->nextBlock = b; + a->prevBlock = b->prevBlock; + b->prevBlock->nextBlock = a; + b->prevBlock = a; +} + +void insertpreheaderblock(Loop *loop) { + PCodeBlock *preheader; + PCodeBlock *block29; + PCodeBlock *block28; + PCode *pcode27; + PCLink *link; // r26 + PCLink **linkptr; // r25 + PCodeLabel *newlabel; // r23 + PCLink *innerlink; + PCodeBlock *block; + PCodeArg *arg; + int i; + + preheader = loop->preheader = makepreheaderblock(); + block29 = NULL; + block28 = loop->body; + + if (!block28->labels) + pclabel(block28, makepclabel()); + + appendpcode(preheader, makepcode(PC_B, block28->labels)); + preheader->loopWeight = loop->parent ? loop->parent->loopWeight : 1; + + linkptr = &block28->predecessors; + while ((link = *linkptr)) { + if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) { + linkptr = &link->nextLink; + } else { + if (link->block->pcodeCount) { + pcode27 = link->block->lastPCode; + if (pcode27->op == PC_B) { + CError_ASSERT(462, pcode27->args[0].kind == PCOp_LABEL); + if (pcode27->args[0].data.label.label->block == block28) + pcode27->args[0].data.label.label = preheader->labels; + } else if (pcode27->op == PC_BT || pcode27->op == PC_BF) { + CError_ASSERT(474, pcode27->args[2].kind == PCOp_LABEL); + if (pcode27->args[2].data.label.label->block == block28) + pcode27->args[2].data.label.label = preheader->labels; + } else if (pcode27->op == PC_BCTR) { + if (pcode27->argCount > 1 && pcode27->args[1].kind == PCOp_MEMORY) { + Object *obj = pcode27->args[1].data.mem.obj; + UInt32 *array = (UInt32 *) obj->u.data.u.switchtable.data; + int i; + for (i = 0; i < obj->u.data.u.switchtable.size; i++) { + if (((PCodeLabel *) CTool_ResolveIndexToPointer(array[i]))->block == block28) + array[i] = CTool_CreateIndexFromPointer(preheader->labels); + } + } else { + CodeLabelList *cll; + for (cll = codelabellist; cll; cll = cll->next) { + if (cll->label->pclabel->block == block28) + cll->label->pclabel = preheader->labels; + } + } + } else { + CError_ASSERT(505, link->block->nextBlock == block28); + } + } + + for (innerlink = link->block->successors; innerlink; innerlink = innerlink->nextLink) { + if (innerlink->block == block28) + innerlink->block = preheader; + } + + *linkptr = link->nextLink;; + link->nextLink = preheader->predecessors; + preheader->predecessors = link; + } + } + + if (!bitvectorgetbit(block28->prevBlock->blockIndex, loop->memberblocks)) { + insertpreheaderbefore(preheader, block28); + + if ( + (!block28->nextBlock || !bitvectorgetbit(block28->nextBlock->blockIndex, loop->memberblocks)) && + block28->lastPCode && + (block28->lastPCode->flags & fIsBranch) && + block28->lastPCode->op != PC_BDNZ + ) { + i = block28->lastPCode->argCount; + arg = block28->lastPCode->args; + while (i && arg->kind != PCOp_LABEL) { + arg++; + i--; + } + + if (i && arg->kind == PCOp_LABEL && arg->data.label.label->block == block28) { + block29 = makepreheaderblock(); + insertpreheaderbefore(block29, block28); + newlabel = makepclabel(); + pclabel(block29, newlabel); + arg->data.label.label = newlabel; + + link = lalloc(sizeof(PCLink)); + link->block = block28; + link->nextLink = block29->predecessors; + block29->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = block28; + link->nextLink = block29->successors; + block29->successors = link; + + for (link = block28->successors; link; link = link->nextLink) { + if (link->block == block28) + link->block = block29; + } + for (link = block28->predecessors; link; link = link->nextLink) { + if (link->block == block28) + link->block = block29; + } + + bitvectorsetbit(block29->blockIndex, loop->vec2C); + addblocktoloop(loop, block29); + } + } + } else { + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (bitvectorgetbit(block->blockIndex, loop->memberblocks)) + break; + } + insertpreheaderbefore(preheader, block); + } + + link = lalloc(sizeof(PCLink)); + link->block = preheader; + link->nextLink = block28->predecessors; + block28->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = block28; + link->nextLink = preheader->successors; + preheader->successors = link; + + for (loop = loop->parent; loop; loop = loop->parent) { + addblocktoloop(loop, preheader); + if (bitvectorgetbit(block28->blockIndex, loop->vec28)) { + bitvectorsetbit(preheader->blockIndex, loop->vec28); + if (block29) + bitvectorsetbit(block29->blockIndex, loop->vec28); + } + if (bitvectorgetbit(block28->blockIndex, loop->vec2C)) { + bitvectorsetbit(preheader->blockIndex, loop->vec2C); + if (block29) + bitvectorsetbit(block29->blockIndex, loop->vec2C); + } + } +} + +static void insertpreheaderblocks(Loop *loop) { + while (loop) { + if (loop->children) + insertpreheaderblocks(loop->children); + insertpreheaderblock(loop); + loop = loop->nextSibling; + } +} + +void findloopsinflowgraph(void) { + loopsinflowgraph = NULL; + computedominators(); + if (findloopheaders()) { + findnaturalloops(); + insertpreheaderblocks(loopsinflowgraph); + } + freeoheap(); +} + +static int checklooplimits(SInt32 opcode, SInt32 condition, SInt32 c, SInt32 d, SInt32 addend, SInt32 *result) { + if (opcode == PC_BT) { + if (condition == 0) { + if (addend <= 0) + return 0; + if (c < d) + *result = (d - c + addend - 1) / addend; + else + *result = 0; + } else if (condition == 1) { + if (addend >= 0) + return 0; + if (c > d) + *result = (c - d - addend - 1) / -addend; + else + *result = 0; + } else { + return 0; + } + } else { + if (condition == 0) { + if (addend >= 0) + return 0; + if (c >= d) + *result = (c - d - addend) / -addend; + else + *result = 0; + } else if (condition == 1) { + if (addend <= 0) + return 0; + if (c <= d) + *result = (d - c + addend) / addend; + else + *result = 0; + } else if (c < d) { + if (addend <= 0) + return 0; + if ((d - c) % addend) + return 0; + *result = (d - c) / addend; + } else if (c > d) { + if (addend >= 0) + return 0; + if ((c - d) % -addend) + return 0; + *result = (c - d) / -addend; + } else { + *result = 0; + } + } + + return 1; +} + +static int checkunsignedlooplimits(SInt32 opcode, SInt32 condition, UInt32 c, UInt32 d, SInt32 addend, UInt32 *result) { + if (opcode == PC_BT) { + if (condition == 0) { + if (addend <= 0) + return 0; + if (c < d) + *result = (d - c + addend - 1) / addend; + else + *result = 0; + } else if (condition == 1) { + if (addend >= 0) + return 0; + if (c > d) + *result = (c - d - addend - 1) / -addend; + else + *result = 0; + } else { + return 0; + } + } else { + if (condition == 0) { + if (addend >= 0) + return 0; + if (c >= d) + *result = (c - d - addend) / -addend; + else + *result = 0; + } else if (condition == 1) { + if (addend <= 0) + return 0; + if (c <= d) + *result = (d - c + addend) / addend; + else + *result = 0; + } else if (c < d) { + if (addend <= 0) + return 0; + if ((d - c) % addend) + return 0; + *result = (d - c) / addend; + } else if (c > d) { + if (addend >= 0) + return 0; + if ((c - d) % -addend) + return 0; + *result = (c - d) / -addend; + } else { + *result = 0; + } + } + + return (*result & 0x80000000) == 0; +} + +static int checkunknownloop(int a, int b, int c, unsigned char *op) { + if (a == PC_BT) { + if (b == 0) { + if (c <= 0) + return 0; + *op = ELESS; + } else if (b == 1) { + if (c >= 0) + return 0; + *op = EGREATER; + } else { + return 0; + } + } else { + if (b == 0) { + if (c >= 0) + return 0; + *op = EGREATEREQU; + } else if (b == 1) { + if (c <= 0) + return 0; + *op = ELESSEQU; + } else if (c == 1) { + *op = ENOTEQU; + } else if (c == -1) { + *op = ENOTEQU; + } else { + return 0; + } + } + + return 1; +} + +static void checkcountingloop(Loop *loop) { + RegUseOrDef *list; + PCode *lastpcode; + PCode *prevpcode; + PCode *pc8; + PCode *check; + short op12; + short reg11; + SInt16 reg4; + short reg11b; + Loop *child; + + if (!(lastpcode = loop->body->lastPCode)) + return; + if (lastpcode->op != PC_BT && lastpcode->op != PC_BF) + return; + if (lastpcode->args[2].kind != PCOp_LABEL) + return; + + if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks)) + return; + if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks)) + return; + + reg11 = lastpcode->args[0].data.reg.reg; + reg4 = lastpcode->args[1].data.imm.value; + prevpcode = lastpcode->prevPCode; + if (!prevpcode) + return; + + op12 = prevpcode->op; + if (op12 == PC_ADDI && prevpcode->args[2].kind == PCOp_IMMEDIATE) { + pc8 = prevpcode; + prevpcode = prevpcode->prevPCode; + if (!prevpcode) + return; + + op12 = prevpcode->op; + if (pc8->args[0].data.reg.reg != pc8->args[1].data.reg.reg) + return; + if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI) + return; + if (prevpcode->args[1].data.reg.reg != pc8->args[0].data.reg.reg) + return; + if ((loop->step = pc8->args[2].data.imm.value) == 0) + return; + } + + if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI) + return; + + if (prevpcode->args[0].data.reg.reg != reg11) + return; + + reg11b = prevpcode->args[1].data.reg.reg; + if (reg11b < 32) + return; + + if (loop->preheader->nextBlock != lastpcode->args[2].data.label.label->block) + return; + + if (op12 == PC_CMPI) { + if (prevpcode->prevPCode) + return; + loop->upper = prevpcode->args[2].data.imm.value; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if (op12 == PC_CMPLI) { + if (prevpcode->prevPCode) + return; + loop->upper = prevpcode->args[2].data.imm.value & 0xFFFF; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if (op12 == PC_CMP || op12 == PC_CMPL) { + if (prevpcode->prevPCode) { + if ( + prevpcode->prevPCode->op == PC_LI && + prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + !prevpcode->prevPCode->prevPCode + ) { + loop->upper = prevpcode->prevPCode->args[1].data.imm.value; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + prevpcode->prevPCode->op == PC_LIS && + prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + !prevpcode->prevPCode->prevPCode + ) { + loop->upper = prevpcode->prevPCode->args[1].data.imm.value << 16; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + prevpcode->prevPCode->op == PC_ADDI && + prevpcode->prevPCode->args[2].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + prevpcode->prevPCode->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg && + prevpcode->prevPCode->prevPCode && + prevpcode->prevPCode->prevPCode->op == PC_LIS && + prevpcode->prevPCode->prevPCode->args[1].kind == PCOp_IMMEDIATE && + prevpcode->prevPCode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg && + !prevpcode->prevPCode->prevPCode->prevPCode + ) { + loop->upper = prevpcode->prevPCode->args[2].data.imm.value + + (prevpcode->prevPCode->prevPCode->args[1].data.imm.value << 16); + loop->upperType = LOOP_BOUND_CONSTANT; + } else { + return; + } + } else { + pc8 = NULL; + for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) + return; + } + for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) { + if (!pc8) { + pc8 = Defs[list->id].pcode; + if ( + pc8->op == PC_LI && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->upper = pc8->args[1].data.imm.value; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_LIS && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->upper = pc8->args[1].data.imm.value << 16; + loop->upperType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_ADDI && + pc8->args[2].kind == PCOp_IMMEDIATE && + pc8->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg && + pc8->prevPCode && + pc8->prevPCode->op == PC_LIS && + pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE && + pc8->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg + ) { + loop->upper = pc8->args[2].data.imm.value + + (pc8->prevPCode->args[1].data.imm.value << 16); + loop->upperType = LOOP_BOUND_CONSTANT; + } else { + loop->upperType = LOOP_BOUND_VARIABLE; + break; + } + } else { + loop->upperType = LOOP_BOUND_VARIABLE; + break; + } + } + } + + if (loop->upperType == LOOP_BOUND_INDETERMINATE) + loop->upperType = LOOP_BOUND_VARIABLE; + } + } + + pc8 = NULL; + + for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) { + check = Defs[list->id].pcode; + if (bitvectorgetbit(check->block->blockIndex, loop->memberblocks)) { + if (!pc8) { + pc8 = check; + if (check->op != PC_ADDI) + return; + if (check->args[1].data.reg.reg != reg11b) + return; + if (check->args[2].kind != PCOp_IMMEDIATE) + return; + if ((loop->step = check->args[2].data.imm.value) == 0) + return; + } else { + return; + } + } + } + + if (!pc8) + return; + + if (pc8->block != prevpcode->block && !bitvectorgetbit(prevpcode->block->blockIndex, loop->vec2C)) + return; + + if (loop->children) { + for (child = loop->children; child; child = child->nextSibling) { + if (bitvectorgetbit(pc8->block->blockIndex, child->memberblocks)) + return; + } + } + + loop->pc18 = pc8; + + pc8 = NULL; + + for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) { + if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) { + if (!pc8) { + pc8 = Defs[list->id].pcode; + if ( + pc8->op == PC_LI && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->lower = pc8->args[1].data.imm.value; + loop->lowerType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_LIS && + pc8->args[1].kind == PCOp_IMMEDIATE + ) { + loop->lower = pc8->args[1].data.imm.value << 16; + loop->lowerType = LOOP_BOUND_CONSTANT; + } else if ( + pc8->op == PC_ADDI && + pc8->args[2].kind == PCOp_IMMEDIATE && + pc8->args[1].data.reg.reg == reg11b && + pc8->prevPCode && + pc8->prevPCode->op == PC_LIS && + pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE && + pc8->prevPCode->args[0].data.reg.reg == reg11b + ) { + loop->lower = pc8->args[2].data.imm.value + + (pc8->prevPCode->args[1].data.imm.value << 16); + loop->lowerType = LOOP_BOUND_CONSTANT; + } else { + loop->lowerType = LOOP_BOUND_VARIABLE; + break; + } + } else { + loop->lowerType = LOOP_BOUND_INDETERMINATE; + break; + } + } + } + + if (loop->lowerType == LOOP_BOUND_INDETERMINATE) + loop->lowerType = LOOP_BOUND_VARIABLE; + + if (loop->lowerType == LOOP_BOUND_CONSTANT && loop->upperType == LOOP_BOUND_CONSTANT) { + if (op12 == PC_CMP || op12 == PC_CMPI) { + if (!checklooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, &loop->iterationCount)) + return; + } else { + if (!checkunsignedlooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, (UInt32 *) &loop->iterationCount)) + return; + } + loop->isKnownCountingLoop = 1; + } else if (loop->lowerType != LOOP_BOUND_INDETERMINATE || loop->upperType != LOOP_BOUND_INDETERMINATE) { + if (!checkunknownloop(lastpcode->op, reg4, loop->step, &loop->unknownCondition)) + return; + loop->isUnknownCountingLoop = 1; + } +} + +void analyzeForCountableLoops(Loop *loop) { + if (!loop) + return; + + while (loop) { + if (loop->children) + analyzeForCountableLoops(loop->children); + checkcountingloop(loop); + loop = loop->nextSibling; + } +} + +void analyzeloop(Loop *loop) { + BlockList *list; + PCodeBlock *block; + PCode *pcode; + + loop->bodySize = 0; + loop->x4D = 0; + loop->x4E = 0; + loop->x4F = 1; + loop->isKnownCountingLoop = 0; + loop->isUnknownCountingLoop = 0; + loop->lowerType = LOOP_BOUND_INDETERMINATE; + loop->upperType = LOOP_BOUND_INDETERMINATE; + loop->iterationCount = -1; + loop->x57 = 0; + loop->x52 = 0; + + for (list = loop->blocks; list; list = list->next) { + block = list->block; + if (!loop->children) + block->flags |= fPCBlockFlag2000; + loop->bodySize += block->pcodeCount; + + if (block != loop->body) { + if (!block->successors || !block->predecessors || block->successors->nextLink || block->predecessors->nextLink) + loop->x4F = 0; + } + + if ((block->flags & fPCBlockFlag4000) == fPCBlockFlag4000) + loop->x52 = 1; + + for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) { + if (PCODE_FLAG_SET_T(pcode) & fLink) + loop->x4D = 1; + + if (pcode->op == PC_BCTRL || pcode->op == PC_BCTR || pcode->op == PC_BCCTR || pcode->op == PC_MTCTR || pcode->op == PC_MFCTR) { + loop->x4E = 1; + } else if (pcode->flags & fIsRead) { + if (pcode->op == PC_LBZX || pcode->op == PC_LHZX || pcode->op == PC_LHAX || pcode->op == PC_LWZX || pcode->op == PC_LFSX || pcode->op == PC_LFDX) + loop->x53 = 1; + } else if (pcode->flags & fIsWrite) { + if (pcode->op == PC_STBX || pcode->op == PC_STHX || pcode->op == PC_STWX || pcode->op == PC_STFSX || pcode->op == PC_STFDX) + loop->x54 = 1; + } else { + if (pcode->op == PC_EIEIO || pcode->op == PC_SYNC || pcode->op == PC_ISYNC) + loop->x57 = 1; + } + } + } + + if (!loop->children && !loop->x4D && loop->bodySize < 32) { + for (list = loop->blocks; list; list = list->next) + list->block->flags |= fPCBlockFlag2000; + } +} + +static void analyzeloops(Loop *loop) { + while (loop) { + if (loop->children) + analyzeloops(loop->children); + analyzeloop(loop); + loop = loop->nextSibling; + } +} + +void analyzeloopsinflowgraph(void) { + if (loopsinflowgraph) + analyzeloops(loopsinflowgraph); +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c new file mode 100644 index 0000000..b2aef1e --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c @@ -0,0 +1,1553 @@ +#include "compiler/LoopOptimization.h" +#include "compiler/CFunc.h" +#include "compiler/CParser.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/LoopDetection.h" +#include "compiler/PCode.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +int optimizedloops; +int optimizedloop_full_unroll; +int optimizedloop_trans_regs; +static UInt32 *liveonexit; +static UInt32 *inductionvariables; +static int last_virtual_GPR; + +static int ispowerof2(SInt32 value) { + int bit = getbit(value); + return (bit > 0 && bit < 31) ? bit : 0; +} + +static void insertupdateinstructions(Loop *loop) { + // nothing +} + +static void computeliveonexit(Loop *loop) { + UInt32 *usevec; + UInt32 *defvec; + BlockList *blocklist; + RegUseOrDef *list; + int gpr; + + bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0); + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24)) + bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses); + } + + bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0); + if (loop->preheader) + bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs); + + bitvectorinitialize(liveonexit, last_virtual_GPR, 0); + + for (gpr = 32; gpr < last_virtual_GPR; gpr++) { + for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) { + if (bitvectorgetbit(list->id, defvec)) { + if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(gpr, liveonexit); + break; + } + } + } + + for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) { + if (bitvectorgetbit(list->id, usevec)) { + if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { + bitvectorsetbit(gpr, liveonexit); + break; + } + } + } + } +} + +static void eliminateinductionvariables(Loop *loop) { + BlockList *blocklist; + PCode *instr; + PCode *nextInstr; + PCodeArg *op; + int i; + + bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF); + + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) { + if ( + instr->op != PC_ADDI || + instr->args[0].data.reg.reg < 32 || + instr->args[0].data.reg.reg >= last_virtual_GPR || + instr->args[1].data.reg.reg != instr->args[0].data.reg.reg + ) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg >= n_real_registers[RegClass_GPR] && + op->data.reg.reg < last_virtual_GPR + ) + bitvectorclearbit(op->data.reg.reg, inductionvariables); + op++; + } + } + } + } + + if (loop->parent) { + for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg >= n_real_registers[RegClass_GPR] && + op->data.reg.reg < last_virtual_GPR + ) + bitvectorsetbit(op->data.reg.reg, liveonexit); + op++; + } + } + } + + for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) { + for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + if ( + instr->op == PC_ADDI && + instr->args[0].data.reg.reg >= 32 && + instr->args[0].data.reg.reg < last_virtual_GPR && + instr->args[1].data.reg.reg == instr->args[0].data.reg.reg && + bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) && + !bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit) + ) { + deletepcode(instr); + optimizedloops = 1; + } + } + } +} + +static void skiplooptest(Loop *loop) { + PCodeBlock *preheader; + PCodeLabel *label; + PCLink **ptr; + PCLink *link; + PCode *lastInstr; + PCode *instr; + + preheader = loop->preheader; + lastInstr = loop->body->lastPCode; + CError_ASSERT(340, lastInstr->args[2].kind == PCOp_LABEL); + + label = lastInstr->args[2].data.label.label; + preheader->lastPCode->args[0].data.label.label = label; + preheader->successors->block = label->block; + + ptr = &loop->body->predecessors; + while ((link = *ptr)) { + if (link->block == preheader) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + link->nextLink = label->block->predecessors; + label->block->predecessors = link; + + while (1) { + instr = loop->body->firstPCode; + CError_ASSERT(369, instr); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + for (instr = instr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) + insertpcodebefore(loop->preheader->lastPCode, copypcode(instr)); +} + +static void unrollloop(Loop *loop) { + PCodeBlock *newBlock; + int i; + int factor; + PCode *instr; + PCodeBlock *block; + PCode *firstInstr; + PCode *nextInstr; + + for (factor = copts.unroll_factor_limit; factor > 1; factor--) { + if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit) + break; + } + + if (factor == 1) + return; + if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4) + return; + + newBlock = oalloc(sizeof(PCodeBlock)); + newBlock->firstPCode = newBlock->lastPCode = NULL; + for (i = 0; i < factor - 1; i++) { + firstInstr = loop->body->firstPCode; + CError_ASSERT(448, firstInstr); + if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) + CError_FATAL(450); + + for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) + appendpcode(newBlock, copypcode(instr)); + + for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) + appendpcode(newBlock, copypcode(instr)); + } + } + } + + block = loop->body->predecessors->block; + for (instr = newBlock->firstPCode; instr; instr = nextInstr) { + nextInstr = instr->nextPCode; + appendpcode(block, instr); + } + loop->iterationCount /= factor; +} + +void pccomputepredecessors1(PCodeBlock *block) { + PCLink *succ; + PCLink *pred; + + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!succ->block) { + CError_FATAL(496); + } else { + for (pred = succ->block->predecessors; pred; pred = pred->nextLink) { + if (pred->block == block) + break; + } + + if (!pred) { + pred = lalloc(sizeof(PCLink)); + pred->block = block; + pred->nextLink = succ->block->predecessors; + succ->block->predecessors = pred; + } + } + } +} + +static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) { + PCodeBlock *newBlock; + PCodeBlock *nextBlock; + PCodeLabel *label; + PCLink *prev; + PCLink *link; + + label = makepclabel(); + newBlock = lalloc(sizeof(PCodeBlock)); + + nextBlock = block->nextBlock; + newBlock->nextBlock = nextBlock; + block->nextBlock = newBlock; + + newBlock->prevBlock = block; + nextBlock->prevBlock = newBlock; + + newBlock->labels = NULL; + newBlock->predecessors = newBlock->successors = NULL; + newBlock->firstPCode = newBlock->lastPCode = NULL; + newBlock->pcodeCount = 0; + newBlock->loopWeight = loopWeight; + newBlock->flags = 0; + newBlock->blockIndex = pcblockcount++; + pclabel(newBlock, label); + + prev = NULL; + for (link = block->successors; link; link = link->nextLink) { + if (link->block == nextBlock) { + if (!prev) + block->successors = link->nextLink; + else + prev->nextLink = link->nextLink; + link->nextLink = NULL; + newBlock->successors = link; + break; + } + prev = link; + } + + prev = NULL; + for (link = nextBlock->predecessors; link; link = link->nextLink) { + if (link->block == block) { + if (!prev) + nextBlock->predecessors = link->nextLink; + else + prev->nextLink = link->nextLink; + link->nextLink = NULL; + newBlock->predecessors = link; + break; + } + prev = link; + } + + link = lalloc(sizeof(PCLink)); + link->block = newBlock; + link->nextLink = block->successors; + block->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = newBlock; + link->nextLink = nextBlock->predecessors; + nextBlock->predecessors = link; + + return newBlock; +} + +static void unrollloopconditional(Loop *loop) { + PCodeBlock *lastBlock; + PCodeLabel *label24; + PCode *instr; + PCode *instrCopy; + int outputBlockCount; + int inputBlockCount; + PCodeBlock **blocks; + PCodeBlock **blocks2; + PCodeBlock **blocks3; + PCode *firstInstr; + int factor; + PCodeBlock *block; + PCLink *link; + PCLink *prev; + int i; + int j; + int k; + int instructionCount; + + label24 = NULL; + outputBlockCount = 0; + instructionCount = 0; + + for (factor = copts.unroll_factor_limit; factor > 1; factor--) { + if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit) + break; + } + + if (factor == 1) + return; + + inputBlockCount = 0; + for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) { + inputBlockCount++; + if (!bitvectorgetbit(block->blockIndex, loop->memberblocks)) + instructionCount += block->pcodeCount; + } + + if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8) + return; + + blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *)); + blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *)); + blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *))); + memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *)); + memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *)); + memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *))); + + block = loop->preheader->nextBlock; + for (i = 0; i < inputBlockCount; i++) { + blocks[i] = block; + block = block->nextBlock; + } + + lastBlock = blocks[inputBlockCount - 1]; + for (i = 0; i < factor - 1; i++) { + for (j = 0; j < inputBlockCount; j++) { + blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight); + blocks3[outputBlockCount++] = blocks2[j]; + lastBlock = blocks2[j]; + } + if (label24) { + pclabel(blocks2[0], label24); + label24 = NULL; + } + + for (j = 0; j < inputBlockCount; j++) { + for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & fIsBranch) { + PCodeArg *op; + int opID; + instrCopy = copypcode(instr); + op = NULL; + for (opID = 0; opID < instr->argCount; opID++) { + if (instr->args[opID].kind == PCOp_LABEL) { + op = &instr->args[opID]; + break; + } + } + + if (op) { + if (op->data.label.label->block == loop->body) { + if (!label24) + label24 = makepclabel(); + instrCopy->args[opID].data.label.label = label24; + } else { + for (k = 0; k < inputBlockCount; k++) { + if (op->data.label.label->block == blocks[k]) { + instrCopy->args[opID].data.label.label = blocks2[k]->labels; + break; + } + } + } + } + + appendpcode(blocks2[j], instrCopy); + if (op) + pcbranch(blocks2[j], instrCopy->args[opID].data.label.label); + } else { + appendpcode(blocks2[j], copypcode(instr)); + } + } + } + + firstInstr = loop->body->firstPCode; + CError_ASSERT(762, firstInstr != NULL); + if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) + CError_FATAL(764); + + for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode) + appendpcode(blocks2[inputBlockCount - 1], copypcode(instr)); + + for (j = 0; j < inputBlockCount; j++) { + for (link = blocks[j]->successors; link; link = link->nextLink) { + if (link->block == blocks[j]->nextBlock) + break; + } + + if (!link) { + for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks2[j]->nextBlock) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks2[j]->successors = link->nextLink; + } else { + prev = link; + } + } + + for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks2[j]) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks2[j]->nextBlock->predecessors = link->nextLink; + } else { + prev = link; + } + } + } + } + } + + if (label24) + pclabel(loop->body, label24); + + for (i = 0; i < inputBlockCount; i++) { + for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) { + if (instr->flags & fIsBranch) { + PCodeArg *op; + int opID; + op = NULL; + for (opID = 0; opID < instr->argCount; opID++) { + if (instr->args[opID].kind == PCOp_LABEL) { + op = &instr->args[opID]; + break; + } + } + + if (op && op->data.label.label->block == loop->body) { + instr->args[opID].data.label.label = blocks3[0]->labels; + + for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) { + if (link->block == loop->body) { + if (prev) + prev->nextLink = link->nextLink; + else + blocks[i]->successors = link->nextLink; + } else { + prev = link; + } + } + + for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) { + if (link->block == blocks[i]) { + if (prev) + prev->nextLink = link->nextLink; + else + loop->body->predecessors = link->nextLink; + } else { + prev = link; + } + } + + link = blocks[i]->successors; + while (link && link->block != blocks3[0]) + link = link->nextLink; + + if (!link) { + pcbranch(blocks[i], op->data.label.label); + pccomputepredecessors1(blocks[i]); + } + } + } + } + } + + for (i = 0; i < outputBlockCount; i++) + pccomputepredecessors1(blocks3[i]); + + loop->iterationCount /= factor; +} + +static void unrollunknownBDNZ(Loop *loop) { + int factor; // r29 + PCodeBlock *preheader; // r17 + PCodeBlock *blockA; // r23 + PCodeBlock *postheader; // r22 + PCodeBlock *newblock1; // r26 + PCodeBlock *newblock2; // r31 + PCodeBlock *newblock3; // r19 + PCodeBlock *newblock4; // r20 + PCodeBlock *newblock5; // r24 + PCode *mtctr; // r21 + int mtctr_reg; // r27 + PCode *instr28; // r28 + PCode *instr6; // r6 + PCodeBlock *block; + PCode *instr; + PCodeArg *op; + int i; + int val; + short mtctr_shifted_reg; // r16 + short reg25; // r25 + PCLink *link; + + if (loop->bodySize < 4) + return; + + factor = 128; + while (factor > copts.unroll_factor_limit) + factor >>= 1; + while (factor > 1 && (loop->bodySize - 2) * factor > copts.unroll_instr_limit) + factor >>= 1; + + if (factor < 2) + return; + + preheader = loop->preheader; + blockA = loop->body->nextBlock; + postheader = preheader->nextBlock; + + newblock1 = insertnewpcblock(preheader, loop->loopWeight); + newblock2 = insertnewpcblock(newblock1, loop->loopWeight); + newblock3 = insertnewpcblock(newblock2, loop->loopWeight); + newblock4 = insertnewpcblock(newblock3, loop->loopWeight); + newblock5 = insertnewpcblock(newblock4, loop->loopWeight); + addblocktoloop(loop, newblock1); + addblocktoloop(loop, newblock2); + addblocktoloop(loop, newblock3); + addblocktoloop(loop, newblock4); + addblocktoloop(loop, newblock5); + + for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) { + if (instr->op == PC_MTCTR) { + mtctr = instr; + mtctr_reg = instr->args[0].data.reg.reg; + } + } + + if (!mtctr) + return; + + instr28 = NULL; + for (block = postheader; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) { + appendpcode(newblock2, copypcode(instr)); + if (instr == loop->pc18) + instr28 = newblock2->lastPCode; + } + } + } + + if (!instr28) { + appendpcode(newblock2, copypcode(loop->pc18)); + instr28 = newblock2->lastPCode; + } + + instr6 = NULL; + for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) { + if (instr != instr28) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == loop->pc18->args[0].data.reg.reg && + (op->data.reg.effect & (EffectRead | EffectWrite)) + ) { + instr6 = instr; + break; + } + op++; + } + } + if (instr6) + break; + } + + if (!instr6) { + deletepcode(instr28); + deletepcode(loop->pc18); + if (loop->footer) + blockA = loop->footer; + else + blockA = insertnewpcblock(loop->body, loop->loopWeight); + } else { + instr28 = NULL; + } + + for (i = 1; i < factor; i++) { + for (block = postheader; block != loop->body; block = block->successors->block) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op != PC_B) + appendpcode(newblock2, copypcode(instr)); + } + } + } + + mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++; + appendpcode(newblock1, makepcode( + PC_RLWINM, + mtctr_shifted_reg, + mtctr_reg, + 32 - ispowerof2(factor), + ispowerof2(factor), + 31)); + + appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0)); + + if (instr28) { + reg25 = used_virtual_registers[RegClass_GPR]++; + if (loop->step == 1) { + instr = makepcode(PC_MR, reg25, mtctr_reg); + } else if (loop->step == -1) { + instr = makepcode(PC_NEG, reg25, mtctr_reg); + } else { + val = ispowerof2(abs(loop->step)); + if (val > 0) { + instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val); + if (loop->step < 0) { + appendpcode(newblock1, instr); + instr = makepcode(PC_NEG, reg25, reg25); + } + } else { + instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step); + } + } + appendpcode(newblock1, instr); + } + + appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg)); + appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels)); + pcbranch(newblock1, newblock5->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock1; + link->nextLink = newblock5->predecessors; + newblock5->predecessors = link; + + appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels)); + pcbranch(newblock3, newblock2->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock3; + link->nextLink = newblock2->predecessors; + newblock2->predecessors = link; + + appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1)); + appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels)); + pcbranch(newblock4, blockA->labels); + + link = lalloc(sizeof(PCLink)); + link->block = newblock4; + link->nextLink = blockA->predecessors; + blockA->predecessors = link; + + deletepcode(mtctr); + appendpcode(newblock5, mtctr); + + if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) { + instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25); + if (blockA->firstPCode) + insertpcodebefore(blockA->firstPCode, instr); + else + appendpcode(blockA, instr); + } +} + +static void deleteloop(Loop *loop) { + PCodeBlock *body; + PCodeBlock *preheader; + PCodeBlock *nextblock; + PCodeLabel *label; + PCode *instr; + PCLink **ptr; + PCLink *link; + PCodeBlock *block; + + body = loop->body; + preheader = loop->preheader; + nextblock = body->nextBlock; + label = makepclabel(); + + while (1) { + instr = body->firstPCode; + CError_ASSERT(1294, instr != NULL); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + pclabel(nextblock, label); + preheader->lastPCode->args[0].data.label.label = label; + preheader->successors->block = nextblock; + + ptr = &nextblock->predecessors; + while ((link = *ptr)) { + if (link->block == body) { + link->block = preheader; + break; + } + ptr = &link->nextLink; + } + + block = pcbasicblocks; + while ((nextblock = block->nextBlock)) { + if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks)) + block->nextBlock = nextblock->nextBlock; + else + block = nextblock; + } +} + +static void deleteloopheader(Loop *loop) { + PCLink *link; + PCLink **ptr; + + ptr = &loop->body->successors; + while ((link = *ptr)) { + if (link->block == loop->body->lastPCode->args[2].data.label.label->block) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors; + while ((link = *ptr)) { + if (link->block == loop->body) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + optimizedloop_full_unroll = 1; +} + +static void rewriteloopwithBDNZ(Loop *loop) { + PCode *instr; + + if (!FITS_IN_SHORT(loop->iterationCount)) { + instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount)); + insertpcodebefore(loop->preheader->lastPCode, instr); + + if (loop->iterationCount != 0) + insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount))); + } else { + instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount); + insertpcodebefore(loop->preheader->lastPCode, instr); + } + + insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg)); + + instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + appendpcode(loop->body, instr); + + for (loop = loop->parent; loop; loop = loop->parent) + loop->x4E = 1; +} + +static void rewriteunknownloopwithBDNZ(Loop *loop) { + SInt32 value1; // r24 + SInt32 value2; // r30 + Opcode branchOpcode; // r19 + SInt32 absStep; // r28 + int branchCondition; // r20 + int counterReg; // r27 + int reg1; // r26 + int reg2; // r22 + unsigned char mode; // r23 + PCodeBlock *afterLoop; // r25 + PCode *addiInstr; + PCode *instr; + PCLink *link; + PCLink **ptr; + + absStep = abs(loop->step); + afterLoop = NULL; + + for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) { + deletepcode(addiInstr); + if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode) + insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr); + else + appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr); + + afterLoop = insertnewpcblock(loop->body, loop->loopWeight); + appendpcode(afterLoop, copypcode(addiInstr)); + loop->footer = afterLoop; + } + + if (loop->unknownCondition == ELESS) { + branchOpcode = PC_BF; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 + loop->upper; + mode = 1; + } else { + branchCondition = 0; // LT + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 2; + } + } else if (loop->unknownCondition == ELESSEQU) { + branchOpcode = PC_BT; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep + loop->upper; + mode = 1; + } else { + branchCondition = 1; // GT + value1 = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep; + mode = 2; + } + } else if (loop->unknownCondition == EGREATER) { + branchOpcode = PC_BF; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 - loop->upper; + mode = 0; + } else { + branchCondition = 1; // GT + value1 = 0; + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 3; + } + } else if (loop->unknownCondition == EGREATEREQU) { + branchOpcode = PC_BT; + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + branchCondition = 1; // GT + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + branchCondition = 0; // LT + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - loop->upper; + mode = 0; + } else { + branchCondition = 0; // LT + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep; + mode = 3; + } + } else if (loop->unknownCondition == ENOTEQU) { + branchOpcode = PC_BT; + branchCondition = 2; // EQ + if (loop->step > 0) { + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 - loop->lower; + mode = 0; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 + loop->upper; + mode = 1; + } else { + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 2; + } + } else { + if (loop->lowerType == LOOP_BOUND_CONSTANT) { + value1 = loop->lower; + reg1 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1 + loop->lower; + mode = 1; + } else if (loop->upperType == LOOP_BOUND_CONSTANT) { + value1 = loop->upper; + reg1 = addiInstr->args[1].data.reg.reg; + value2 = absStep - 1 - loop->upper; + mode = 0; + } else { + reg1 = addiInstr->args[1].data.reg.reg; + reg2 = addiInstr->args[2].data.reg.reg; + value2 = absStep - 1; + mode = 3; + } + } + } + + while (1) { + instr = loop->body->firstPCode; + CError_ASSERT(1857, instr); + + if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL) + break; + + deletepcode(instr); + insertpcodebefore(loop->preheader->lastPCode, instr); + loop->bodySize--; + } + + // build a value for mtctr + counterReg = used_virtual_registers[RegClass_GPR]++; + + if (mode == 1) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_NEG, counterReg, reg1)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBFIC, counterReg, reg1, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } + } else if (mode == 0) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_MR, counterReg, reg1)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg1, 0, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + } + } else if (mode == 2) { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, reg2)); + } else if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg2, 0, value2)); + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg1, counterReg)); + } + } else { + if (value2 == 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg2, reg1)); + } else { + if (FITS_IN_SHORT(value2)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, reg1, 0, value2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2))); + if (value2 != 0) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2))); + } + } + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_SUBF, counterReg, reg2, counterReg)); + } + } + + if (absStep > 1) { + unsigned char bits; + if ((bits = ispowerof2(absStep))) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31)); + } else { + int reg = used_virtual_registers[RegClass_GPR]++; + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_LI, reg, absStep)); + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_DIVWU, counterReg, counterReg, reg)); + } + } + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_MTCTR, counterReg)); + + if (mode < 2) { + if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { + if (FITS_IN_USHORT(value1)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPLI, 0, reg1, value1)); + } else { + PCode *tmp; + tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); + insertpcodebefore(loop->preheader->lastPCode, tmp); + + if (loop->iterationCount != 0) + insertpcodeafter(tmp, + makepcode(PC_ADDI, + tmp->args[0].data.reg.reg, + tmp->args[0].data.reg.reg, + 0, LOW_PART(value1))); + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg)); + } + } else { + if (FITS_IN_SHORT(value1)) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPI, 0, reg1, value1)); + } else { + PCode *tmp; + tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1)); + insertpcodebefore(loop->preheader->lastPCode, tmp); + + if (loop->iterationCount != 0) + insertpcodeafter(tmp, + makepcode(PC_ADDI, + tmp->args[0].data.reg.reg, + tmp->args[0].data.reg.reg, + 0, LOW_PART(value1))); + + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg)); + } + } + } else { + if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMPL, 0, reg1, reg2)); + } else { + insertpcodebefore(loop->preheader->lastPCode, + makepcode(PC_CMP, 0, reg1, reg2)); + } + } + + if (!afterLoop) + afterLoop = loop->body->nextBlock; + + instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels); + deletepcode(loop->preheader->lastPCode); + appendpcode(loop->preheader, instr); + + instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label); + deletepcode(loop->body->firstPCode); + deletepcode(loop->body->lastPCode); + appendpcode(loop->body, instr); + + loop->preheader->successors = NULL; + ptr = &loop->body->predecessors; + while ((link = *ptr)) { + if (link->block == loop->preheader) { + *ptr = link->nextLink; + break; + } + ptr = &link->nextLink; + } + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader->nextBlock; + link->nextLink = loop->preheader->successors; + loop->preheader->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader; + link->nextLink = loop->preheader->nextBlock->predecessors; + loop->preheader->nextBlock->predecessors = link; + + link = lalloc(sizeof(PCLink)); + link->block = afterLoop; + link->nextLink = loop->preheader->successors; + loop->preheader->successors = link; + + link = lalloc(sizeof(PCLink)); + link->block = loop->preheader; + link->nextLink = afterLoop->predecessors; + afterLoop->predecessors = link; + + for (loop = loop->parent; loop; loop = loop->parent) + loop->x4E = 1; +} + +static void optimizeloop(Loop *loop) { + computeliveonexit(loop); + + if (loop->x53 || loop->x54) + insertupdateinstructions(loop); + + if (loop->isKnownCountingLoop) { + if (loop->iterationCount > 0) { + skiplooptest(loop); + if (!copts.optimizesize && !loop->x4D && !loop->x57) { + if (loop->x4F) + unrollloop(loop); + else if (!loop->x4E) + unrollloopconditional(loop); + } + } + + if (loop->iterationCount != 0) { + if (loop->iterationCount == 1) + deleteloopheader(loop); + else if (!loop->x4E && !loop->x4D) + rewriteloopwithBDNZ(loop); + } + + optimizedloops = 1; + } else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) { + rewriteunknownloopwithBDNZ(loop); + if (copts.unroll_speculative && !copts.optimizesize) { + if (loop->x4F && !loop->x57 && !loop->x52) + unrollunknownBDNZ(loop); + } + optimizedloops = 1; + } + + eliminateinductionvariables(loop); +} + +typedef struct LocalArray { + struct LocalArray *next; + Object *object; + unsigned int invalid:1; + unsigned int isFloat:1; + unsigned int isSigned:1; + SInt32 elementSize; + SInt32 arraySize; + SInt32 elementCount; + int totalUses; + int elements[1]; +} LocalArray; + +static LocalArray *scanforlocalarrays(void) { + SInt32 elementCount; + LocalArray *head; + LocalArray *array; + ObjectList *list; + int i; + SInt32 arraySize; + SInt32 elementSize; + + head = NULL; + + for (list = locals; list; list = list->next) { + if ( + list->object && + !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && + list->object->type && + IS_TYPE_ARRAY(list->object->type) && + TPTR_TARGET(list->object->type) && + TPTR_TARGET(list->object->type)->type < TYPESTRUCT + ) { + arraySize = list->object->type->size; + elementSize = TPTR_TARGET(list->object->type)->size; + elementCount = arraySize / elementSize; + if (elementCount > 0 && elementCount <= 8) { + array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray)); + array->next = head; + head = array; + + array->object = list->object; + array->elementSize = elementSize; + array->arraySize = arraySize; + array->elementCount = elementCount; + array->totalUses = 0; + array->isSigned = 1; + array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type)); + array->invalid = 0; + array->isSigned = 1; + if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type))) + array->isSigned = 0; + + for (i = 0; i < elementCount; i++) { + array->elements[i] = 0; + } + } + } + } + + return head; +} + +static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) { + while (arrays) { + if (arrays->object == object) + return arrays; + arrays = arrays->next; + } + return NULL; +} + +void changearraytoregisters(void) { + LocalArray *arrays; + PCodeBlock *block; + PCode *instr; + int i; + PCodeArg *op; + LocalArray **ptr; + LocalArray *array; + int intCounter; + int floatCounter; + int reg; + int reg2; + PCode *newInstr; + + arrays = NULL; + if ((arrays = scanforlocalarrays())) { + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_MEMORY && + (PCOpMemoryArg) op->arg == PCOpMemory1 && + (array = lookup_array_object(arrays, op->data.mem.obj)) && + !array->invalid + ) { + if ( + (instr->flags & (fIsRead | fIsWrite)) && + (op->data.mem.offset % array->elementSize) == 0 && + op->data.mem.offset < array->arraySize + ) { + switch (instr->op) { + case PC_LBZ: + case PC_STB: + if (array->elementSize != 1) + array->invalid = 1; + break; + case PC_LHZ: + case PC_LHA: + case PC_STH: + if (array->elementSize != 2) + array->invalid = 1; + break; + case PC_LWZ: + case PC_STW: + if (array->elementSize != 4) + array->invalid = 1; + break; + case PC_LFD: + case PC_STFD: + if (array->elementSize != 8) + array->invalid = 1; + break; + default: + array->invalid = 1; + break; + } + + if (!array->invalid) + array->elements[op->data.mem.offset / array->elementSize]++; + } else { + array->invalid = 1; + } + } + op++; + } + } + } + } + + intCounter = 0; + floatCounter = 0; + ptr = &arrays; + array = *ptr; + while (array) { + if (array->invalid) { + *ptr = array = array->next; + continue; + } + + if (array->isFloat) + floatCounter += array->elementCount; + if (!array->isFloat) + intCounter += array->elementCount; + + for (i = 0; i < array->elementCount; i++) + array->totalUses += array->elements[i]; + + array = array->next; + } + + if (arrays) { + while (intCounter > 8) { + LocalArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (!array->isFloat) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + intCounter -= best->elementCount; + } + + while (floatCounter > 8) { + LocalArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (array->isFloat) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + floatCounter -= best->elementCount; + } + + CError_ASSERT(2394, intCounter <= 8 && floatCounter <= 8); + + if (!arrays) + return; + + optimizedloop_trans_regs = 1; + + for (array = arrays; array; array = array->next) { + for (i = 0; i < array->elementCount; i++) { + if (array->isFloat) + array->elements[i] = used_virtual_registers[RegClass_FPR]++; + else + array->elements[i] = used_virtual_registers[RegClass_GPR]++; + } + } + + for (block = pcbasicblocks; block; block = block->nextBlock) { + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if ( + !(instr->flags & fIsBranch) && + instr->argCount && + (instr->flags & (fIsRead | fIsWrite)) && + instr->args[2].kind == PCOp_MEMORY && + (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && + (array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) && + !(array->invalid) + ) + { + reg2 = instr->args[0].data.reg.reg; + reg = array->elements[instr->args[2].data.mem.offset / array->elementSize]; + newInstr = NULL; + switch (instr->op) { + case PC_LBZ: + if (array->isSigned) + newInstr = makepcode(PC_MR, reg2, reg); + else + newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31); + break; + case PC_STB: + if (array->isSigned) + newInstr = makepcode(PC_EXTSB, reg, reg2); + else + newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31); + break; + case PC_LHZ: + newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31); + break; + case PC_LHA: + newInstr = makepcode(PC_EXTSH, reg2, reg); + break; + case PC_STH: + if (array->isSigned) + newInstr = makepcode(PC_EXTSH, reg, reg2); + else + newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31); + break; + case PC_LWZ: + newInstr = makepcode(PC_MR, reg2, reg); + break; + case PC_STW: + newInstr = makepcode(PC_MR, reg, reg2); + break; + case PC_LFD: + newInstr = makepcode(PC_FMR, reg2, reg); + break; + case PC_STFD: + newInstr = makepcode(PC_FMR, reg, reg2); + break; + default: + CError_FATAL(2494); + break; + } + + if (newInstr) { + insertpcodebefore(instr, newInstr); + deletepcode(instr); + instr = newInstr; + } + } + } + } + } + } + + freeoheap(); +} + +static void optimizenestedloops(Loop *loop) { + while (loop) { + if (loop->children) + optimizenestedloops(loop->children); + optimizeloop(loop); + loop = loop->nextSibling; + } +} + +void optimizeloops(void) { + optimizedloops = 0; + optimizedloop_full_unroll = 0; + optimizedloop_trans_regs = 0; + if (loopsinflowgraph) { + computeusedefchains(0); + last_virtual_GPR = used_virtual_registers[RegClass_GPR]; + liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5)); + inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5)); + optimizenestedloops(loopsinflowgraph); + freeoheap(); + } +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c new file mode 100644 index 0000000..2b68dca --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/StrengthReduction.c @@ -0,0 +1,751 @@ +#include "compiler/StrengthReduction.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/LoopDetection.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" + +int strengthreducedloops; + +static PCode *findinitializer(Loop *loop, short reg) { + UInt32 *vec; + PCode *best; + RegUseOrDef *list; + + vec = usedefinfo[loop->body->blockIndex].defvec8; + best = NULL; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + if ( + !(bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) && + bitvectorgetbit(list->id, vec) + ) + { + if (best) + return NULL; + best = Defs[list->id].pcode; + } + } + + if (best) { + if (best->op == PC_LI || best->op == PC_ADDI || best->op == PC_ADD) + return best; + } + + return NULL; +} + +static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) { + RegUseOrDef *list; + PCode *instr; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + instr = Defs[list->id].pcode; + if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { + if (instr->op != PC_ADDI) + return 0; + if (instr->args[1].data.reg.reg != reg) + return 0; + if (instr->args[2].data.imm.value != step) + return 0; + } + } + + return 1; +} + +static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) { + BasicInductionVar *biv; + RegUseOrDef *list; + PCode *instr; + InstrList *instrList; + + for (biv = loop->basicInductionVars; biv; biv = biv->next) { + if (biv->reg == reg) + return; + } + + biv = oalloc(sizeof(BasicInductionVar)); + biv->next = loop->basicInductionVars; + loop->basicInductionVars = biv; + + biv->loop = loop; + biv->inductionVars = NULL; + biv->instrsC = NULL; + biv->step = step; + biv->reg = reg; + + for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) { + instr = Defs[list->id].pcode; + if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) { + instrList = oalloc(sizeof(InstrList)); + instrList->next = biv->instrsC; + biv->instrsC = instrList; + instrList->instr = instr; + } + } + + biv->initializer = findinitializer(loop, reg); +} + +static void findbasicinductionvariables(Loop *loop) { + SInt16 step; + BlockList *block; + PCode *instr; + short reg; + + for (block = loop->blocks; block; block = block->next) { + for (instr = block->block->firstPCode; instr; instr = instr->nextPCode) { + if (instr->op == PC_ADDI) { + if ( + (reg = instr->args[0].data.reg.reg) >= 32 && + instr->args[1].data.reg.reg == reg && + isbasicinductionvariable(loop, reg, step = instr->args[2].data.imm.value) + ) + addbasicinductionvariable(loop, reg, step); + } + } + } +} + +static void findallbasicinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + findallbasicinductionvariables(loop->children); + findbasicinductionvariables(loop); + loop = loop->nextSibling; + } +} + +static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) { + RegUseOrDef *list; + int counter; + Loop *loop; + Loop *scanloop; + PCode *instr; + + instr = Uses[useID].pcode; + *result2 = 0; + *result3 = 0; + *result4 = NULL; + + switch (instr->op) { + case PC_MULLI: + *result1 = instr->args[2].data.imm.value; + break; + + case PC_RLWINM: + if (instr->args[3].data.imm.value) + return 0; + if (instr->args[2].data.imm.value > 15) + return 0; + if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value)) + return 0; + if (PCODE_FLAG_SET_F(instr) & fRecordBit) + return 0; + *result1 = 1 << instr->args[2].data.imm.value; + break; + + case PC_LBZX: + case PC_LHZX: + case PC_LHAX: + case PC_LWZX: + case PC_STBX: + case PC_STHX: + case PC_STWX: + case PC_LFSX: + case PC_LFDX: + case PC_STFSX: + case PC_STFDX: + *result2 = 0; + *result3 = 0; + if (instr->args[1].data.reg.reg == biv->reg) { + *result2 = 1; + *result3 = 2; + } else if (instr->args[2].data.reg.reg == biv->reg) { + *result2 = 2; + *result3 = 1; + } + + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) + counter++; + } + if (counter) + return 0; + + loop = biv->loop; + for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) { + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks)) + counter++; + } + if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks)) + counter++; + if (counter) + break; + loop = scanloop; + } + + *result4 = loop; + *result1 = 1; + return 1; + + default: + return 0; + } + + counter = 0; + for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) { + if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks)) + counter++; + } + + return counter == 1; +} + +static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) { + InductionVar *iv; + + iv = oalloc(sizeof(InductionVar)); + iv->next = biv->inductionVars; + biv->inductionVars = iv; + + iv->basicVar = biv; + iv->instr = instr; + iv->instrC = NULL; + iv->step = val1; + iv->x18 = val2; + iv->x1A = val3; + iv->someloop = val4; + if (instr->flags & (fIsRead | fIsWrite)) + iv->x1C = -1; + else + iv->x1C = instr->args[0].data.reg.reg; + iv->x1E = -1; +} + +static void findnonbasicinductionvariables(Loop *loop) { + BasicInductionVar *biv; + RegUseOrDef *list; + SInt32 result1; + short result2; + short result3; + Loop *result4; + + for (biv = loop->basicInductionVars; biv; biv = biv->next) { + for (list = reg_Uses[RegClass_GPR][biv->reg]; list; list = list->next) { + if (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) { + if (isinductionvariable(biv, list->id, &result1, &result2, &result3, &result4)) + addinductionvariable(biv, Uses[list->id].pcode, result1, result2, result3, result4); + } + } + } +} + +static void findallnonbasicinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + findallnonbasicinductionvariables(loop->children); + if (loop->basicInductionVars) + findnonbasicinductionvariables(loop); + loop = loop->nextSibling; + } +} + +static void initializeinductionvariable(InductionVar *iv) { + BasicInductionVar *biv; // r31 + PCode *instr; // r27 + PCodeBlock *preheader; // r30 + SInt32 value30; // r30 + short reg29; // r29 + short reg26; // r26 + + biv = iv->basicVar; + preheader = biv->loop->preheader; + + if (iv->x1A) { + reg29 = iv->instr->args[iv->x1A].data.reg.reg; + reg26 = iv->instr->args[iv->x18].data.reg.reg; + instr = NULL; + + if ( + biv->initializer && + biv->initializer->op == PC_LI && + biv->initializer->block == preheader + ) + { + if (biv->initializer->args[1].data.imm.value == 0) + instr = makepcode(PC_MR, iv->x1E, reg29); + else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value)) + instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value); + } + + if (!instr) + instr = makepcode(PC_ADD, iv->x1E, reg29, reg26); + + if (biv->initializer && instr->op != PC_ADD) + insertpcodeafter(biv->initializer, instr); + else if (iv->someloop && iv->someloop->preheader->lastPCode) + insertpcodebefore(iv->someloop->preheader->lastPCode, instr); + else + insertpcodebefore(preheader->lastPCode, instr); + + iv->instrC = instr; + iv->x1C = reg29; + return; + } + + if (!biv->initializer || biv->initializer->op != PC_LI) { + instr = copypcode(iv->instr); + instr->args[0].data.reg.reg = iv->x1E; + insertpcodebefore(preheader->lastPCode, instr); + } else { + value30 = biv->initializer->args[1].data.imm.value * iv->step; + if (!FITS_IN_SHORT(value30)) { + instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30)); + insertpcodeafter(biv->initializer, instr); + if (value30 != 0) + insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30))); + } else { + instr = makepcode(PC_LI, iv->x1E, value30); + insertpcodeafter(biv->initializer, instr); + } + } +} + +static void incrementinductionvariable(InductionVar *iv) { + SInt32 value; + BasicInductionVar *biv; + PCode *instr; + InstrList *list; + + biv = iv->basicVar; + value = iv->step * biv->step; + for (list = biv->instrsC; list; list = list->next) { + if (!FITS_IN_SHORT(value)) { + instr = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value)); + insertpcodeafter(list->instr, instr); + + if (value != 0) { + instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value)); + insertpcodeafter(list->instr->nextPCode, instr); + } + } else { + instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value); + insertpcodeafter(list->instr, instr); + } + } +} + +static void copyinductionvariable(InductionVar *iv) { + if (iv->instr->flags & (fIsRead | fIsWrite)) { + iv->instr->op -= 2; + iv->instr->args[1].data.reg.reg = iv->x1E; + iv->instr->args[2].kind = PCOp_IMMEDIATE; + iv->instr->args[2].data.imm.value = 0; + iv->instr->args[2].data.imm.obj = NULL; + } else { + insertpcodeafter(iv->instr, makepcode(PC_MR, iv->x1C, iv->x1E)); + deletepcode(iv->instr); + } +} + +static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) { + SInt32 addend; + BlockList *list; + PCode *instr; + PCodeArg *op; + int i; + + if (iv->instrC && iv->x1C == reg) { + if (iv->instrC->op == PC_MR) + addend = 0; + else if (iv->instrC->op == PC_ADDI) + addend = iv->instrC->args[2].data.imm.value; + else + return 0; + + if (step2 == (addend + (step1 * iv->step * loop2->iterationCount))) { + for (list = loop1->blocks; list && list->block != loop2->blocks->block; list = list->next) { + for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == reg + ) + return 0; + op++; + } + } + } + return 1; + } + } + + return 0; +} + +static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) { + Loop *scanloop; + BasicInductionVar *biv; + InductionVar *iv; + PCode *instr; + PCodeArg *op; + int i; + + for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) { + if ( + scanloop->isKnownCountingLoop && + scanloop->x4F && + bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C) + ) + { + for (biv = scanloop->basicInductionVars; biv; biv = biv->next) { + for (iv = biv->inductionVars; iv; iv = iv->next) { + if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) { + deletepcode(iv->instrC); + if (initializer) { + insertpcodeafter(initializer, iv->instrC); + } else if (loop->body->lastPCode) { + for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + (op->data.reg.effect & EffectWrite) && + op->data.reg.reg == reg + ) + break; + op++; + } + } + + if (instr) + insertpcodeafter(instr, iv->instrC); + else + insertpcodebefore(loop->body->firstPCode, iv->instrC); + } else { + appendpcode(loop->body, iv->instrC); + } + } + } + } + } + } +} + +static void strengthreducenestedbiv(BasicInductionVar *biv) { + Loop *loop; + InductionVar *iv; + + loop = biv->loop; + for (iv = biv->inductionVars; iv; iv = iv->next) + strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, loop); + + strengthreducenestediv(biv->reg, biv->step, biv->initializer, loop); +} + +static void strengthreduceinductionvariable(BasicInductionVar *biv) { + int counter; + InductionVar *iv; + InductionVar *otherIv; + short reg; + + counter = 0; + for (iv = biv->inductionVars; iv; iv = iv->next) { + if (iv->step == 1) + counter++; + } + + for (iv = biv->inductionVars; iv; iv = iv->next) { + if ( + (counter <= 4 || iv->step != 1) && + iv->instr->block && + (iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE) + ) + { + if (iv->x1E == -1) { + iv->x1E = used_virtual_registers[RegClass_GPR]++; + initializeinductionvariable(iv); + incrementinductionvariable(iv); + if (iv->step == 1) { + reg = iv->instr->args[iv->x1A].data.reg.reg; + for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { + if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg) + otherIv->x1E = iv->x1E; + } + } else { + for (otherIv = iv->next; otherIv; otherIv = otherIv->next) { + if (otherIv->step == iv->step) + otherIv->x1E = iv->x1E; + } + } + } + + copyinductionvariable(iv); + strengthreducedloops = 1; + } + } +} + +#ifdef __MWERKS__ +#pragma options align=mac68k +#endif +typedef struct BivInit { + SInt32 x0; + short x4; + short x6; + short x8; + Object *xA; +} BivInit; +#ifdef __MWERKS__ +#pragma options align=reset +#endif + +static void calc_biv_init(BasicInductionVar *biv, BivInit *init) { + PCode *instr; + PCode *scan; + PCodeArg *op; + int i; + + instr = biv->initializer; + init->x0 = 0; + init->x4 = -1; + init->x6 = -1; + init->x8 = 0; + init->xA = NULL; + + if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD)) + return; + + if (instr->op == PC_ADDI) { + if (instr->args[1].data.reg.reg == biv->reg) { + init->x0 = instr->args[2].data.imm.value; + for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { + op = scan->args; + i = scan->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == biv->reg && + (op->data.reg.effect & EffectWrite) + ) + { + if (scan->op == PC_ADD) { + init->x4 = scan->args[1].data.reg.reg; + init->x6 = scan->args[2].data.reg.reg; + } else if (scan->op == PC_ADDI) { + if (scan->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.imm.value; + } else if (scan->args[2].kind == PCOp_MEMORY) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.mem.offset; + init->xA = scan->args[2].data.mem.obj; + } + } + return; + } + op++; + } + } + } else { + if (instr->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = instr->args[1].data.reg.reg; + init->x8 = instr->args[2].data.imm.value; + } else if (instr->args[2].kind == PCOp_MEMORY) { + init->x4 = instr->args[1].data.reg.reg; + init->x8 = instr->args[2].data.mem.offset; + init->xA = instr->args[2].data.mem.obj; + } + } + } else if (instr->op == PC_ADD) { + if (instr->args[1].data.reg.reg == biv->reg) { + init->x6 = instr->args[2].data.reg.reg; + for (scan = instr->prevPCode; scan; scan = scan->prevPCode) { + op = scan->args; + i = scan->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == biv->reg && + (op->data.reg.effect & EffectWrite) && + scan->op == PC_ADDI + ) + { + if (scan->args[2].kind == PCOp_IMMEDIATE) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.imm.value; + } else if (scan->args[2].kind == PCOp_MEMORY) { + init->x4 = scan->args[1].data.reg.reg; + init->x8 = scan->args[2].data.mem.offset; + init->xA = scan->args[2].data.mem.obj; + } + return; + } + op++; + } + } + } else { + init->x4 = instr->args[1].data.reg.reg; + init->x6 = instr->args[2].data.reg.reg; + } + } +} + +static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) { + PCode *instr1; // r31 + int reg1; // r30 + int reg2; // r29 + PCode *instr2; // r24 + PCodeBlock *nextBlock; // r24 + BlockList *list; + PCodeArg *op; + int i; + PCode *instr; + + instr1 = NULL; + instr2 = NULL; + + reg1 = biv1->reg; + CError_ASSERT(930, reg1 >= 0); + + reg2 = biv2->reg; + CError_ASSERT(934, reg2 >= 0); + + if (!FITS_IN_SHORT(difference)) + return; + + for (list = loop->blocks; list; list = list->next) { + for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) { + if (instr1) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_REGISTER && + op->arg == RegClass_GPR && + op->data.reg.reg == reg1 + ) + return; + op++; + } + } + + if (instr->op == PC_ADDI) { + if (instr->args[0].data.reg.reg == reg1) { + if (instr1) + return; + instr1 = instr; + } else if (instr->args[0].data.reg.reg == reg2) { + if (instr2) + return; + instr2 = instr; + } + } + } + } + + if (loop->body->lastPCode->flags & fIsBranch) { + nextBlock = NULL; + + for (i = 0; i < loop->body->lastPCode->argCount; i++) { + if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) { + nextBlock = loop->body->lastPCode->args[i].data.label.label->block; + break; + } + } + + if (!nextBlock) + return; + } else { + nextBlock = loop->body->nextBlock; + } + + deletepcode(instr1); + instr1->args[1].data.reg.reg = reg2; + instr1->args[2].data.imm.value = difference; + + if (nextBlock->firstPCode) + insertpcodebefore(nextBlock->firstPCode, instr1); + else + appendpcode(nextBlock, instr1); + + biv1->reg = -1; + strengthreducedloops = 1; +} + +static void strengthreduceinductionvariables(Loop *loop) { + BasicInductionVar *biv1; + BasicInductionVar *biv2; + BivInit init1; + BivInit init2; + + for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { + if (biv1->inductionVars) + strengthreduceinductionvariable(biv1); + strengthreducenestedbiv(biv1); + } + + for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) { + if (biv1->reg != -1) { + calc_biv_init(biv1, &init1); + if (init1.x4 != -1) { + for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) { + if (biv2->reg != -1 && biv2 != biv1) { + calc_biv_init(biv2, &init2); + if ( + init2.x4 != -1 && + init1.x4 == init2.x4 && + init1.x6 == init2.x6 && + init1.x8 == init2.x8 && + init1.xA == init2.xA && + biv1->step == biv2->step + ) + { + if (init1.x0 < init2.x0) { + combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0); + } else { + combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0); + break; + } + } + } + } + } + } + } +} + +static void strengthreduceallinductionvariables(Loop *loop) { + while (loop) { + if (loop->children) + strengthreduceallinductionvariables(loop->children); + if (loop->basicInductionVars) + strengthreduceinductionvariables(loop); + loop = loop->nextSibling; + } +} + +void strengthreduceloops(void) { + strengthreducedloops = 0; + if (loopsinflowgraph) { + computeusedefchains(0); + findallbasicinductionvariables(loopsinflowgraph); + findallnonbasicinductionvariables(loopsinflowgraph); + strengthreduceallinductionvariables(loopsinflowgraph); + freeoheap(); + } +} diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c new file mode 100644 index 0000000..0907fa1 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/ValueNumbering.c @@ -0,0 +1,661 @@ +#include "compiler/ValueNumbering.h" +#include "compiler/Alias.h" +#include "compiler/PCode.h" +#include "compiler/Registers.h" +#include "compiler/RegisterInfo.h" +#include "compiler/CompilerTools.h" +#include "compiler/CError.h" + +typedef struct ValueLabel { + struct ValueLabel *next; + PCodeArg op; +} ValueLabel; + +typedef struct AvailableValue { + struct AvailableValue *next; + ValueLabel *labelled; + PCode *pcode; + int killedregister; + int aliasnumber; + int opnumbers[0]; +} AvailableValue; + +typedef struct RegValue { + int number; + int x4; + AvailableValue *available; +} RegValue; + +typedef struct State { + void *stackedvalues; + int valueceiling; +} State; + +typedef struct StackedValue { + struct StackedValue *next; + PCodeArg op; + RegValue value; + Alias *alias; + PCode *valuepcode; +} StackedValue; + +int removedcommonsubexpressions; +int nextvaluenumber; +static AvailableValue *opvalue[428]; +static RegValue *regvalue[RegClassMax]; +static StackedValue *stackedvalues; +static int valueceiling; +static int moreaggressiveoptimization; + +static void allocatecsedatastructures(void) { + char rclass; + + for (rclass = 0; rclass < RegClassMax; rclass++) + regvalue[(char) rclass] = oalloc(sizeof(RegValue) * used_virtual_registers[(char) rclass]); +} + +static void initializecsedatastructures(void) { + RegValue *rv; + char rclass; + int i; + + nextvaluenumber = 0; + + for (i = 0; i < 428; i++) + opvalue[i] = NULL; + + for (rclass = 0; rclass < RegClassMax; rclass++) { + rv = regvalue[(char) rclass]; + for (i = 0; i < used_virtual_registers[(char) rclass]; i++, rv++) { + rv->number = nextvaluenumber++; + rv->x4 = 0; + rv->available = NULL; + } + } + + initialize_alias_values(); + stackedvalues = NULL; + valueceiling = 0x7FFFFFFF; +} + +static void labelvalue(AvailableValue *av, PCodeArg *op) { + ValueLabel *label = oalloc(sizeof(ValueLabel)); + label->op = *op; + label->next = av->labelled; + av->labelled = label; +} + +static void unlabelvalue(AvailableValue *av, PCodeArg *op) { + ValueLabel *labelled; + ValueLabel **ptr; + + ptr = &av->labelled; + while ((labelled = *ptr)) { + if (labelled->op.data.reg.reg == op->data.reg.reg) + *ptr = labelled->next; + else + ptr = &labelled->next; + } +} + +static void stackregistervalue(PCodeArg *op, RegValue *value) { + StackedValue *stacked = oalloc(sizeof(StackedValue)); + stacked->next = stackedvalues; + stackedvalues = stacked; + + stacked->op = *op; + stacked->value = *value; +} + +static void stackmemoryvalue(Alias *alias) { + StackedValue *stacked = oalloc(sizeof(StackedValue)); + stacked->next = stackedvalues; + stackedvalues = stacked; + + stacked->op.kind = PCOp_MEMORY; + stacked->alias = alias; + stacked->value.number = alias->valuenumber; + stacked->valuepcode = alias->valuepcode; +} + +static void unstackvalue(StackedValue *stacked) { + PCodeArg *op = &stacked->op; + RegValue *value; + + if (stacked->op.kind == PCOp_MEMORY) { + stacked->alias->valuenumber = stacked->value.number; + stacked->alias->valuepcode = stacked->valuepcode; + } else { + value = ®value[op->arg][op->data.reg.reg]; + if (value->available) + unlabelvalue(value->available, op); + value->number = stacked->value.number; + value->x4 = stacked->value.x4; + value->available = stacked->value.available; + if (value->available) + labelvalue(value->available, op); + } +} + +static int samevalue(PCodeArg *op1, PCodeArg *op2) { + return regvalue[op1->arg][op1->data.reg.reg].number == regvalue[op2->arg][op2->data.reg.reg].number; +} + +static int killregister(PCodeArg *op) { + RegValue *value; + + value = ®value[op->arg][op->data.reg.reg]; + if (value->number < valueceiling && nextvaluenumber >= valueceiling) + stackregistervalue(op, value); + + if (value->available) + unlabelvalue(value->available, op); + value->available = NULL; + value->x4 = 0; + return value->number = nextvaluenumber++; +} + +void killmemory(Alias *alias, PCode *newValue) { + if (alias->valuenumber < valueceiling && nextvaluenumber >= valueceiling) + stackmemoryvalue(alias); + + if (newValue) { + alias->valuenumber = regvalue[newValue->args[0].arg][newValue->args[0].data.reg.reg].number; + alias->valuepcode = newValue; + } else { + alias->valuenumber = nextvaluenumber++; + alias->valuepcode = NULL; + } +} + +static void killspecificCSEs(short op) { + AvailableValue *av; + ValueLabel *labelled; + + for (av = opvalue[op]; av; av = av->next) { + for (labelled = av->labelled; labelled; labelled = labelled->next) + killregister(&labelled->op); + } +} + +static void killallCSEs(void) { + AvailableValue *av; + ValueLabel *labelled; + int i; + + for (i = 0; i < 428; i++) { + for (av = opvalue[i]; av; av = av->next) { + for (labelled = av->labelled; labelled; labelled = labelled->next) + killregister(&labelled->op); + } + } +} + +static void killregisters(PCode *pcode) { + PCodeArg *op; + int i; + + for (i = 0, op = pcode->args; i < pcode->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite)) + killregister(op); + } +} + +static void copyregister(PCodeArg *src, PCodeArg *dest) { + RegValue *srcvalue; + RegValue *destvalue; + + srcvalue = ®value[src->arg][src->data.reg.reg]; + destvalue = ®value[dest->arg][dest->data.reg.reg]; + + if (destvalue->number < valueceiling && nextvaluenumber >= valueceiling) + stackregistervalue(dest, destvalue); + + if (destvalue->available) + unlabelvalue(destvalue->available, dest); + destvalue->available = srcvalue->available; + if (destvalue->available) + labelvalue(destvalue->available, dest); + + destvalue->number = srcvalue->number; + + if (srcvalue->x4 && srcvalue->number == regvalue[src->arg][srcvalue->x4].number) + destvalue->x4 = srcvalue->x4; + else + destvalue->x4 = src->data.reg.reg; +} + +static int matchvalues(AvailableValue *av, PCode *match) { + PCodeArg *avOp; + PCodeArg *matchOp; + int i; + + for (avOp = &av->pcode->args[0], matchOp = &match->args[0], i = 0; i < match->argCount; i++, avOp++, matchOp++) { + if (i != 0) { + switch (avOp->kind) { + case PCOp_REGISTER: + if (av->opnumbers[i] != regvalue[matchOp->arg][matchOp->data.reg.reg].number) + return 0; + break; + case PCOp_MEMORY: + if (matchOp->kind != PCOp_MEMORY) + return 0; + if (matchOp->data.mem.obj != avOp->data.mem.obj) + return 0; + if (matchOp->data.mem.offset != avOp->data.mem.offset) + return 0; + if ((unsigned char) matchOp->arg != (unsigned char) avOp->arg) + return 0; + break; + case PCOp_IMMEDIATE: + if (matchOp->kind != PCOp_IMMEDIATE) + return 0; + if (matchOp->data.imm.value != avOp->data.imm.value) + return 0; + break; + case PCOp_LABEL: + if (matchOp->kind != PCOp_LABEL) + return 0; + if (matchOp->data.label.label != avOp->data.label.label) + return 0; + break; + case PCOp_SYSREG: + CError_FATAL(572); + } + } + } + + if ((match->flags & (fIsRead | fPCodeFlag20000)) && match->alias->valuenumber != av->aliasnumber) + return 0; + + return 1; +} + +static void chooselocation(AvailableValue *av, PCodeArg *op) { + ValueLabel *labelled; + PCodeArg *baseop; + + baseop = &av->pcode->args[0]; + labelled = av->labelled; + while (labelled) { + if (labelled->op.data.reg.reg == baseop->data.reg.reg) { + *op = labelled->op; + return; + } + labelled = labelled->next; + } + + *op = av->labelled[0].op; +} + +static int findavailablevalue(PCode *pcode, PCodeArg *op) { + AvailableValue *av; + PCodeArg tmp1; + PCodeArg tmp2; + + for (av = opvalue[pcode->op]; av; av = av->next) { + if (av->labelled && av->pcode->flags == pcode->flags && av->pcode->argCount == pcode->argCount) { + if (!matchvalues(av, pcode)) { + if (!(pcode->flags & fCommutative)) + continue; + + tmp1 = pcode->args[1]; + pcode->args[1] = pcode->args[2]; + pcode->args[2] = tmp1; + + if (!matchvalues(av, pcode)) { + tmp2 = pcode->args[1]; + pcode->args[1] = pcode->args[2]; + pcode->args[2] = tmp2; + continue; + } + } + chooselocation(av, op); + return 1; + } + } + + return 0; +} + +static void addavailablevalue(PCode *pcode) { + AvailableValue *av; + PCodeArg *op; + int i; + + av = oalloc(sizeof(AvailableValue) + sizeof(int) * pcode->argCount); + av->labelled = NULL; + av->pcode = pcode; + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER) + av->opnumbers[i] = regvalue[op->arg][op->data.reg.reg].number; + } + + if (pcode->flags & (fIsRead | fPCodeFlag20000)) + av->aliasnumber = pcode->alias->valuenumber; + + op = &pcode->args[0]; + av->killedregister = killregister(op); + labelvalue(av, op); + regvalue[op->arg][op->data.reg.reg].available = av; + av->next = opvalue[pcode->op]; + opvalue[pcode->op] = av; +} + +static int isCSEop(PCode *pcode) { + PCodeArg *baseOp; + PCodeArg *op; + int i; + + baseOp = &pcode->args[0]; + + switch (pcode->op) { + case PC_CMPI: + case PC_CMP: + case PC_CMPLI: + case PC_CMPL: + case PC_FCMPU: + case PC_FCMPO: + if (!moreaggressiveoptimization) + return 0; + break; + case PC_LI: + case PC_LIS: + if (!moreaggressiveoptimization) + return 0; + if (pcode->args[0].data.reg.reg < first_fe_temporary_register[RegClass_GPR] || pcode->args[0].data.reg.reg > last_temporary_register[RegClass_GPR]) + return 0; + break; + } + + if (PCODE_FLAG_SET_F(pcode) & (fIsVolatile | fSideEffects | fOverflow | fSetsCarry | fRecordBit)) + return 0; + + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if (op != baseOp && + op->kind == baseOp->kind && + op->arg == baseOp->arg && + op->data.reg.reg == baseOp->data.reg.reg) + return 0; + } + + return 1; +} + +static int isCSEload(PCode *pcode) { + PCodeArg *op; + int i; + int count; + + count = 0; + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite)) + count++; + } + + return count == 1; +} + +static void registercopy(PCode *pcode) { + PCodeArg *op1; + PCodeArg *op2; + + op1 = &pcode->args[0]; + op2 = &pcode->args[1]; + if (samevalue(op2, op1)) + deletepcode(pcode); + else + copyregister(op2, op1); +} + +static PCode *recentlystored(Alias *alias, PCodeArg *op) { + PCode *pc; + if ((pc = alias->valuepcode) && alias->valuenumber == regvalue[pc->args[0].arg][pc->args[0].data.reg.reg].number) { + *op = pc->args[0]; + return pc; + } else { + return NULL; + } +} + +static void simpleload(PCode *pcode) { + PCodeArg *origOp; + PCodeArg op; + PCode *rs; + + origOp = &pcode->args[0]; + if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) { + killregisters(pcode); + return; + } + + if (findavailablevalue(pcode, &op)) { + if (!samevalue(origOp, &op)) { + insertpcodebefore(pcode, makecopyinstruction(&op, origOp)); + copyregister(&op, origOp); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else if ((rs = recentlystored(pcode->alias, &op)) && can_reuse_stored_value(rs, pcode)) { + if (!samevalue(origOp, &op)) { + insertpcodebefore(pcode, makecopyinstruction(&op, origOp)); + copyregister(&op, origOp); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else { + addavailablevalue(pcode); + } +} + +static void simplestore(PCode *pcode) { + update_alias_value(pcode->alias, pcode); + killregisters(pcode); +} + +static void pointerload(PCode *pcode) { + PCodeArg *op; + PCodeArg buf; + + op = &pcode->args[0]; + + if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) { + killregisters(pcode); + return; + } + + if (findavailablevalue(pcode, &buf)) { + if (!samevalue(op, &buf)) { + insertpcodebefore(pcode, makecopyinstruction(&buf, op)); + copyregister(&buf, op); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else { + addavailablevalue(pcode); + } +} + +static void pointerstore(PCode *pcode) { + update_alias_value(pcode->alias, NULL); + killregisters(pcode); +} + +static void arithmeticop(PCode *pcode) { + PCodeArg *op; + PCodeArg buf; + + op = &pcode->args[0]; + + if (findavailablevalue(pcode, &buf)) { + if (!samevalue(op, &buf)) { + insertpcodebefore(pcode, makecopyinstruction(&buf, op)); + copyregister(&buf, op); + } + deletepcode(pcode); + removedcommonsubexpressions = 1; + } else { + addavailablevalue(pcode); + } +} + +static void functioncall(PCode *pcode) { + killregisters(pcode); + if (coloring) { + update_all_alias_values(); + killallCSEs(); + } else { + update_alias_value(pcode->alias, NULL); + } +} + +static void operatefrommemory(PCode *pcode) { + CError_FATAL(980); +} + +static void operatetomemory(PCode *pcode) { + CError_FATAL(1011); +} + +static void propagatecopiesto(PCode *pcode) { + PCodeArg *op; + int i; + + for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) { + if ( + op->kind == PCOp_REGISTER && + (op->data.reg.effect & (EffectRead | EffectWrite | Effect8)) == EffectRead && + op->data.reg.reg >= n_real_registers[op->arg] && + regvalue[op->arg][op->data.reg.reg].x4 && + regvalue[op->arg][op->data.reg.reg].x4 >= n_real_registers[op->arg] && + regvalue[op->arg][op->data.reg.reg].number == regvalue[op->arg][regvalue[op->arg][op->data.reg.reg].x4].number + ) { + op->data.reg.reg = regvalue[op->arg][op->data.reg.reg].x4; + } + } +} + +static void removecsesfrombasicblock(PCodeBlock *block) { + PCode *pcode; + PCode *next; + + for (pcode = block->firstPCode; pcode; pcode = next) { + next = pcode->nextPCode; + propagatecopiesto(pcode); + if (pcode->flags & fIsMove) { + registercopy(pcode); + } else if ((pcode->flags & fIsCall) && (pcode->flags & (fLink | fSideEffects))) { + functioncall(pcode); + } else if (pcode->flags & fIsRead) { + if (pcode->flags & fIsPtrOp) + pointerload(pcode); + else + simpleload(pcode); + } else if (pcode->flags & fIsWrite) { + if (pcode->flags & fIsPtrOp) + pointerstore(pcode); + else + simplestore(pcode); + } else if (pcode->flags & fPCodeFlag20000) { + operatefrommemory(pcode); + } else if (pcode->flags & fPCodeFlag40000) { + operatetomemory(pcode); + } else if ((pcode->flags & fIsCSE) && isCSEop(pcode)) { + arithmeticop(pcode); + } else { + killregisters(pcode); + } + } + + block->flags |= fVisited; +} + +static void getvaluestate(State *state) { + state->stackedvalues = stackedvalues; + state->valueceiling = valueceiling; +} + +static void setvaluestate(State *state) { + stackedvalues = state->stackedvalues; + valueceiling = state->valueceiling; +} + +static void forkvaluestate(int number) { + stackedvalues = NULL; + valueceiling = number; +} + +static void regressvaluestate(void) { + AvailableValue *av; + AvailableValue **ptr; + int i; + StackedValue *stacked; + + for (i = 0; i < 428; i++) { + ptr = &opvalue[i]; + while ((av = *ptr)) { + if (av->killedregister >= valueceiling) + *ptr = av->next; + else + ptr = &av->next; + } + } + + for (stacked = stackedvalues; stacked; stacked = stacked->next) + unstackvalue(stacked); +} + +static void removecsesfromextendedbasicblock(PCodeBlock *block) { + PCLink *succ; + int counter; + State state; + + removecsesfrombasicblock(block); + while (block->successors && + !block->successors->nextLink && + block->successors->block->predecessors && + !block->successors->block->predecessors->nextLink) { + block = block->successors->block; + removecsesfrombasicblock(block); + } + + counter = 0; + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) + counter++; + } + + if (counter) { + getvaluestate(&state); + forkvaluestate(nextvaluenumber); + for (succ = block->successors; succ; succ = succ->nextLink) { + if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) { + removecsesfromextendedbasicblock(succ->block); + regressvaluestate(); + } + } + setvaluestate(&state); + } +} + +void removecommonsubexpressions(Object *proc, int flag) { + PCodeBlock *block; + + moreaggressiveoptimization = flag; + removedcommonsubexpressions = 0; + gather_alias_info(); + allocatecsedatastructures(); + + for (block = pcbasicblocks; block; block = block->nextBlock) + block->flags &= ~fVisited; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (!(block->flags & fVisited)) { + initializecsedatastructures(); + removecsesfromextendedbasicblock(block); + } + } + + freeoheap(); +} + diff --git a/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c new file mode 100644 index 0000000..fde27f1 --- /dev/null +++ b/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/VectorArraysToRegs.c @@ -0,0 +1,548 @@ +#include "compiler/VectorArraysToRegs.h" +#include "compiler/CError.h" +#include "compiler/CFunc.h" +#include "compiler/BitVectors.h" +#include "compiler/CompilerTools.h" +#include "compiler/PCode.h" +#include "compiler/PCodeInfo.h" +#include "compiler/Registers.h" +#include "compiler/UseDefChains.h" +#include "compiler/objects.h" +#include "compiler/types.h" + +typedef struct LocalVectorArray { + struct LocalVectorArray *next; + Object *object; + unsigned int invalid:1; + SInt32 arraySize; + SInt32 elementCount; + int totalUses; + int elements[1]; +} LocalVectorArray; + +typedef struct VectorPropInfo { + UInt32 *use; + UInt32 *def; + UInt32 *in; + UInt32 *out; +} VectorPropInfo; + +typedef struct ADDI { + PCode *instr; + RegUseOrDef *list; +} ADDI; + +static int number_of_ADDIs; +static ADDI *ADDIs; +static VectorPropInfo *vectorpropinfo; +static int *naddsinblock; +static int *firstaddinblock; +static Boolean converted_arrays; + +static LocalVectorArray *scanforlocalvectorarrays(void) { + SInt32 elementCount; + LocalVectorArray *head; + LocalVectorArray *array; + ObjectList *list; + int i; + SInt32 arraySize; + + head = NULL; + + for (list = locals; list; list = list->next) { + if ( + list->object && + !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && + list->object->type && + IS_TYPE_ARRAY(list->object->type) && + IS_TYPE_VECTOR(TPTR_TARGET(list->object->type)) + ) { + arraySize = list->object->type->size; + elementCount = arraySize / 16; + if (elementCount > 0 && elementCount <= 8) { + array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray)); + array->next = head; + head = array; + + array->object = list->object; + array->arraySize = arraySize; + array->elementCount = elementCount; + array->totalUses = 0; + array->invalid = 0; + + for (i = 0; i < elementCount; i++) { + array->elements[i] = 0; + } + } + } + } + + return head; +} + +static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) { + while (arrays) { + if (arrays->object == object) + return arrays; + arrays = arrays->next; + } + return NULL; +} + +static void scaninstructions(LocalVectorArray *arrays) { + PCodeBlock *block; + PCode *instr; + int counter; + int i; + PCodeArg *op; + LocalVectorArray *array; + int element; + + naddsinblock = oalloc(sizeof(int) * pcblockcount); + memclrw(naddsinblock, sizeof(int) * pcblockcount); + + firstaddinblock = oalloc(sizeof(int) * pcblockcount); + memclrw(firstaddinblock, sizeof(int) * pcblockcount); + + number_of_ADDIs = 0; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + firstaddinblock[block->blockIndex] = number_of_ADDIs; + counter = 0; + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + op = instr->args; + i = instr->argCount; + while (i--) { + if ( + op->kind == PCOp_MEMORY && + (PCOpMemoryArg) op->arg == PCOpMemory1 && + (array = lookup_vector_array_object(arrays, op->data.mem.obj)) && + !array->invalid + ) + { + if (instr->op != PC_ADDI) { + array->invalid = 1; + } else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) { + array->invalid = 1; + } else { + number_of_ADDIs++; + counter++; + } + + if (!array->invalid) { + element = op->data.mem.offset / 16; + if (element < array->elementCount) + array->elements[element]++; + else + array->invalid = 1; + } + } + op++; + } + } + } + naddsinblock[block->blockIndex] = counter; + } +} + +static void computeaddilist(LocalVectorArray *arrays) { + PCodeBlock *block; + PCode *instr; + RegUseOrDef *list; + ADDI *addi; + UInt32 *vec; + LocalVectorArray *array; + UseOrDef *def; + int defID; + UseOrDef *use; + int useID; + + ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs); + memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs); + + vec = oalloc(4 * ((number_of_Uses + 31) >> 5)); + + for (block = pcbasicblocks; block; block = block->nextBlock) { + if (naddsinblock[block->blockIndex]) { + bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses); + addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1]; + for (instr = block->lastPCode; instr; instr = instr->prevPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + int reg; // r18 + if ( + instr->op == PC_ADDI && + (reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] && + instr->args[2].kind == PCOp_MEMORY && + (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && + (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) && + !array->invalid + ) + { + addi->instr = instr; + addi->list = NULL; + for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) { + if (bitvectorgetbit(list->id, vec)) { + RegUseOrDef *node = oalloc(sizeof(RegUseOrDef)); + node->id = list->id; + node->next = addi->list; + addi->list = node; + } + } + addi--; + } + + for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) { + if (def->v.kind == PCOp_REGISTER) { + RegUseOrDef *l; + for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next) + bitvectorclearbit(l->id, vec); + } + } + + for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) { + if (use->v.kind == PCOp_REGISTER) + bitvectorsetbit(useID, vec); + } + } + } + } + } +} + +static void allocatevectorpropinfo(void) { + VectorPropInfo *info; + int i; + + vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount); + for (i = 0, info = vectorpropinfo; i < pcblockcount; i++, info++) { + info->use = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + info->def = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + info->in = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + info->out = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); + } +} + +static void computelocalvectorpropinfo(LocalVectorArray *arrays) { + VectorPropInfo *info; + PCodeBlock *block; + PCode *instr; + UInt32 *vec0; + UInt32 *vec4; + int index; + PCodeArg *op; + int i; + int addi_i; + ADDI *addi; + LocalVectorArray *array; + + for (block = pcbasicblocks; block; block = block->nextBlock) { + info = &vectorpropinfo[block->blockIndex]; + vec0 = info->use; + vec4 = info->def; + bitvectorinitialize(vec0, number_of_ADDIs, 0); + bitvectorinitialize(vec4, number_of_ADDIs, 0); + index = firstaddinblock[block->blockIndex]; + + for (instr = block->firstPCode; instr; instr = instr->nextPCode) { + if (!(instr->flags & fIsBranch) && instr->argCount) { + i = instr->argCount; + op = instr->args; + while (i--) { + if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) { + for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) { + if ( + addi->instr && + addi->instr->args[0].arg == op->arg && + addi->instr->args[0].data.reg.reg == op->data.reg.reg + ) + { + if (addi->instr->block == block) + bitvectorclearbit(addi_i, vec0); + else + bitvectorsetbit(addi_i, vec4); + } + } + } + op++; + } + + if ( + instr->op == PC_ADDI && + instr->args[2].kind == PCOp_MEMORY && + (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && + (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) && + !array->invalid + ) + { + bitvectorsetbit(index, vec0); + index++; + } + } + } + } +} + +static void computeglobalvectorpropinfo(void) { + VectorPropInfo *info; + PCodeBlock *block; + UInt32 *vec0; + UInt32 *vec4; + UInt32 *vec8; + UInt32 *vecC; + int bitvecsize; + int blockIndex; + int i; + int j; + int flag; + PCLink *preds; + UInt32 val; + + bitvecsize = (number_of_ADDIs + 31) >> 5; + flag = 1; + info = &vectorpropinfo[pcbasicblocks->blockIndex]; + bitvectorinitialize(info->in, number_of_ADDIs, 0); + bitvectorcopy(info->out, info->use, number_of_ADDIs); + + for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) { + info = &vectorpropinfo[block->blockIndex]; + vecC = info->out; + vec4 = info->def; + for (i = 0; i < bitvecsize; vecC++, vec4++, i++) + *vecC = ~*vec4; + } + + while (flag) { + flag = 0; + for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) { + if (depthfirstordering[blockIndex]) { + info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex]; + if ((preds = depthfirstordering[blockIndex]->predecessors)) { + vec8 = info->in; + bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs); + for (preds = preds->nextLink; preds; preds = preds->nextLink) + bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs); + } + + vecC = info->out; + vec8 = info->in; + vec0 = info->use; + vec4 = info->def; + for (j = 0; j < bitvecsize; j++) { + val = *vec0 | (*vec8 & ~*vec4); + if (val != *vecC) { + *vecC = val; + flag = 1; + } + vec8++; + vecC++; + vec4++; + vec0++; + } + } + } + } +} + +static int precedes(PCode *a, PCode *b) { + PCode *scan; + + for (scan = a->nextPCode; scan; scan = scan->nextPCode) { + if (scan == b) + return 1; + } + + return 0; +} + +static int checkvectorstoreorload(int addiID, int useID) { + PCode *addiInstr; + UseOrDef *use; + + addiInstr = ADDIs[addiID].instr; + use = Uses + useID; + if (!addiInstr) + return 0; + + if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) + return 0; + + if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX) + return 0; + + if ( + use->pcode->args[1].kind != PCOp_REGISTER || + use->pcode->args[1].arg != RegClass_GPR || + use->pcode->args[1].data.reg.reg != 0 + ) + return 0; + + return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg; +} + +static int checkalluses(LocalVectorArray *arrays, int addiID) { + RegUseOrDef *list; + PCode *instr; + LocalVectorArray *array; + + instr = ADDIs[addiID].instr; + for (list = ADDIs[addiID].list; list; list = list->next) { + if (list && !checkvectorstoreorload(addiID, list->id)) { + array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj); + array->invalid = 1; + return 0; + } + } + + return 1; +} + +static void convert_array_to_register(LocalVectorArray *arrays, int addiID) { + ADDI *addi; + int newReg; + RegUseOrDef *list; + PCode *instr; + PCode *useInstr; + LocalVectorArray *array; + int element; + + addi = ADDIs + addiID; + + if (!(instr = addi->instr)) + return; + + if ( + !(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) || + array->invalid + ) + return; + + element = instr->args[2].data.mem.offset / 16; + if (element > array->elementCount) + return; + + newReg = array->elements[element]; + for (list = addi->list; list; list = list->next) { + useInstr = Uses[list->id].pcode; + if (useInstr->op == PC_LVX) { + converted_arrays = 1; + change_opcode(useInstr, PC_VMR); + change_num_operands(useInstr, 2); + useInstr->args[1].kind = PCOp_REGISTER; + useInstr->args[1].arg = RegClass_VR; + useInstr->args[1].data.reg.reg = newReg; + useInstr->args[1].data.reg.effect = EffectRead; + } else if (useInstr->op == PC_STVX) { + converted_arrays = 1; + change_opcode(useInstr, PC_VMR); + change_num_operands(useInstr, 2); + useInstr->args[1] = useInstr->args[0]; + useInstr->args[0].kind = PCOp_REGISTER; + useInstr->args[0].arg = RegClass_VR; + useInstr->args[0].data.reg.reg = newReg; + useInstr->args[0].data.reg.effect = EffectWrite; + } else { + CError_FATAL(661); + } + } + deletepcode(addi->instr); +} + +static void convert_arrays_to_registers(LocalVectorArray *arrays) { + int i; + int counter; + LocalVectorArray **ptr; + LocalVectorArray *array; + + for (i = 0; i < number_of_ADDIs; i++) + checkalluses(arrays, i); + + counter = 0; + ptr = &arrays; + array = *ptr; + while (array) { + if (array->invalid) { + *ptr = array->next; + array = *ptr; + continue; + } + + counter += array->elementCount; + + for (i = 0; i < array->elementCount; i++) + array->totalUses += array->elements[i]; + + array = array->next; + } + + if (arrays) { + while (counter > 32) { + LocalVectorArray *best; + int score; + score = 0; + best = NULL; + for (array = arrays; array; array = array->next) { + if (best) { + if (array->totalUses < score) { + score = array->totalUses; + best = array; + } + } else { + best = array; + score = array->totalUses; + } + } + + if (!best) + break; + + if (best == arrays) { + arrays = best->next; + } else { + for (array = arrays; array; array = array->next) { + if (array->next == best) { + array->next = best->next; + break; + } + } + } + + counter -= best->elementCount; + } + + if (!(array = arrays)) + return; + + while (array) { + for (i = 0; i < array->elementCount; i++) + array->elements[i] = used_virtual_registers[RegClass_VR]++; + array = array->next; + } + + if (arrays) { + for (i = 0; i < number_of_ADDIs; i++) + convert_array_to_register(arrays, i); + } + } +} + +int vectorarraystoregs(void) { + LocalVectorArray *arrays; + + converted_arrays = 0; + if ((arrays = scanforlocalvectorarrays())) { + scaninstructions(arrays); + if (number_of_ADDIs > 0) { + computeusedefchains(0); + computeaddilist(arrays); + allocatevectorpropinfo(); + computelocalvectorpropinfo(arrays); + computedepthfirstordering(); + computeglobalvectorpropinfo(); + convert_arrays_to_registers(arrays); + } + } + + freeoheap(); + return converted_arrays; +} |