#include "compiler/VectorArraysToRegs.h" #include "compiler/CError.h" #include "compiler/CFunc.h" #include "compiler/BitVectors.h" #include "compiler/CompilerTools.h" #include "compiler/PCode.h" #include "compiler/PCodeInfo.h" #include "compiler/Registers.h" #include "compiler/UseDefChains.h" #include "compiler/objects.h" #include "compiler/types.h" typedef struct LocalVectorArray { struct LocalVectorArray *next; Object *object; unsigned int invalid:1; SInt32 arraySize; SInt32 elementCount; int totalUses; int elements[1]; } LocalVectorArray; typedef struct VectorPropInfo { UInt32 *use; UInt32 *def; UInt32 *in; UInt32 *out; } VectorPropInfo; typedef struct ADDI { PCode *instr; RegUseOrDef *list; } ADDI; static int number_of_ADDIs; static ADDI *ADDIs; static VectorPropInfo *vectorpropinfo; static int *naddsinblock; static int *firstaddinblock; static Boolean converted_arrays; static LocalVectorArray *scanforlocalvectorarrays(void) { SInt32 elementCount; LocalVectorArray *head; LocalVectorArray *array; ObjectList *list; int i; SInt32 arraySize; head = NULL; for (list = locals; list; list = list->next) { if ( list->object && !(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) && list->object->type && IS_TYPE_ARRAY(list->object->type) && IS_TYPE_VECTOR(TPTR_TARGET(list->object->type)) ) { arraySize = list->object->type->size; elementCount = arraySize / 16; if (elementCount > 0 && elementCount <= 8) { array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray)); array->next = head; head = array; array->object = list->object; array->arraySize = arraySize; array->elementCount = elementCount; array->totalUses = 0; array->invalid = 0; for (i = 0; i < elementCount; i++) { array->elements[i] = 0; } } } } return head; } static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) { while (arrays) { if (arrays->object == object) return arrays; arrays = arrays->next; } return NULL; } static void scaninstructions(LocalVectorArray *arrays) { PCodeBlock *block; PCode *instr; int counter; int i; PCodeArg *op; LocalVectorArray *array; int element; naddsinblock = oalloc(sizeof(int) * pcblockcount); memclrw(naddsinblock, sizeof(int) * pcblockcount); firstaddinblock = oalloc(sizeof(int) * pcblockcount); memclrw(firstaddinblock, sizeof(int) * pcblockcount); number_of_ADDIs = 0; for (block = pcbasicblocks; block; block = block->nextBlock) { firstaddinblock[block->blockIndex] = number_of_ADDIs; counter = 0; for (instr = block->firstPCode; instr; instr = instr->nextPCode) { if (!(instr->flags & fIsBranch) && instr->argCount) { op = instr->args; i = instr->argCount; while (i--) { if ( op->kind == PCOp_MEMORY && (PCOpMemoryArg) op->arg == PCOpMemory1 && (array = lookup_vector_array_object(arrays, op->data.mem.obj)) && !array->invalid ) { if (instr->op != PC_ADDI) { array->invalid = 1; } else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) { array->invalid = 1; } else { number_of_ADDIs++; counter++; } if (!array->invalid) { element = op->data.mem.offset / 16; if (element < array->elementCount) array->elements[element]++; else array->invalid = 1; } } op++; } } } naddsinblock[block->blockIndex] = counter; } } static void computeaddilist(LocalVectorArray *arrays) { PCodeBlock *block; PCode *instr; RegUseOrDef *list; ADDI *addi; UInt32 *vec; LocalVectorArray *array; UseOrDef *def; int defID; UseOrDef *use; int useID; ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs); memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs); vec = oalloc(4 * ((number_of_Uses + 31) >> 5)); for (block = pcbasicblocks; block; block = block->nextBlock) { if (naddsinblock[block->blockIndex]) { bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses); addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1]; for (instr = block->lastPCode; instr; instr = instr->prevPCode) { if (!(instr->flags & fIsBranch) && instr->argCount) { int reg; // r18 if ( instr->op == PC_ADDI && (reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] && instr->args[2].kind == PCOp_MEMORY && (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) && !array->invalid ) { addi->instr = instr; addi->list = NULL; for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) { if (bitvectorgetbit(list->id, vec)) { RegUseOrDef *node = oalloc(sizeof(RegUseOrDef)); node->id = list->id; node->next = addi->list; addi->list = node; } } addi--; } for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) { if (def->v.kind == PCOp_REGISTER) { RegUseOrDef *l; for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next) bitvectorclearbit(l->id, vec); } } for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) { if (use->v.kind == PCOp_REGISTER) bitvectorsetbit(useID, vec); } } } } } } static void allocatevectorpropinfo(void) { VectorPropInfo *info; int i; vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount); for (i = 0, info = vectorpropinfo; i < pcblockcount; i++, info++) { info->use = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); info->def = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); info->in = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); info->out = oalloc(4 * ((number_of_ADDIs + 31) >> 5)); } } static void computelocalvectorpropinfo(LocalVectorArray *arrays) { VectorPropInfo *info; PCodeBlock *block; PCode *instr; UInt32 *vec0; UInt32 *vec4; int index; PCodeArg *op; int i; int addi_i; ADDI *addi; LocalVectorArray *array; for (block = pcbasicblocks; block; block = block->nextBlock) { info = &vectorpropinfo[block->blockIndex]; vec0 = info->use; vec4 = info->def; bitvectorinitialize(vec0, number_of_ADDIs, 0); bitvectorinitialize(vec4, number_of_ADDIs, 0); index = firstaddinblock[block->blockIndex]; for (instr = block->firstPCode; instr; instr = instr->nextPCode) { if (!(instr->flags & fIsBranch) && instr->argCount) { i = instr->argCount; op = instr->args; while (i--) { if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) { for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) { if ( addi->instr && addi->instr->args[0].arg == op->arg && addi->instr->args[0].data.reg.reg == op->data.reg.reg ) { if (addi->instr->block == block) bitvectorclearbit(addi_i, vec0); else bitvectorsetbit(addi_i, vec4); } } } op++; } if ( instr->op == PC_ADDI && instr->args[2].kind == PCOp_MEMORY && (PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 && (array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) && !array->invalid ) { bitvectorsetbit(index, vec0); index++; } } } } } static void computeglobalvectorpropinfo(void) { VectorPropInfo *info; PCodeBlock *block; UInt32 *vec0; UInt32 *vec4; UInt32 *vec8; UInt32 *vecC; int bitvecsize; int blockIndex; int i; int j; int flag; PCLink *preds; UInt32 val; bitvecsize = (number_of_ADDIs + 31) >> 5; flag = 1; info = &vectorpropinfo[pcbasicblocks->blockIndex]; bitvectorinitialize(info->in, number_of_ADDIs, 0); bitvectorcopy(info->out, info->use, number_of_ADDIs); for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) { info = &vectorpropinfo[block->blockIndex]; vecC = info->out; vec4 = info->def; for (i = 0; i < bitvecsize; vecC++, vec4++, i++) *vecC = ~*vec4; } while (flag) { flag = 0; for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) { if (depthfirstordering[blockIndex]) { info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex]; if ((preds = depthfirstordering[blockIndex]->predecessors)) { vec8 = info->in; bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs); for (preds = preds->nextLink; preds; preds = preds->nextLink) bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs); } vecC = info->out; vec8 = info->in; vec0 = info->use; vec4 = info->def; for (j = 0; j < bitvecsize; j++) { val = *vec0 | (*vec8 & ~*vec4); if (val != *vecC) { *vecC = val; flag = 1; } vec8++; vecC++; vec4++; vec0++; } } } } } static int precedes(PCode *a, PCode *b) { PCode *scan; for (scan = a->nextPCode; scan; scan = scan->nextPCode) { if (scan == b) return 1; } return 0; } static int checkvectorstoreorload(int addiID, int useID) { PCode *addiInstr; UseOrDef *use; addiInstr = ADDIs[addiID].instr; use = Uses + useID; if (!addiInstr) return 0; if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) return 0; if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX) return 0; if ( use->pcode->args[1].kind != PCOp_REGISTER || use->pcode->args[1].arg != RegClass_GPR || use->pcode->args[1].data.reg.reg != 0 ) return 0; return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg; } static int checkalluses(LocalVectorArray *arrays, int addiID) { RegUseOrDef *list; PCode *instr; LocalVectorArray *array; instr = ADDIs[addiID].instr; for (list = ADDIs[addiID].list; list; list = list->next) { if (list && !checkvectorstoreorload(addiID, list->id)) { array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj); array->invalid = 1; return 0; } } return 1; } static void convert_array_to_register(LocalVectorArray *arrays, int addiID) { ADDI *addi; int newReg; RegUseOrDef *list; PCode *instr; PCode *useInstr; LocalVectorArray *array; int element; addi = ADDIs + addiID; if (!(instr = addi->instr)) return; if ( !(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) || array->invalid ) return; element = instr->args[2].data.mem.offset / 16; if (element > array->elementCount) return; newReg = array->elements[element]; for (list = addi->list; list; list = list->next) { useInstr = Uses[list->id].pcode; if (useInstr->op == PC_LVX) { converted_arrays = 1; change_opcode(useInstr, PC_VMR); change_num_operands(useInstr, 2); useInstr->args[1].kind = PCOp_REGISTER; useInstr->args[1].arg = RegClass_VR; useInstr->args[1].data.reg.reg = newReg; useInstr->args[1].data.reg.effect = EffectRead; } else if (useInstr->op == PC_STVX) { converted_arrays = 1; change_opcode(useInstr, PC_VMR); change_num_operands(useInstr, 2); useInstr->args[1] = useInstr->args[0]; useInstr->args[0].kind = PCOp_REGISTER; useInstr->args[0].arg = RegClass_VR; useInstr->args[0].data.reg.reg = newReg; useInstr->args[0].data.reg.effect = EffectWrite; } else { CError_FATAL(661); } } deletepcode(addi->instr); } static void convert_arrays_to_registers(LocalVectorArray *arrays) { int i; int counter; LocalVectorArray **ptr; LocalVectorArray *array; for (i = 0; i < number_of_ADDIs; i++) checkalluses(arrays, i); counter = 0; ptr = &arrays; array = *ptr; while (array) { if (array->invalid) { *ptr = array->next; array = *ptr; continue; } counter += array->elementCount; for (i = 0; i < array->elementCount; i++) array->totalUses += array->elements[i]; array = array->next; } if (arrays) { while (counter > 32) { LocalVectorArray *best; int score; score = 0; best = NULL; for (array = arrays; array; array = array->next) { if (best) { if (array->totalUses < score) { score = array->totalUses; best = array; } } else { best = array; score = array->totalUses; } } if (!best) break; if (best == arrays) { arrays = best->next; } else { for (array = arrays; array; array = array->next) { if (array->next == best) { array->next = best->next; break; } } } counter -= best->elementCount; } if (!(array = arrays)) return; while (array) { for (i = 0; i < array->elementCount; i++) array->elements[i] = used_virtual_registers[RegClass_VR]++; array = array->next; } if (arrays) { for (i = 0; i < number_of_ADDIs; i++) convert_array_to_register(arrays, i); } } } int vectorarraystoregs(void) { LocalVectorArray *arrays; converted_arrays = 0; if ((arrays = scanforlocalvectorarrays())) { scaninstructions(arrays); if (number_of_ADDIs > 0) { computeusedefchains(0); computeaddilist(arrays); allocatevectorpropinfo(); computelocalvectorpropinfo(arrays); computedepthfirstordering(); computeglobalvectorpropinfo(); convert_arrays_to_registers(arrays); } } freeoheap(); return converted_arrays; }