/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3d_opt_dead_code.c
 *
 * This is a simple dead code eliminator for SSA values in VIR.
 *
 * It walks all the instructions finding what temps are used, then walks again
 * to remove instructions writing unused temps.
 *
 * This is an inefficient implementation if you have long chains of
 * instructions where the entire chain is dead, but we expect those to have
 * been eliminated at the NIR level, and here we're just cleaning up small
 * problems produced by NIR->VIR.
 */

#include "v3d_compiler.h"

static bool debug;

static void
dce(struct v3d_compile *c, struct qinst *inst)
{
        if (debug) {
                fprintf(stderr, "Removing: ");
                vir_dump_inst(c, inst);
                fprintf(stderr, "\n");
        }
        assert(!v3d_qpu_writes_flags(&inst->qpu));
        vir_remove_instruction(c, inst);
}

static bool
has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                if (inst->src[i].file == QFILE_VPM)
                        return true;
        }

        return false;
}

static bool
can_write_to_null(struct v3d_compile *c, struct qinst *inst)
{
        /* The SFU instructions must write to a physical register. */
        if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
                return false;

        return true;
}

static void
vir_dce_flags(struct v3d_compile *c, struct qinst *inst)
{
        if (debug) {
                fprintf(stderr,
                        "Removing flags write from: ");
                vir_dump_inst(c, inst);
                fprintf(stderr, "\n");
        }

        assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);

        inst->qpu.flags.apf = V3D_QPU_PF_NONE;
        inst->qpu.flags.mpf = V3D_QPU_PF_NONE;
        inst->qpu.flags.auf = V3D_QPU_UF_NONE;
        inst->qpu.flags.muf = V3D_QPU_UF_NONE;
}

static bool
check_last_ldunifa(struct v3d_compile *c,
                   struct qinst *inst,
                   struct qblock *block)
{
        if (!inst->qpu.sig.ldunifa && !inst->qpu.sig.ldunifarf)
                return false;

        list_for_each_entry_from(struct qinst, scan_inst, inst->link.next,
                                 &block->instructions, link) {
                /* If we find a new write to unifa, then this was the last
                 * ldunifa in its sequence and is safe to remove.
                 */
                if (scan_inst->dst.file == QFILE_MAGIC &&
                    scan_inst->dst.index == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }

                /* If we find another ldunifa in the same sequence then we
                 * can't remove it.
                 */
                if (scan_inst->qpu.sig.ldunifa || scan_inst->qpu.sig.ldunifarf)
                        return false;
        }

        return true;
}

static bool
check_first_ldunifa(struct v3d_compile *c,
                    struct qinst *inst,
                    struct qblock *block,
                    struct qinst **unifa)
{
        if (!inst->qpu.sig.ldunifa && !inst->qpu.sig.ldunifarf)
                return false;

        list_for_each_entry_from_rev(struct qinst, scan_inst, inst->link.prev,
                                     &block->instructions, link) {
                /* If we find a write to unifa, then this was the first
                 * ldunifa in its sequence and is safe to remove.
                 */
                if (scan_inst->dst.file == QFILE_MAGIC &&
                    scan_inst->dst.index == V3D_QPU_WADDR_UNIFA) {
                        *unifa = scan_inst;
                        return true;
                }

                /* If we find another ldunifa in the same sequence then we
                 * can't remove it.
                 */
                if (scan_inst->qpu.sig.ldunifa || scan_inst->qpu.sig.ldunifarf)
                        return false;
        }

        unreachable("could not find starting unifa for ldunifa sequence");
}

static bool
increment_unifa_address(struct v3d_compile *c, struct qblock *block, struct qinst *unifa)
{
        struct qblock *current_block = c->cur_block;
        if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
            unifa->qpu.alu.mul.op == V3D_QPU_M_MOV) {
                c->cursor = vir_after_inst(unifa);
                c->cur_block = block;
                struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
                vir_ADD_dest(c, unifa_reg, unifa->src[0], vir_uniform_ui(c, 4u));
                vir_remove_instruction(c, unifa);
                c->cur_block = current_block;
                return true;
        }

        if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
            unifa->qpu.alu.add.op == V3D_QPU_A_ADD) {
                c->cursor = vir_after_inst(unifa);
                c->cur_block = block;
                struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
                struct qreg tmp =
                        vir_ADD(c, unifa->src[1], vir_uniform_ui(c, 4u));
                vir_ADD_dest(c, unifa_reg, unifa->src[0], tmp);
                vir_remove_instruction(c, unifa);
                c->cur_block = current_block;
                return true;
        }

        return false;
}

bool
vir_opt_dead_code(struct v3d_compile *c)
{
        bool progress = false;
        bool *used = calloc(c->num_temps, sizeof(bool));

        /* Defuse the "are you removing the cursor?" assertion in the core.
         * You'll need to set up a new cursor for any new instructions after
         * doing DCE (which we would expect, anyway).
         */
        c->cursor.link = NULL;

        vir_for_each_inst_inorder(inst, c) {
                for (int i = 0; i < vir_get_nsrc(inst); i++) {
                        if (inst->src[i].file == QFILE_TEMP)
                                used[inst->src[i].index] = true;
                }
        }

        vir_for_each_block(block, c) {
                struct qinst *last_flags_write = NULL;

                vir_for_each_inst_safe(inst, block) {
                        /* If this instruction reads the flags, we can't
                         * remove the flags generation for it.
                         */
                        if (v3d_qpu_reads_flags(&inst->qpu))
                                last_flags_write = NULL;

                        if (inst->dst.file != QFILE_NULL &&
                            !(inst->dst.file == QFILE_TEMP &&
                              !used[inst->dst.index])) {
                                continue;
                        }

                        const bool is_ldunifa = inst->qpu.sig.ldunifa ||
                                                inst->qpu.sig.ldunifarf;

                        if (vir_has_side_effects(c, inst) && !is_ldunifa)
                                continue;

                        bool is_first_ldunifa = false;
                        bool is_last_ldunifa = false;
                        struct qinst *unifa = NULL;
                        if (is_ldunifa) {
                                is_last_ldunifa =
                                        check_last_ldunifa(c, inst, block);

                                is_first_ldunifa =
                                        check_first_ldunifa(c, inst, block, &unifa);
                        }

                        if (v3d_qpu_writes_flags(&inst->qpu)) {
                                /* If we obscure a previous flags write,
                                 * drop it.
                                 */
                                if (last_flags_write &&
                                    (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
                                     inst->qpu.flags.mpf != V3D_QPU_PF_NONE)) {
                                        vir_dce_flags(c, last_flags_write);
                                        progress = true;
                                }

                                last_flags_write = inst;
                        }

                        if (v3d_qpu_writes_flags(&inst->qpu) ||
                            has_nonremovable_reads(c, inst) ||
                            (is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) {
                                /* If we can't remove the instruction, but we
                                 * don't need its destination value, just
                                 * remove the destination.  The register
                                 * allocator would trivially color it and it
                                 * wouldn't cause any register pressure, but
                                 * it's nicer to read the VIR code without
                                 * unused destination regs.
                                 */
                                if (inst->dst.file == QFILE_TEMP &&
                                    can_write_to_null(c, inst)) {
                                        if (debug) {
                                                fprintf(stderr,
                                                        "Removing dst from: ");
                                                vir_dump_inst(c, inst);
                                                fprintf(stderr, "\n");
                                        }
                                        c->defs[inst->dst.index] = NULL;
                                        inst->dst.file = QFILE_NULL;
                                        progress = true;
                                }
                                continue;
                        }

                        /* If we are removing the first ldunifa in a sequence
                         * we need to update the unifa address.
                         */
                        if (is_first_ldunifa) {
                                assert(unifa);
                                if (!increment_unifa_address(c, block, unifa))
                                        continue;
                        }

                        assert(inst != last_flags_write);
                        dce(c, inst);
                        progress = true;
                        continue;
                }
        }

        free(used);

        return progress;
}