#!/usr/bin/env python
# SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import json
import math
import re
import struct
import sys
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Tuple
from typing import Type
from typing import TypedDict

# Increase this if you change the on-disk binary output format of the BitScrambler so it's
# not compatible with previous versions
BITSCRAMBLER_BINARY_VER = 1
# If we have multiple BitScrambler versions, this'll indicate what hardware we're expecting
# to run on.
BITSCRAMBLER_HW_REV = 0


class Element(TypedDict, total=False):
    more_in_instruction: bool
    is_label: bool
    is_meta: bool
    text: str
    line: int
    column: int


class InputFlags(TypedDict, total=False):
    rel_addr: int
    ctrsel: int
    lutsel: int


class Input(TypedDict, total=False):
    text: str
    ele: Element
    input: int
    muxsel: int
    flags: InputFlags


class Opcode(TypedDict, total=False):
    ele: Element
    op: int
    c: int
    end_val: int
    ctr_val: int
    tgt: int
    h: int
    l: int
    ctr_add: int
    ctl_cond_src: Input


class Inst(TypedDict, total=False):
    op: Opcode
    mux: Dict[int, Input]
    write: int
    read: int


class Chipcfg(TypedDict, total=False):
    chipname: str
    extra_instruction_groups: List[str]
    support_all: bool


# Parser.
# A bsasm file consists of labels, instruction bundles, meta-instructions
# and comments. Comments start at a # and run to a newline and will be
# ignored. Labels are the first word in a line followed by a colon.
# Meta-instructions start with 'cfg' and end at the end of the line;
# they cannot contain commas. Instruction bundles consist of multiple
# sub-instructions separated by a comma. An instruction bundle ends
# when there is a newline and the last non-whitespace character was
# not a comma. An element is defined as either a meta-instruction or a
# sub-instruction.
# This routine uses a state machine to keep track of what it has parsed.
# It handles comments separately as they essentially can be ignored by
# the rest of the state machine; that only needs to see the newline at
# the end of the sentence.
# The output of this routine is an array with element descriptions:
# line -> line the element starts on
# column -> column the element starts on
# text -> text of the element; no whitespace at the start or end and
#         all whitespace between words changed to one single space
# more_in_instruction -> false if this element is a meta-instruction,
#         label or at the end of an instruction bundle.
# is_label -> true if element is a label
# Note that this parser can't see the difference between a
# meta-instruction and an instruction in an instruction bundle: errors
# like a bundle containing a meta-instruction needs to be detected
# elsewhere.

# (Note that this handrolled parser might not be the best option wrt ease
# to understand and maintain, and that has been brought up by Espressif
# colleagues. I've spent some time trying to implement it as a PyParsing
# syntax, but it's very hard to get it to agree with the
# newline-is-sometimes-an-end-of-statement-and-sometimes-not. Antlr might
# be a better choice for that. However, rewriting this likely is more
# work than the ease of fixing bugs in the current parser warrants. If
# that assumption turns out to be very wrong and you're the unlucky
# soul tasked with fixing up this code, feel free to create an issue to
# rewrite this and assign it to me - Jeroen)


def bsasm_parse(src: str) -> List[Element]:
    # Small hack: we trigger processing things on a newline. If a file is read without
    # a newline at the end of the last instruction, we'd erroneously ignore the last element.
    # Easiest way to fix it is to make sure the src always ends in a newline.
    src = src + '\n'
    # Define the various states
    ST_WH_PRE = 0  # Whitespace before an instruction.
    ST_ELEMENT = 1  # Inside a subinstruction or meta-instruction
    ST_WH_IN_EL = 2  # Whitespace, but we're unsure if this is at the end of the element
    ST_AFTER_COMMA = 3  # Encountered a comma, plus possibly whitespace
    state = ST_WH_PRE
    # We keep track of row/col for error reporting
    line = 0
    column = 0
    elements: List[Element] = []
    curr_element: Element = {}
    in_comment = False  # True if we're anywhere between a # and a newline.
    for ch in src:
        # We use these as flags later in the code to start or finish an element.
        start_element = False
        finish_element = False

        if in_comment:
            # If we're in a comment, go back to no-comment mode at the end of the line.
            # We'll need to parse the newline, so this is not part of the big
            # if statement below.
            if ch == '\n':
                in_comment = False

        # Big statemachine handler depending on ch.
        if in_comment:
            # Ignore any character in comment
            pass
        elif ch == '#':
            # Start of a comment.
            in_comment = True
        elif ch in [' ', '\t']:
            # Whitespace. This can be before an element (ignored) or inside an element (might need
            # to insert space if element continues after this)
            if state == ST_ELEMENT:
                state = ST_WH_IN_EL
        elif ch == '\n' or ch == '\r':
            # Newline. If not after a comma, this finishes the element. If after a comma,
            # this can be ignored as whitespace-before-the-next-element.
            if state == ST_ELEMENT or state == ST_WH_IN_EL:
                finish_element = True
                state = ST_WH_PRE
            elif state == ST_AFTER_COMMA:
                state = ST_WH_PRE
        elif ch == ',':
            # A comma. If this is at the end of an element, this finishes the element and
            # prepares for more elements in the instruction.
            if state == ST_ELEMENT or state == ST_WH_IN_EL:
                curr_element['more_in_instruction'] = True
                finish_element = True
                state = ST_AFTER_COMMA
            elif state == ST_AFTER_COMMA:
                raise RuntimeError(
                    f'Line {line} column {column}: Empty subinstruction found'
                )
            elif state == ST_WH_PRE:
                raise RuntimeError(f'Line {line} column {column}: Stray comma found')
        elif ch == ':':
            # This indicates the current element is a label; a colon is not used anywhere else.
            if state == ST_ELEMENT:
                # Check if label is before any instruction
                if len(elements) == 0 or not elements[-1]['more_in_instruction']:
                    # Looks okay.
                    curr_element['is_label'] = True
                    finish_element = True
                    state = ST_WH_PRE
                else:
                    raise RuntimeError(
                        f'Line {line} column {column}: Stray semicolon found'
                    )
            else:
                raise RuntimeError(f'Line {line} column {column}: Stray semicolon found')
        else:
            # Any other characters.
            if state == ST_ELEMENT:
                curr_element['text'] += ch
            elif state == ST_WH_PRE or state == ST_AFTER_COMMA:
                start_element = True
                state = ST_ELEMENT
            elif state == ST_WH_IN_EL:
                curr_element['text'] += ' ' + ch
                state = ST_ELEMENT

        # Handle starting and finishing of elements
        if start_element:
            if 'line' in curr_element:
                raise RuntimeError(
                    f'Line {line} column {column}: Internal error: Element started twice!'
                )
            curr_element['line'] = line
            curr_element['column'] = column
            curr_element['text'] = ch
            curr_element['more_in_instruction'] = False
            curr_element['is_label'] = False
        if finish_element:
            if 'line' not in curr_element:
                raise RuntimeError(
                    f'Line {line} column {column}: Internal error: Element finished while none started'
                )
            elements.append(curr_element)
            curr_element = {}

        # Handle line and column counts
        if ch == '\n':
            line = line + 1
            column = 0
        else:
            column = column + 1
    return elements


# Specific syntax error exception. Reports details about the element[s] to make debugging
# assembly sources easier.

class bsasm_syntax_error(Exception):
    def __new__(cls: Type['bsasm_syntax_error'], *args: str, **kwargs: str) -> 'bsasm_syntax_error':  # noqa: F821
        return cast(bsasm_syntax_error, super().__new__(cls))

    def __init__(self, *args: Any) -> None:  # noqa: F821
        if len(args) == 2:
            ele = args[0]
            message = args[1]
            self.msg = 'Line {} column {}: "{}": {}'.format(ele['line'], ele['column'], ele['text'], message)
        else:
            ele1 = args[0]
            ele2 = args[1]
            message = args[1]
            self.msg = 'Line {} col {}: "{}" and line {} col {}: "{}": {}'.format(ele1['line'],
                                                                                  ele1['column'],
                                                                                  ele1['text'],
                                                                                  ele2['line'],
                                                                                  ele2['column'],
                                                                                  ele2['text'],
                                                                                  message)

    def __str__(self) -> str:  # noqa: F821
        return self.msg


# Definition of possible meta 'cfg' commands
class Meta_inst_def(TypedDict, total=False):
    op: str
    default: int
    enum: Dict[str, int]
    min: int
    max: int


meta_inst_defs: List[Meta_inst_def] = [
    # RX_FETCH_MODE: 0 - on startup fill M0/M1, 1 - don't
    {'op': 'prefetch', 'default': 1, 'enum': {'true': 1, 'false': 0, '1': 1, '0': 0}},
    # Amount of bytes read from input or written to output (depending on eof_on)
    # after EOF on input before we send an EOF to the output.
    {'op': 'trailing_bytes', 'default': 0, 'min': 0, 'max': 8192},
    # Source where 'trailing' counts the bytes after input EOF before generating an output EOF
    {'op': 'eof_on', 'default': 1, 'enum': {'upstream': 1, 'downstream': 0}},
    # Width, in bits, of the LUT memory
    {'op': 'lut_width_bits', 'default': 32, 'enum': {'8': 8, '16': 16, '32': 32}},
]


# Check if element is a meta element
def is_meta(ele: Element) -> bool:
    if ele['text'].lower()[0:4] == 'cfg ':
        return True
    if ele['text'].lower()[0:4] == 'lut ':
        return True
    return False


# Parse a config meta-instruction: check if the values are within range and convert from enums to values
def parse_meta_cfg(ele: Element) -> Tuple[str, int]:
    words = ele['text'].lower().split(' ')
    meta_key = ''
    if len(words) != 3:
        raise bsasm_syntax_error(ele, f'too many arguments to cfg statement')
    for meta_inst_def in meta_inst_defs:
        if meta_inst_def['op'] == words[1]:
            if 'enum' in meta_inst_def:
                if words[2] in meta_inst_def['enum']:
                    meta_key = words[1]
                    meta_value = meta_inst_def['enum'][words[2]]
                else:
                    raise bsasm_syntax_error(
                        ele, f'{words[2]} is not an allowed value for {words[1]}'
                    )
            else:
                v = parse_val(ele, words[2], meta_inst_def['min'], meta_inst_def['max'])
                meta_key = words[1]
                meta_value = v
    if meta_key == '':
        raise bsasm_syntax_error(
            ele, f'{words[1]} is not a recognized meta-instruction'
        )
    return (meta_key, meta_value)


# Check the number of arguments an element has vs what it should have
def check_arg_ct(ele: Element, words: list, ct: int) -> None:
    if len(words) != ct:
        raise bsasm_syntax_error(ele, 'invalid number of arguments')


# Parses a textual range like '1..10' into an actual Python range
def parse_output_range(ele: Element, text: str) -> range:
    b = re.findall(r'^([0-9]+)(?:\.\.([0-9]+))?$', text)
    if not b:
        raise bsasm_syntax_error(ele, f'{text} not a valid integer or range)')
    start = int(b[0][0])
    if b[0][1] != '':
        end = int(b[0][1])
    else:
        end = int(b[0][0])
    if start < 0 or end < 0 or start > 31 or end > 31:
        raise bsasm_syntax_error(ele, f"'{text}' is not a valid integer or range)")
    if start <= end:
        return range(start, end + 1)
    else:
        return range(start, end - 1, -1)


# Resolve an input to a mux selection number and CTL_LUT_SEL/CTL_SRC_SEL/rel_addr
# settings, if those need those to be in a specific state.
def parse_input(ele: Element, text: str, meta: Dict[str, int]) -> Input:
    # Note that strings in the input def arrays need to be lower case.
    inputs = (
        # REG_MEM0
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15',
        '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31',
        # This region is multiplexed using SRC_SEL and LUT_SEL
        '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
        '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
        # AUX
        'bl<=o0', 'bl>o0', 'bl=o0', 'bl<=o8', 'bl>o8', 'bl=o8', 'bl<=o16', 'bl>o16',
        'bl=o16', 'bl<=o24', 'bl>o24', 'bl=o24', 'bh<=o0', 'bh>o0', 'bh=o0', 'bh<=o8',
        'bh>o8', 'bh=o8', 'bh<=o16', 'bh>o16', 'bh=o16', 'bh<=o24', 'bh>o24', 'bh=o24',
        'b<=o0', 'b>o0', 'b=o0', 'b<=o16', 'b>o16', 'b=o16', 'l', 'h',
        # Reg_last
        'o0', 'o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'o7', 'o8', 'o9', 'o10', 'o11', 'o12', 'o13', 'o14', 'o15',
        'o16', 'o17', 'o18', 'o19', 'o20', 'o21', 'o22', 'o23', 'o24', 'o25', 'o26', 'o27', 'o28', 'o29', 'o30', 'o31',
    )
    inputs_ctrsel_set = (
        # Counter reg
        'a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'a10', 'a11', 'a12', 'a13', 'a14', 'a15',
        'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8', 'b9', 'b10', 'b11', 'b12', 'b13', 'b14', 'b15',
    )
    inputs_ctrsel_clr = (
        # REG_MEM1
        '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47',
        '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63',
    )
    # Note that the index of this depends on the selected LUT width.
    input_lut_bits = (
        'l0', 'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9', 'l10', 'l11', 'l12', 'l13', 'l14', 'l15',
        'l16', 'l17', 'l18', 'l19', 'l20', 'l21', 'l22', 'l23', 'l24', 'l25', 'l26', 'l27', 'l28', 'l29', 'l30', 'l31',
    )

    # Note where in the counter reg / mem1 reg region the LUT starts, if enabled
    lut_starts = {8: 24, 16: 16, 32: 0}
    lut_start = lut_starts[meta['lut_width_bits']]

    ret: Input = {'text': text, 'ele': ele, 'flags': {}}
    # Handle relative addressing
    rel_addr = False
    if text[-2:] == '+a':
        rel_addr = True
        text = text[:-2]  # chop off the '+a'

    # Find in what list the input is, and process accordingly.
    if text in inputs:
        # These inputs are always accessible regardless of ctrsel/lutsel
        ret['input'] = inputs.index(text)
    elif text in inputs_ctrsel_set:
        # These inputs need ctrsel to be 1
        i = inputs_ctrsel_set.index(text)
        ret['input'] = i + 32
        ret['flags']['ctrsel'] = 1
        if i >= lut_start:
            ret['flags']['lutsel'] = 0
    elif text in inputs_ctrsel_clr:
        # These inputs need ctrsel to be 0.
        i = inputs_ctrsel_clr.index(text)
        ret['input'] = i + 32
        ret['flags']['ctrsel'] = 0
        if i >= lut_start:
            # These overlap the LUT. lutsel cannot be 1 when these are addressed.
            ret['flags']['lutsel'] = 0
    elif text in input_lut_bits:
        # These inputs need lutsel to be 1.
        i = input_lut_bits.index(text)
        if i > meta['lut_width_bits']:
            raise bsasm_syntax_error(
                ele,
                f"'LUT input {text} referenced, but LUT is configured to be only {meta['lut_width_bits']} bits wide.",
            )
        ret['input'] = i + 32 + lut_start
        ret['flags']['lutsel'] = 1
    else:
        raise bsasm_syntax_error(ele, f"'Input {text} is not valid.")

    if ret['input'] >= 64 and rel_addr:
        raise bsasm_syntax_error(
            ele, f"'LUT input {text} cannot be relatively addressed."
        )

    if ret['input'] < 64:
        ret['flags']['rel_addr'] = rel_addr

    return ret


# Raises an error if two inputs can't be used at the same time
def check_input_compatible(in1: Input, in2: Input) -> None:
    if 'flags' not in in1:
        return
    if 'flags' not in in2:
        return
    in1f = in1['flags']
    in2f = in2['flags']
    err = ''
    if 'rel_addr' in in1f and 'rel_addr' in in2f and in1f['rel_addr'] != in2f['rel_addr']:
        err = 'Cannot have these inputs as both relative and not-relative in the same instruction'
    if 'ctrsel' in in1f and 'ctrsel' in in2f and in1f['ctrsel'] != in2f['ctrsel']:
        err = 'Cannot have both counters/LUT as well as reg_mem1 inputs in the same instruction'
    if 'lutsel' in in1f and 'lutsel' in in2f and in1f['lutsel'] != in2f['lutsel']:
        err = 'With the selected LUT width, the LUT input overlaps the selected counter input'
    if err != '':
        raise bsasm_syntax_error(in1['ele'], in2['ele'], err)


# returns the range as a list of mux selections, plus any ctrsel/lutsel/rel_addr bits needed
# Returns a dictionary with the selected input in 'muxsel' plus a 'flags' dictionary. If a
# rel_addr/lutsel/ctrsel key is in the 'flags' field, that bit must be set or cleared in
# the instruction; if it's not set, the value of that bit doesn't matter for that input.
def parse_input_range(ele: Element, text: str, meta: Dict[str, int]) -> List[Input]:
    # Validate the range and split into start and optionally end fields
    b = re.findall(r'^([a-z0-9><=+]+)(?:\.\.([a-z0-9<>=+]+))?$', text)
    if not b:
        raise bsasm_syntax_error(
            ele, f'{text} not a valid input selection or range of input selections)'
        )
    start = parse_input(ele, b[0][0], meta)
    if b[0][1] != '':
        end = parse_input(ele, b[0][1], meta)
    else:
        end = start

    # Note: this function cannot parse 'weird' ranges, like 31..L1. That's why we limit the ranges
    # to not cross the 32-bit boundary, except when referring to reg_mem0/reg_mem1. This will
    # generally match what the user tries to do.
    if (
        start['input'] < 32
        and 32 <= end['input'] < 64
        and ('flags' not in end or 'ctrsel' not in end['flags'] or end['flags']['ctrsel'] == 0)
        and ('flags' not in end or 'lutsel' not in end['flags'] or end['flags']['lutsel'] == 0)
    ):
        pass
    elif (
        end['input'] < 32
        and start['input'] >= 32
        and start['input'] < 64
        and ('flags' not in start or 'ctrsel' not in start['flags'] or start['flags']['ctrsel'] == 0)
        and ('flags' not in start or 'lutsel' not in start['flags'] or start['flags']['lutsel'] == 0)
    ):
        # in case a 'backwards' range like [35..5] is passed
        pass
    elif math.floor(start['input'] / 32) != math.floor(end['input'] / 32):
        errtxt = f'{text} is not a valid range of input selections. '
        if 'flags' in start and 'lutsel' in start['flags'] and start['flags']['lutsel'] == 1 \
                and ('flags' not in end or 'lutsel' not in end['flags']) and end['input'] < 32:
            errtxt += 'Did you forget an L at the end of the range? (e.g. L0..31 instead of L0..L31)'
        else:
            errtxt += 'Try splitting up the range.'
        raise bsasm_syntax_error(ele, errtxt)

    # The start and end *should* guaranteed to be compatible by now. Check anyway
    # to catch any errors. If this triggers an exception, we have a bug...
    check_input_compatible(start, end)

    flags: InputFlags = {}
    if 'rel_addr' in start['flags']:
        flags['rel_addr'] = start['flags']['rel_addr']
    if 'rel_addr' in end['flags']:
        flags['rel_addr'] = end['flags']['rel_addr']
    if 'ctrsel' in start['flags']:
        flags['ctrsel'] = start['flags']['ctrsel']
    if 'ctrsel' in end['flags']:
        flags['ctrsel'] = end['flags']['ctrsel']
    if 'lutsel' in start['flags']:
        flags['lutsel'] = start['flags']['lutsel']
    if 'lutsel' in end['flags']:
        flags['lutsel'] = end['flags']['lutsel']

    if start['input'] <= end['input']:
        r = range(start['input'], end['input'] + 1)
    else:
        r = range(start['input'], end['input'] - 1, -1)
    ret: List[Input] = []
    for i in r:
        n: Input = {'muxsel': i, 'flags': flags, 'ele': ele}
        ret.append(n)
    return ret


# Parse a numerical field into an int, between a given minimum and maximum.
def parse_val(ele: Element, text: str, minimum: int, maximum: int) -> int:
    try:
        if text[:2] == '0x':
            n = int(text[2:], 16)
        elif text[:2] == '0b':
            n = int(text[2:], 2)
        else:
            n = int(text)
    except ValueError:
        raise bsasm_syntax_error(ele, f"'{text}' is not an integer")
    if n < minimum or n > maximum:
        raise bsasm_syntax_error(
            ele, f"'{text}' is out of range [{minimum}..{maximum}]"
        )
    return n


# Return an IP for a label text
def resolve_label(ele: Element, text: str, labels: Dict[str, int]) -> int:
    if text in labels:
        return labels[text]
    # No match. We could technically also see if the label is a direct IP, but I think
    # that is more likely to be used erroneously than on purpose. If you read this and
    # disagree, feel free to file an issue :)
    raise bsasm_syntax_error(ele, f"'{text}': Label not found.")


# Bitfields defining the instructions
OP_LOOP = 0x2000000
OP_ADD = 0x0000000
OP_IF = 0x0010000
OP_IFN = 0x0020000
OP_LDCTD = 0x0030000
OP_LDCTI = 0x0040000
OP_ADDCTI = 0x0050000


def check_chip_supports_inst(chipcfg: Chipcfg, instgroup: str, ele: Element) -> None:
    if 'support_all' in chipcfg and chipcfg['support_all']:
        return

    if instgroup not in chipcfg['extra_instruction_groups']:
        name = chipcfg['chipname']
        raise bsasm_syntax_error(
            ele, f'Chip {name} does not support this instruction'
        )


def add_op_to_inst(inst: Inst, op: Opcode, ele: Element) -> None:
    if 'op' in inst:
        raise bsasm_syntax_error(
            inst['op']['ele'], ele, f'Cannot have multiple opcodes in one instruction'
        )
    op['ele'] = ele
    inst['op'] = op


# Takes the elements generated by the parse routine and converts it to a
# representation of the bits in the Bitscrambler program.
def bsasm_assemble(elements: List[Element], chipcfg: Chipcfg) -> Tuple[List[Inst], Dict[str, int], List[int]]:
    # This assembler uses two passes: the first finds and resolves global
    # stuff, the second one encodes the actual instructions.

    # Set the meta-instruction values to their defaults
    meta: Dict[str, int] = {}
    for meta_inst_def in meta_inst_defs:
        meta[meta_inst_def['op']] = meta_inst_def['default']

    # Pass 1a: find IPs for labels, mark meta instructions
    # ToDo: also resolve 'def' symbols here once we implement them
    ip = 0
    ip_for_label: Dict[str, int] = {}
    inst_is_meta = False
    inst_start = True
    for ele in elements:
        if inst_start and is_meta(ele):
            # Start of meta-instruction (can only occur at first ele in instruction)
            inst_is_meta = True

        if inst_is_meta:
            ele['is_meta'] = True
        elif ele['is_label']:
            # Label. Record its IP.
            ip_for_label[ele['text']] = ip
            ele['is_meta'] = False
        else:
            ele['is_meta'] = False

        if ele['more_in_instruction']:
            inst_start = False
        else:
            # End of an instruction
            inst_start = True  # mark next element as start of inst
            if (not ele['is_meta']) and (not ele['is_label']):
                ip += 1
            inst_is_meta = False

    # Pass 1B: Collate and parse meta instructions
    inst_start = True
    for ele in elements:
        if inst_start and ele['is_meta']:
            if ele['text'][0:4] == 'cfg ':
                (key, val) = parse_meta_cfg(ele)
                meta[key] = val
                if ele['more_in_instruction']:
                    raise bsasm_syntax_error(
                        ele, 'garbage after cfg statement detected'
                    )
        inst_start = not ele['more_in_instruction']

    # Pass 1C: parse LUT data instructions. We do this after the meta instructions pass
    # as it requires the size of the LUT to figure out min/max boundaries.
    # Note a lut can be written both as 'lut 1 2 3' as well as 'lut 1,2,3' so we need
    # to account for both cases.
    lut_minmax_vals = {
        8:  (-128, 255),
        16: (-32768, 65537),
        32: (-2147483648, 4294967296 - 1),
    }
    minmax = lut_minmax_vals[meta['lut_width_bits']]
    lut = []
    is_lut = False
    for ele in elements:
        if ele['is_meta']:
            if is_lut:
                words = ele['text'].split(' ')
                for w in words:
                    lut.append(parse_val(ele, w, minmax[0], minmax[1]))
            if ele['text'][0:4] == 'lut ':
                is_lut = True
                words = ele['text'].split(' ')
                for w in words[1:]:
                    lut.append(parse_val(ele, w, minmax[0], minmax[1]))
            if not ele['more_in_instruction']:
                is_lut = False

    # Pass 2: Parse any instructions
    valid_read_write = [0, 8, 16, 32]
    insts: List[Inst] = []
    def_inst: Inst = {'mux': {}}
    inst = copy.deepcopy(def_inst)
    op: Opcode
    for ele in elements:
        if not ele['is_meta'] and not ele['is_label']:
            words = ele['text'].lower().split(' ')
            if words[0] == 'set':
                # set (output) (mux input)
                check_arg_ct(ele, words, 3)
                outs = parse_output_range(ele, words[1])
                ins = parse_input_range(ele, words[2], meta)
                if len(ins) != 1 and len(ins) != len(outs):
                    raise bsasm_syntax_error(ele, 'ranges not the same length')
                i = 0
                for out in outs:
                    if out in inst['mux']:
                        raise bsasm_syntax_error(
                            ele, f'output {out} already set earlier in instruction'
                        )
                    if len(ins) == 1:
                        # set range input
                        inst['mux'][out] = ins[0]
                    else:
                        # set range range
                        inst['mux'][out] = ins[i]
                        i = i + 1
            elif words[0] == 'write':
                # Write x bits to output fifo
                check_arg_ct(ele, words, 2)
                no = parse_val(ele, words[1], 0, 32)
                if no not in valid_read_write:
                    raise bsasm_syntax_error(
                        ele, f'{no} is not a valid amount of bits to write'
                    )
                inst['write'] = no
            elif words[0] == 'read':
                # Read x bits from input fifo
                check_arg_ct(ele, words, 2)
                no = parse_val(ele, words[1], 0, 32)
                if no not in valid_read_write:
                    raise bsasm_syntax_error(
                        ele, f'{no} is not a valid amount of bits to write'
                    )
                inst['read'] = no
            elif re.match('loop[ab]', words[0]):
                # LOOPc end_val ctr_add tgt
                check_arg_ct(ele, words, 4)
                op = {'op': OP_LOOP, 'ele': ele}
                op['c'] = 1 if words[0][4] == 'b' else 0
                op['end_val'] = parse_val(ele, words[1], -32768, 65535) & 0xffff
                op['ctr_add'] = parse_val(ele, words[2], -16, 15) & 31
                op['tgt'] = resolve_label(ele, words[3], ip_for_label)
                add_op_to_inst(inst, op, ele)
            elif re.match('add[ab]([hl])?', words[0]):
                # ADDc[h|l] ctr_add
                check_arg_ct(ele, words, 2)
                op = {'op': OP_ADD, 'ele': ele}
                op['c'] = 1 if words[0][3] == 'b' else 0
                if len(words[0]) == 4:
                    op['h'] = 1
                    op['l'] = 1
                else:
                    op['h'] = 1 if words[0][4] == 'h' else 0
                    op['l'] = 1 if words[0][4] == 'l' else 0
                op['ctr_add'] = parse_val(ele, words[1], -32768, 65535) & 0xffff
                add_op_to_inst(inst, op, ele)
            elif re.match('if(n)?', words[0]):
                # IF[N] ctl_cond_src tgt
                check_arg_ct(ele, words, 3)
                op = {'op': OP_IF if len(words[0]) == 2 else OP_IFN, 'ele': ele}
                op['ctl_cond_src'] = parse_input(ele, words[1], meta)
                op['tgt'] = resolve_label(ele, words[2], ip_for_label)
                add_op_to_inst(inst, op, ele)
            elif re.match('ldctd[ab]([hl])?', words[0]):
                # LDCTDc[h|l] ctr_set
                check_arg_ct(ele, words, 2)
                op = {'op': OP_LDCTD}
                op['c'] = 1 if words[0][5] == 'b' else 0
                if len(words[0]) == 6:
                    op['h'] = 1
                    op['l'] = 1
                else:
                    op['h'] = 1 if words[0][6] == 'h' else 0
                    op['l'] = 1 if words[0][6] == 'l' else 0
                op['ctr_add'] = parse_val(ele, words[1], -32768, 65535) & 0xffff
                add_op_to_inst(inst, op, ele)
            elif re.match('ldcti[ab]([hl])?', words[0]):
                # LDCTIc[h|l]
                check_arg_ct(ele, words, 1)
                op = {'op': OP_LDCTI}
                op['c'] = 1 if words[0][5] == 'b' else 0
                if len(words[0]) == 6:
                    op['h'] = 1
                    op['l'] = 1
                else:
                    op['h'] = 1 if words[0][6] == 'h' else 0
                    op['l'] = 1 if words[0][6] == 'l' else 0
                add_op_to_inst(inst, op, ele)
            elif re.match('addcti[ab]([hl])?$', words[0]):
                # ADDCTIc[h|l]
                check_chip_supports_inst(chipcfg, 'addcti', ele)
                check_arg_ct(ele, words, 1)
                op = {'op': OP_ADDCTI}
                op['c'] = 1 if words[0][6] == 'b' else 0
                if len(words[0]) == 7:
                    op['h'] = 1
                    op['l'] = 1
                else:
                    op['h'] = 1 if words[0][7] == 'h' else 0
                    op['l'] = 1 if words[0][7] == 'l' else 0
                add_op_to_inst(inst, op, ele)
            elif re.match('jmp', words[0]):
                # JMP tgt. Pseudo-op, translates to 'IF h tgt'
                check_arg_ct(ele, words, 2)
                op = {'op': OP_IF}
                op['ctl_cond_src'] = parse_input(ele, 'h', meta)
                op['tgt'] = resolve_label(ele, words[1], ip_for_label)
                add_op_to_inst(inst, op, ele)
            elif re.match('nop', words[0]):
                # NOP. Pseudo-op, translates to ADDA 0
                check_arg_ct(ele, words, 1)
                op = {'op': OP_ADD}
                op['h'] = 1
                op['l'] = 1
                op['ctr_add'] = 0
                add_op_to_inst(inst, op, ele)
            else:
                raise bsasm_syntax_error(ele, 'unknown instruction')

            if (
                (not ele['more_in_instruction'])
                and (not ele['is_label'])
                and (not ele['is_meta'])
            ):
                insts.append(inst)
                inst = copy.deepcopy(def_inst)
    return (insts, meta, lut)


# Quick and dirty way to assemble a bytearray from a bunch of bitfields.
# The implementation is not optimal as it handles data bit-by-bit, but it works fine.


class bitstream:
    bitpos: int
    data: list
    curbyte: int

    def __init__(self) -> None:
        self.data = []
        self.curbyte = 0
        self.bitpos = 0

    # Add a field of `bits` bits with the field having the value `val` at the end
    # of the bitstream
    def add_bits(self, val: int, bits: int) -> None:
        v = val
        for i in range(0, bits):
            self.curbyte = self.curbyte >> 1
            if v & 1:
                self.curbyte = self.curbyte | 0x80
            v = v >> 1
            self.bitpos += 1
            if self.bitpos == 8:
                self.bitpos = 0
                self.data.append(self.curbyte)
                self.curbyte = 0

    def to_bytearray(self) -> bytearray:
        return bytearray(self.data)

    def size(self) -> int:
        # Return size in bits
        return len(self.data) * 8 + self.bitpos


# This encodes all the instructions into binary.
def insts_to_binary(insts: List[Inst], meta: Dict[str, int], lut: list) -> bytearray:
    if len(insts) > 8:
        raise RuntimeError('Program has more than eight instructions.')
    ret = bytearray()

    # We need to reformat the LUT into 32-bit values, if not already in that format.
    lut_reformatted = []
    if meta['lut_width_bits'] == 8:
        while (len(lut) % 4) != 0:
            lut.append(0)
        for i in range(0, len(lut), 4):
            v = lut[i] & 255
            v += (lut[i + 1] & 255) << 8
            v += (lut[i + 2] & 255) << 16
            v += (lut[i + 3] & 255) << 24
            lut_reformatted.append(v)
    elif meta['lut_width_bits'] == 16:
        while (len(lut) % 2) != 0:
            lut.append(0)
        for i in range(0, len(lut), 2):
            v = lut[i] & 65535
            v += (lut[i + 1] & 65535) << 16
            lut_reformatted.append(v)
    else:  # 32-bit
        lut_reformatted = lut

    # Format of binary:
    # Header, with self-described length. Any fields that are known to the firmware
    #   past this length will be assumed to be 0.
    # Instructions, padded to 36 bytes per instruction line. Amount of instructions is
    #   defined in header.
    # LUT data, in 32-bit words. Length is defined in header.

    # Header. Note this should always be a multiple of 32 bytes.
    lut_width_vals = {8: 0, 16: 1, 32: 2}
    ret += struct.pack(
        '<BBBBHBBHBB',
        BITSCRAMBLER_BINARY_VER,  # byte
        BITSCRAMBLER_HW_REV,  # byte
        3,  # byte: Length of header in 32-bit words
        len(insts),  # byte: Instruction count
        len(lut_reformatted),  # short: Length of LUT in 32-bit words
        lut_width_vals[meta['lut_width_bits']],  # byte: LUT width setting (0, 1, 2)
        meta['prefetch'],  # byte: prefetch enabled/disabled
        meta['trailing_bytes'] * 8,  # short: number of trailing *bits* after eof
        meta['eof_on'],  # byte
        0,
    )  # byte: unused for now

    for inst in insts:
        bits = bitstream()
        # If the opcode also needs a source, we add it to the mux list as the 32th input
        if 'op' in inst and 'ctl_cond_src' in inst['op']:
            inst['mux'][32] = inst['op']['ctl_cond_src']

        # Check if mux bits are compatible and figure out flags needed
        # Also set unused mux lines to 'l'
        # Finally, insert mux bits into the bitstream.
        flags = {'rel_addr': 0, 'ctrsel': 0, 'lutsel': 0}

        for i in range(0, 33):
            if i in inst['mux']:
                # This could be optimized, but checking each input against each other input
                # allows us to easily tell the user exactly which inputs clash.
                for j in range(i + 1, 33):
                    if j in inst['mux']:
                        check_input_compatible(inst['mux'][i], inst['mux'][j])
                if 'flags' in inst['mux'][i]:
                    if 'rel_addr' in inst['mux'][i]['flags']:
                        flags['rel_addr'] = inst['mux'][i]['flags']['rel_addr']
                    if 'ctrsel' in inst['mux'][i]['flags']:
                        flags['ctrsel'] = inst['mux'][i]['flags']['ctrsel']
                    if 'lutsel' in inst['mux'][i]['flags']:
                        flags['lutsel'] = inst['mux'][i]['flags']['lutsel']
                if i < 32:
                    bits.add_bits(inst['mux'][i]['muxsel'], 7)
            else:
                # Input mux bit is undefined in the program. Set it to a
                # fixed-low input.
                high_input = parse_input({}, 'l', {'lut_width_bits': 8})
                if i < 32:
                    bits.add_bits(high_input['input'], 7)

        # Encode the opcode
        opcode = 0
        if 'op' not in inst:
            # Default to NOP, which is encoded as ADDA 0
            inst['op'] = {'op': OP_ADD, 'h': 1, 'l': 1, 'ctr_add': 0}
        if 'op' in inst['op']:
            opcode = inst['op']['op']
        if 'c' in inst['op']:
            opcode = opcode | ((1 << 24) if inst['op']['c'] else 0)
        if 'h' in inst['op']:
            opcode = opcode | ((1 << 23) if inst['op']['h'] else 0)
        if 'l' in inst['op']:
            opcode = opcode | ((1 << 22) if inst['op']['l'] else 0)
        if 'tgt' in inst['op']:
            opcode = opcode | (inst['op']['tgt'] << 21)
        if 'end_val' in inst['op']:
            opcode = opcode | (inst['op']['end_val'] << 5)
        if 'ctr_add' in inst['op']:  # also aliased to ctr_set
            opcode = opcode | (inst['op']['ctr_add'] << 0)
        if 'ctl_cond_src' in inst['op']:
            opcode = opcode | (inst['op']['ctl_cond_src']['input'] << 0)
        # Add the rest of the fields: read, write, source sel and reladr
        bits.add_bits(opcode, 26)
        val_for_read_write = {0: 0, 8: 1, 16: 2, 32: 3}
        if 'read' not in inst:
            inst['read'] = 0
        bits.add_bits(val_for_read_write[inst['read']], 2)
        if 'write' not in inst:
            inst['write'] = 0
        bits.add_bits(val_for_read_write[inst['write']], 2)
        bits.add_bits(flags['rel_addr'], 1)
        bits.add_bits(flags['ctrsel'], 1)
        bits.add_bits(flags['lutsel'], 1)
        if bits.size() != 257:
            raise RuntimeError(f'Internal error: instruction size is {bits.size()}!')
        # Pad instruction field to 36 bytes = 9 32-bit words
        bits.add_bits(0, 31)
        ret += bits.to_bytearray()

    for i in lut_reformatted:
        ret += struct.pack('<I', i & 0xffffffff)

    return ret


# Return the contents of a file
def read_file(filename: str) -> str:
    try:
        with open(filename, 'r') as f:
            file_content = f.read()
    except OSError:
        print(f'Error opening {filename}: {sys.exc_info()[0]}')
    return file_content


# Write a bytestring to a file
def write_file(filename: str, data: bytearray) -> None:
    with open(filename, 'wb') as f:
        f.write(data)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description='BitScrambler program assembler')
    parser.add_argument('infile', help='File name of assembly source to be assembled into a binary')
    parser.add_argument('outfile', help='File name of output binary', nargs='?', default=argparse.SUPPRESS)
    parser.add_argument('-c', help='Set chip capabilities json file; if set, returns an error when \
            an unsupported instruction is assembled',  default=argparse.SUPPRESS)
    args = parser.parse_args()

    chipcfg = Chipcfg()
    if 'c' in args:
        with open(args.c) as chipcfg_json:
            chipcfg = json.load(chipcfg_json)
    else:
        chipcfg = {'chipname': 'chip', 'extra_instruction_groups': [], 'support_all': True}

    if 'outfile' in args:
        outfile = args.outfile
    else:
        outfile = re.sub('.bsasm', '', args.infile) + '.bsbin'
    asm = read_file(args.infile)
    tokens = bsasm_parse(asm)
    insts, meta, lut = bsasm_assemble(tokens, chipcfg)
    out_data = insts_to_binary(insts, meta, lut)
    write_file(outfile, out_data)
    print(f'Written {len(insts)} instructions and {len(lut)} 32-bit words of LUT.')