#!/usr/bin/env python # SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD # SPDX-License-Identifier: Apache-2.0 import argparse import copy import json import math import re import struct import sys from typing import Any from typing import cast from typing import Dict from typing import List from typing import Tuple from typing import Type from typing import TypedDict # Increase this if you change the on-disk binary output format of the BitScrambler so it's # not compatible with previous versions BITSCRAMBLER_BINARY_VER = 1 # If we have multiple BitScrambler versions, this'll indicate what hardware we're expecting # to run on. BITSCRAMBLER_HW_REV = 0 class Element(TypedDict, total=False): more_in_instruction: bool is_label: bool is_meta: bool text: str line: int column: int class InputFlags(TypedDict, total=False): rel_addr: int ctrsel: int lutsel: int class Input(TypedDict, total=False): text: str ele: Element input: int muxsel: int flags: InputFlags class Opcode(TypedDict, total=False): ele: Element op: int c: int end_val: int ctr_val: int tgt: int h: int l: int ctr_add: int ctl_cond_src: Input class Inst(TypedDict, total=False): op: Opcode mux: Dict[int, Input] write: int read: int class Chipcfg(TypedDict, total=False): chipname: str extra_instruction_groups: List[str] support_all: bool # Parser. # A bsasm file consists of labels, instruction bundles, meta-instructions # and comments. Comments start at a # and run to a newline and will be # ignored. Labels are the first word in a line followed by a colon. # Meta-instructions start with 'cfg' and end at the end of the line; # they cannot contain commas. Instruction bundles consist of multiple # sub-instructions separated by a comma. An instruction bundle ends # when there is a newline and the last non-whitespace character was # not a comma. An element is defined as either a meta-instruction or a # sub-instruction. # This routine uses a state machine to keep track of what it has parsed. # It handles comments separately as they essentially can be ignored by # the rest of the state machine; that only needs to see the newline at # the end of the sentence. # The output of this routine is an array with element descriptions: # line -> line the element starts on # column -> column the element starts on # text -> text of the element; no whitespace at the start or end and # all whitespace between words changed to one single space # more_in_instruction -> false if this element is a meta-instruction, # label or at the end of an instruction bundle. # is_label -> true if element is a label # Note that this parser can't see the difference between a # meta-instruction and an instruction in an instruction bundle: errors # like a bundle containing a meta-instruction needs to be detected # elsewhere. # (Note that this handrolled parser might not be the best option wrt ease # to understand and maintain, and that has been brought up by Espressif # colleagues. I've spent some time trying to implement it as a PyParsing # syntax, but it's very hard to get it to agree with the # newline-is-sometimes-an-end-of-statement-and-sometimes-not. Antlr might # be a better choice for that. However, rewriting this likely is more # work than the ease of fixing bugs in the current parser warrants. If # that assumption turns out to be very wrong and you're the unlucky # soul tasked with fixing up this code, feel free to create an issue to # rewrite this and assign it to me - Jeroen) def bsasm_parse(src: str) -> List[Element]: # Small hack: we trigger processing things on a newline. If a file is read without # a newline at the end of the last instruction, we'd erroneously ignore the last element. # Easiest way to fix it is to make sure the src always ends in a newline. src = src + '\n' # Define the various states ST_WH_PRE = 0 # Whitespace before an instruction. ST_ELEMENT = 1 # Inside a subinstruction or meta-instruction ST_WH_IN_EL = 2 # Whitespace, but we're unsure if this is at the end of the element ST_AFTER_COMMA = 3 # Encountered a comma, plus possibly whitespace state = ST_WH_PRE # We keep track of row/col for error reporting line = 0 column = 0 elements: List[Element] = [] curr_element: Element = {} in_comment = False # True if we're anywhere between a # and a newline. for ch in src: # We use these as flags later in the code to start or finish an element. start_element = False finish_element = False if in_comment: # If we're in a comment, go back to no-comment mode at the end of the line. # We'll need to parse the newline, so this is not part of the big # if statement below. if ch == '\n': in_comment = False # Big statemachine handler depending on ch. if in_comment: # Ignore any character in comment pass elif ch == '#': # Start of a comment. in_comment = True elif ch in [' ', '\t']: # Whitespace. This can be before an element (ignored) or inside an element (might need # to insert space if element continues after this) if state == ST_ELEMENT: state = ST_WH_IN_EL elif ch == '\n' or ch == '\r': # Newline. If not after a comma, this finishes the element. If after a comma, # this can be ignored as whitespace-before-the-next-element. if state == ST_ELEMENT or state == ST_WH_IN_EL: finish_element = True state = ST_WH_PRE elif state == ST_AFTER_COMMA: state = ST_WH_PRE elif ch == ',': # A comma. If this is at the end of an element, this finishes the element and # prepares for more elements in the instruction. if state == ST_ELEMENT or state == ST_WH_IN_EL: curr_element['more_in_instruction'] = True finish_element = True state = ST_AFTER_COMMA elif state == ST_AFTER_COMMA: raise RuntimeError( f'Line {line} column {column}: Empty subinstruction found' ) elif state == ST_WH_PRE: raise RuntimeError(f'Line {line} column {column}: Stray comma found') elif ch == ':': # This indicates the current element is a label; a colon is not used anywhere else. if state == ST_ELEMENT: # Check if label is before any instruction if len(elements) == 0 or not elements[-1]['more_in_instruction']: # Looks okay. curr_element['is_label'] = True finish_element = True state = ST_WH_PRE else: raise RuntimeError( f'Line {line} column {column}: Stray semicolon found' ) else: raise RuntimeError(f'Line {line} column {column}: Stray semicolon found') else: # Any other characters. if state == ST_ELEMENT: curr_element['text'] += ch elif state == ST_WH_PRE or state == ST_AFTER_COMMA: start_element = True state = ST_ELEMENT elif state == ST_WH_IN_EL: curr_element['text'] += ' ' + ch state = ST_ELEMENT # Handle starting and finishing of elements if start_element: if 'line' in curr_element: raise RuntimeError( f'Line {line} column {column}: Internal error: Element started twice!' ) curr_element['line'] = line curr_element['column'] = column curr_element['text'] = ch curr_element['more_in_instruction'] = False curr_element['is_label'] = False if finish_element: if 'line' not in curr_element: raise RuntimeError( f'Line {line} column {column}: Internal error: Element finished while none started' ) elements.append(curr_element) curr_element = {} # Handle line and column counts if ch == '\n': line = line + 1 column = 0 else: column = column + 1 return elements # Specific syntax error exception. Reports details about the element[s] to make debugging # assembly sources easier. class bsasm_syntax_error(Exception): def __new__(cls: Type['bsasm_syntax_error'], *args: str, **kwargs: str) -> 'bsasm_syntax_error': # noqa: F821 return cast(bsasm_syntax_error, super().__new__(cls)) def __init__(self, *args: Any) -> None: # noqa: F821 if len(args) == 2: ele = args[0] message = args[1] self.msg = 'Line {} column {}: "{}": {}'.format(ele['line'], ele['column'], ele['text'], message) else: ele1 = args[0] ele2 = args[1] message = args[1] self.msg = 'Line {} col {}: "{}" and line {} col {}: "{}": {}'.format(ele1['line'], ele1['column'], ele1['text'], ele2['line'], ele2['column'], ele2['text'], message) def __str__(self) -> str: # noqa: F821 return self.msg # Definition of possible meta 'cfg' commands class Meta_inst_def(TypedDict, total=False): op: str default: int enum: Dict[str, int] min: int max: int meta_inst_defs: List[Meta_inst_def] = [ # RX_FETCH_MODE: 0 - on startup fill M0/M1, 1 - don't {'op': 'prefetch', 'default': 1, 'enum': {'true': 1, 'false': 0, '1': 1, '0': 0}}, # Amount of bytes read from input or written to output (depending on eof_on) # after EOF on input before we send an EOF to the output. {'op': 'trailing_bytes', 'default': 0, 'min': 0, 'max': 8192}, # Source where 'trailing' counts the bytes after input EOF before generating an output EOF {'op': 'eof_on', 'default': 1, 'enum': {'upstream': 1, 'downstream': 0}}, # Width, in bits, of the LUT memory {'op': 'lut_width_bits', 'default': 32, 'enum': {'8': 8, '16': 16, '32': 32}}, ] # Check if element is a meta element def is_meta(ele: Element) -> bool: if ele['text'].lower()[0:4] == 'cfg ': return True if ele['text'].lower()[0:4] == 'lut ': return True return False # Parse a config meta-instruction: check if the values are within range and convert from enums to values def parse_meta_cfg(ele: Element) -> Tuple[str, int]: words = ele['text'].lower().split(' ') meta_key = '' if len(words) != 3: raise bsasm_syntax_error(ele, f'too many arguments to cfg statement') for meta_inst_def in meta_inst_defs: if meta_inst_def['op'] == words[1]: if 'enum' in meta_inst_def: if words[2] in meta_inst_def['enum']: meta_key = words[1] meta_value = meta_inst_def['enum'][words[2]] else: raise bsasm_syntax_error( ele, f'{words[2]} is not an allowed value for {words[1]}' ) else: v = parse_val(ele, words[2], meta_inst_def['min'], meta_inst_def['max']) meta_key = words[1] meta_value = v if meta_key == '': raise bsasm_syntax_error( ele, f'{words[1]} is not a recognized meta-instruction' ) return (meta_key, meta_value) # Check the number of arguments an element has vs what it should have def check_arg_ct(ele: Element, words: list, ct: int) -> None: if len(words) != ct: raise bsasm_syntax_error(ele, 'invalid number of arguments') # Parses a textual range like '1..10' into an actual Python range def parse_output_range(ele: Element, text: str) -> range: b = re.findall(r'^([0-9]+)(?:\.\.([0-9]+))?$', text) if not b: raise bsasm_syntax_error(ele, f'{text} not a valid integer or range)') start = int(b[0][0]) if b[0][1] != '': end = int(b[0][1]) else: end = int(b[0][0]) if start < 0 or end < 0 or start > 31 or end > 31: raise bsasm_syntax_error(ele, f"'{text}' is not a valid integer or range)") if start <= end: return range(start, end + 1) else: return range(start, end - 1, -1) # Resolve an input to a mux selection number and CTL_LUT_SEL/CTL_SRC_SEL/rel_addr # settings, if those need those to be in a specific state. def parse_input(ele: Element, text: str, meta: Dict[str, int]) -> Input: # Note that strings in the input def arrays need to be lower case. inputs = ( # REG_MEM0 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', # This region is multiplexed using SRC_SEL and LUT_SEL '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', # AUX 'bl<=o0', 'bl>o0', 'bl=o0', 'bl<=o8', 'bl>o8', 'bl=o8', 'bl<=o16', 'bl>o16', 'bl=o16', 'bl<=o24', 'bl>o24', 'bl=o24', 'bh<=o0', 'bh>o0', 'bh=o0', 'bh<=o8', 'bh>o8', 'bh=o8', 'bh<=o16', 'bh>o16', 'bh=o16', 'bh<=o24', 'bh>o24', 'bh=o24', 'b<=o0', 'b>o0', 'b=o0', 'b<=o16', 'b>o16', 'b=o16', 'l', 'h', # Reg_last 'o0', 'o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'o7', 'o8', 'o9', 'o10', 'o11', 'o12', 'o13', 'o14', 'o15', 'o16', 'o17', 'o18', 'o19', 'o20', 'o21', 'o22', 'o23', 'o24', 'o25', 'o26', 'o27', 'o28', 'o29', 'o30', 'o31', ) inputs_ctrsel_set = ( # Counter reg 'a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7', 'a8', 'a9', 'a10', 'a11', 'a12', 'a13', 'a14', 'a15', 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8', 'b9', 'b10', 'b11', 'b12', 'b13', 'b14', 'b15', ) inputs_ctrsel_clr = ( # REG_MEM1 '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', ) # Note that the index of this depends on the selected LUT width. input_lut_bits = ( 'l0', 'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9', 'l10', 'l11', 'l12', 'l13', 'l14', 'l15', 'l16', 'l17', 'l18', 'l19', 'l20', 'l21', 'l22', 'l23', 'l24', 'l25', 'l26', 'l27', 'l28', 'l29', 'l30', 'l31', ) # Note where in the counter reg / mem1 reg region the LUT starts, if enabled lut_starts = {8: 24, 16: 16, 32: 0} lut_start = lut_starts[meta['lut_width_bits']] ret: Input = {'text': text, 'ele': ele, 'flags': {}} # Handle relative addressing rel_addr = False if text[-2:] == '+a': rel_addr = True text = text[:-2] # chop off the '+a' # Find in what list the input is, and process accordingly. if text in inputs: # These inputs are always accessible regardless of ctrsel/lutsel ret['input'] = inputs.index(text) elif text in inputs_ctrsel_set: # These inputs need ctrsel to be 1 i = inputs_ctrsel_set.index(text) ret['input'] = i + 32 ret['flags']['ctrsel'] = 1 if i >= lut_start: ret['flags']['lutsel'] = 0 elif text in inputs_ctrsel_clr: # These inputs need ctrsel to be 0. i = inputs_ctrsel_clr.index(text) ret['input'] = i + 32 ret['flags']['ctrsel'] = 0 if i >= lut_start: # These overlap the LUT. lutsel cannot be 1 when these are addressed. ret['flags']['lutsel'] = 0 elif text in input_lut_bits: # These inputs need lutsel to be 1. i = input_lut_bits.index(text) if i > meta['lut_width_bits']: raise bsasm_syntax_error( ele, f"'LUT input {text} referenced, but LUT is configured to be only {meta['lut_width_bits']} bits wide.", ) ret['input'] = i + 32 + lut_start ret['flags']['lutsel'] = 1 else: raise bsasm_syntax_error(ele, f"'Input {text} is not valid.") if ret['input'] >= 64 and rel_addr: raise bsasm_syntax_error( ele, f"'LUT input {text} cannot be relatively addressed." ) if ret['input'] < 64: ret['flags']['rel_addr'] = rel_addr return ret # Raises an error if two inputs can't be used at the same time def check_input_compatible(in1: Input, in2: Input) -> None: if 'flags' not in in1: return if 'flags' not in in2: return in1f = in1['flags'] in2f = in2['flags'] err = '' if 'rel_addr' in in1f and 'rel_addr' in in2f and in1f['rel_addr'] != in2f['rel_addr']: err = 'Cannot have these inputs as both relative and not-relative in the same instruction' if 'ctrsel' in in1f and 'ctrsel' in in2f and in1f['ctrsel'] != in2f['ctrsel']: err = 'Cannot have both counters/LUT as well as reg_mem1 inputs in the same instruction' if 'lutsel' in in1f and 'lutsel' in in2f and in1f['lutsel'] != in2f['lutsel']: err = 'With the selected LUT width, the LUT input overlaps the selected counter input' if err != '': raise bsasm_syntax_error(in1['ele'], in2['ele'], err) # returns the range as a list of mux selections, plus any ctrsel/lutsel/rel_addr bits needed # Returns a dictionary with the selected input in 'muxsel' plus a 'flags' dictionary. If a # rel_addr/lutsel/ctrsel key is in the 'flags' field, that bit must be set or cleared in # the instruction; if it's not set, the value of that bit doesn't matter for that input. def parse_input_range(ele: Element, text: str, meta: Dict[str, int]) -> List[Input]: # Validate the range and split into start and optionally end fields b = re.findall(r'^([a-z0-9><=+]+)(?:\.\.([a-z0-9<>=+]+))?$', text) if not b: raise bsasm_syntax_error( ele, f'{text} not a valid input selection or range of input selections)' ) start = parse_input(ele, b[0][0], meta) if b[0][1] != '': end = parse_input(ele, b[0][1], meta) else: end = start # Note: this function cannot parse 'weird' ranges, like 31..L1. That's why we limit the ranges # to not cross the 32-bit boundary, except when referring to reg_mem0/reg_mem1. This will # generally match what the user tries to do. if ( start['input'] < 32 and 32 <= end['input'] < 64 and ('flags' not in end or 'ctrsel' not in end['flags'] or end['flags']['ctrsel'] == 0) and ('flags' not in end or 'lutsel' not in end['flags'] or end['flags']['lutsel'] == 0) ): pass elif ( end['input'] < 32 and start['input'] >= 32 and start['input'] < 64 and ('flags' not in start or 'ctrsel' not in start['flags'] or start['flags']['ctrsel'] == 0) and ('flags' not in start or 'lutsel' not in start['flags'] or start['flags']['lutsel'] == 0) ): # in case a 'backwards' range like [35..5] is passed pass elif math.floor(start['input'] / 32) != math.floor(end['input'] / 32): errtxt = f'{text} is not a valid range of input selections. ' if 'flags' in start and 'lutsel' in start['flags'] and start['flags']['lutsel'] == 1 \ and ('flags' not in end or 'lutsel' not in end['flags']) and end['input'] < 32: errtxt += 'Did you forget an L at the end of the range? (e.g. L0..31 instead of L0..L31)' else: errtxt += 'Try splitting up the range.' raise bsasm_syntax_error(ele, errtxt) # The start and end *should* guaranteed to be compatible by now. Check anyway # to catch any errors. If this triggers an exception, we have a bug... check_input_compatible(start, end) flags: InputFlags = {} if 'rel_addr' in start['flags']: flags['rel_addr'] = start['flags']['rel_addr'] if 'rel_addr' in end['flags']: flags['rel_addr'] = end['flags']['rel_addr'] if 'ctrsel' in start['flags']: flags['ctrsel'] = start['flags']['ctrsel'] if 'ctrsel' in end['flags']: flags['ctrsel'] = end['flags']['ctrsel'] if 'lutsel' in start['flags']: flags['lutsel'] = start['flags']['lutsel'] if 'lutsel' in end['flags']: flags['lutsel'] = end['flags']['lutsel'] if start['input'] <= end['input']: r = range(start['input'], end['input'] + 1) else: r = range(start['input'], end['input'] - 1, -1) ret: List[Input] = [] for i in r: n: Input = {'muxsel': i, 'flags': flags, 'ele': ele} ret.append(n) return ret # Parse a numerical field into an int, between a given minimum and maximum. def parse_val(ele: Element, text: str, minimum: int, maximum: int) -> int: try: if text[:2] == '0x': n = int(text[2:], 16) elif text[:2] == '0b': n = int(text[2:], 2) else: n = int(text) except ValueError: raise bsasm_syntax_error(ele, f"'{text}' is not an integer") if n < minimum or n > maximum: raise bsasm_syntax_error( ele, f"'{text}' is out of range [{minimum}..{maximum}]" ) return n # Return an IP for a label text def resolve_label(ele: Element, text: str, labels: Dict[str, int]) -> int: if text in labels: return labels[text] # No match. We could technically also see if the label is a direct IP, but I think # that is more likely to be used erroneously than on purpose. If you read this and # disagree, feel free to file an issue :) raise bsasm_syntax_error(ele, f"'{text}': Label not found.") # Bitfields defining the instructions OP_LOOP = 0x2000000 OP_ADD = 0x0000000 OP_IF = 0x0010000 OP_IFN = 0x0020000 OP_LDCTD = 0x0030000 OP_LDCTI = 0x0040000 OP_ADDCTI = 0x0050000 def check_chip_supports_inst(chipcfg: Chipcfg, instgroup: str, ele: Element) -> None: if 'support_all' in chipcfg and chipcfg['support_all']: return if instgroup not in chipcfg['extra_instruction_groups']: name = chipcfg['chipname'] raise bsasm_syntax_error( ele, f'Chip {name} does not support this instruction' ) def add_op_to_inst(inst: Inst, op: Opcode, ele: Element) -> None: if 'op' in inst: raise bsasm_syntax_error( inst['op']['ele'], ele, f'Cannot have multiple opcodes in one instruction' ) op['ele'] = ele inst['op'] = op # Takes the elements generated by the parse routine and converts it to a # representation of the bits in the Bitscrambler program. def bsasm_assemble(elements: List[Element], chipcfg: Chipcfg) -> Tuple[List[Inst], Dict[str, int], List[int]]: # This assembler uses two passes: the first finds and resolves global # stuff, the second one encodes the actual instructions. # Set the meta-instruction values to their defaults meta: Dict[str, int] = {} for meta_inst_def in meta_inst_defs: meta[meta_inst_def['op']] = meta_inst_def['default'] # Pass 1a: find IPs for labels, mark meta instructions # ToDo: also resolve 'def' symbols here once we implement them ip = 0 ip_for_label: Dict[str, int] = {} inst_is_meta = False inst_start = True for ele in elements: if inst_start and is_meta(ele): # Start of meta-instruction (can only occur at first ele in instruction) inst_is_meta = True if inst_is_meta: ele['is_meta'] = True elif ele['is_label']: # Label. Record its IP. ip_for_label[ele['text']] = ip ele['is_meta'] = False else: ele['is_meta'] = False if ele['more_in_instruction']: inst_start = False else: # End of an instruction inst_start = True # mark next element as start of inst if (not ele['is_meta']) and (not ele['is_label']): ip += 1 inst_is_meta = False # Pass 1B: Collate and parse meta instructions inst_start = True for ele in elements: if inst_start and ele['is_meta']: if ele['text'][0:4] == 'cfg ': (key, val) = parse_meta_cfg(ele) meta[key] = val if ele['more_in_instruction']: raise bsasm_syntax_error( ele, 'garbage after cfg statement detected' ) inst_start = not ele['more_in_instruction'] # Pass 1C: parse LUT data instructions. We do this after the meta instructions pass # as it requires the size of the LUT to figure out min/max boundaries. # Note a lut can be written both as 'lut 1 2 3' as well as 'lut 1,2,3' so we need # to account for both cases. lut_minmax_vals = { 8: (-128, 255), 16: (-32768, 65537), 32: (-2147483648, 4294967296 - 1), } minmax = lut_minmax_vals[meta['lut_width_bits']] lut = [] is_lut = False for ele in elements: if ele['is_meta']: if is_lut: words = ele['text'].split(' ') for w in words: lut.append(parse_val(ele, w, minmax[0], minmax[1])) if ele['text'][0:4] == 'lut ': is_lut = True words = ele['text'].split(' ') for w in words[1:]: lut.append(parse_val(ele, w, minmax[0], minmax[1])) if not ele['more_in_instruction']: is_lut = False # Pass 2: Parse any instructions valid_read_write = [0, 8, 16, 32] insts: List[Inst] = [] def_inst: Inst = {'mux': {}} inst = copy.deepcopy(def_inst) op: Opcode for ele in elements: if not ele['is_meta'] and not ele['is_label']: words = ele['text'].lower().split(' ') if words[0] == 'set': # set (output) (mux input) check_arg_ct(ele, words, 3) outs = parse_output_range(ele, words[1]) ins = parse_input_range(ele, words[2], meta) if len(ins) != 1 and len(ins) != len(outs): raise bsasm_syntax_error(ele, 'ranges not the same length') i = 0 for out in outs: if out in inst['mux']: raise bsasm_syntax_error( ele, f'output {out} already set earlier in instruction' ) if len(ins) == 1: # set range input inst['mux'][out] = ins[0] else: # set range range inst['mux'][out] = ins[i] i = i + 1 elif words[0] == 'write': # Write x bits to output fifo check_arg_ct(ele, words, 2) no = parse_val(ele, words[1], 0, 32) if no not in valid_read_write: raise bsasm_syntax_error( ele, f'{no} is not a valid amount of bits to write' ) inst['write'] = no elif words[0] == 'read': # Read x bits from input fifo check_arg_ct(ele, words, 2) no = parse_val(ele, words[1], 0, 32) if no not in valid_read_write: raise bsasm_syntax_error( ele, f'{no} is not a valid amount of bits to write' ) inst['read'] = no elif re.match('loop[ab]', words[0]): # LOOPc end_val ctr_add tgt check_arg_ct(ele, words, 4) op = {'op': OP_LOOP, 'ele': ele} op['c'] = 1 if words[0][4] == 'b' else 0 op['end_val'] = parse_val(ele, words[1], -32768, 65535) & 0xffff op['ctr_add'] = parse_val(ele, words[2], -16, 15) & 31 op['tgt'] = resolve_label(ele, words[3], ip_for_label) add_op_to_inst(inst, op, ele) elif re.match('add[ab]([hl])?', words[0]): # ADDc[h|l] ctr_add check_arg_ct(ele, words, 2) op = {'op': OP_ADD, 'ele': ele} op['c'] = 1 if words[0][3] == 'b' else 0 if len(words[0]) == 4: op['h'] = 1 op['l'] = 1 else: op['h'] = 1 if words[0][4] == 'h' else 0 op['l'] = 1 if words[0][4] == 'l' else 0 op['ctr_add'] = parse_val(ele, words[1], -32768, 65535) & 0xffff add_op_to_inst(inst, op, ele) elif re.match('if(n)?', words[0]): # IF[N] ctl_cond_src tgt check_arg_ct(ele, words, 3) op = {'op': OP_IF if len(words[0]) == 2 else OP_IFN, 'ele': ele} op['ctl_cond_src'] = parse_input(ele, words[1], meta) op['tgt'] = resolve_label(ele, words[2], ip_for_label) add_op_to_inst(inst, op, ele) elif re.match('ldctd[ab]([hl])?', words[0]): # LDCTDc[h|l] ctr_set check_arg_ct(ele, words, 2) op = {'op': OP_LDCTD} op['c'] = 1 if words[0][5] == 'b' else 0 if len(words[0]) == 6: op['h'] = 1 op['l'] = 1 else: op['h'] = 1 if words[0][6] == 'h' else 0 op['l'] = 1 if words[0][6] == 'l' else 0 op['ctr_add'] = parse_val(ele, words[1], -32768, 65535) & 0xffff add_op_to_inst(inst, op, ele) elif re.match('ldcti[ab]([hl])?', words[0]): # LDCTIc[h|l] check_arg_ct(ele, words, 1) op = {'op': OP_LDCTI} op['c'] = 1 if words[0][5] == 'b' else 0 if len(words[0]) == 6: op['h'] = 1 op['l'] = 1 else: op['h'] = 1 if words[0][6] == 'h' else 0 op['l'] = 1 if words[0][6] == 'l' else 0 add_op_to_inst(inst, op, ele) elif re.match('addcti[ab]([hl])?$', words[0]): # ADDCTIc[h|l] check_chip_supports_inst(chipcfg, 'addcti', ele) check_arg_ct(ele, words, 1) op = {'op': OP_ADDCTI} op['c'] = 1 if words[0][6] == 'b' else 0 if len(words[0]) == 7: op['h'] = 1 op['l'] = 1 else: op['h'] = 1 if words[0][7] == 'h' else 0 op['l'] = 1 if words[0][7] == 'l' else 0 add_op_to_inst(inst, op, ele) elif re.match('jmp', words[0]): # JMP tgt. Pseudo-op, translates to 'IF h tgt' check_arg_ct(ele, words, 2) op = {'op': OP_IF} op['ctl_cond_src'] = parse_input(ele, 'h', meta) op['tgt'] = resolve_label(ele, words[1], ip_for_label) add_op_to_inst(inst, op, ele) elif re.match('nop', words[0]): # NOP. Pseudo-op, translates to ADDA 0 check_arg_ct(ele, words, 1) op = {'op': OP_ADD} op['h'] = 1 op['l'] = 1 op['ctr_add'] = 0 add_op_to_inst(inst, op, ele) else: raise bsasm_syntax_error(ele, 'unknown instruction') if ( (not ele['more_in_instruction']) and (not ele['is_label']) and (not ele['is_meta']) ): insts.append(inst) inst = copy.deepcopy(def_inst) return (insts, meta, lut) # Quick and dirty way to assemble a bytearray from a bunch of bitfields. # The implementation is not optimal as it handles data bit-by-bit, but it works fine. class bitstream: bitpos: int data: list curbyte: int def __init__(self) -> None: self.data = [] self.curbyte = 0 self.bitpos = 0 # Add a field of `bits` bits with the field having the value `val` at the end # of the bitstream def add_bits(self, val: int, bits: int) -> None: v = val for i in range(0, bits): self.curbyte = self.curbyte >> 1 if v & 1: self.curbyte = self.curbyte | 0x80 v = v >> 1 self.bitpos += 1 if self.bitpos == 8: self.bitpos = 0 self.data.append(self.curbyte) self.curbyte = 0 def to_bytearray(self) -> bytearray: return bytearray(self.data) def size(self) -> int: # Return size in bits return len(self.data) * 8 + self.bitpos # This encodes all the instructions into binary. def insts_to_binary(insts: List[Inst], meta: Dict[str, int], lut: list) -> bytearray: if len(insts) > 8: raise RuntimeError('Program has more than eight instructions.') ret = bytearray() # We need to reformat the LUT into 32-bit values, if not already in that format. lut_reformatted = [] if meta['lut_width_bits'] == 8: while (len(lut) % 4) != 0: lut.append(0) for i in range(0, len(lut), 4): v = lut[i] & 255 v += (lut[i + 1] & 255) << 8 v += (lut[i + 2] & 255) << 16 v += (lut[i + 3] & 255) << 24 lut_reformatted.append(v) elif meta['lut_width_bits'] == 16: while (len(lut) % 2) != 0: lut.append(0) for i in range(0, len(lut), 2): v = lut[i] & 65535 v += (lut[i + 1] & 65535) << 16 lut_reformatted.append(v) else: # 32-bit lut_reformatted = lut # Format of binary: # Header, with self-described length. Any fields that are known to the firmware # past this length will be assumed to be 0. # Instructions, padded to 36 bytes per instruction line. Amount of instructions is # defined in header. # LUT data, in 32-bit words. Length is defined in header. # Header. Note this should always be a multiple of 32 bytes. lut_width_vals = {8: 0, 16: 1, 32: 2} ret += struct.pack( ' str: try: with open(filename, 'r') as f: file_content = f.read() except OSError: print(f'Error opening {filename}: {sys.exc_info()[0]}') return file_content # Write a bytestring to a file def write_file(filename: str, data: bytearray) -> None: with open(filename, 'wb') as f: f.write(data) if __name__ == '__main__': parser = argparse.ArgumentParser( prog=sys.argv[0], description='BitScrambler program assembler') parser.add_argument('infile', help='File name of assembly source to be assembled into a binary') parser.add_argument('outfile', help='File name of output binary', nargs='?', default=argparse.SUPPRESS) parser.add_argument('-c', help='Set chip capabilities json file; if set, returns an error when \ an unsupported instruction is assembled', default=argparse.SUPPRESS) args = parser.parse_args() chipcfg = Chipcfg() if 'c' in args: with open(args.c) as chipcfg_json: chipcfg = json.load(chipcfg_json) else: chipcfg = {'chipname': 'chip', 'extra_instruction_groups': [], 'support_all': True} if 'outfile' in args: outfile = args.outfile else: outfile = re.sub('.bsasm', '', args.infile) + '.bsbin' asm = read_file(args.infile) tokens = bsasm_parse(asm) insts, meta, lut = bsasm_assemble(tokens, chipcfg) out_data = insts_to_binary(insts, meta, lut) write_file(outfile, out_data) print(f'Written {len(insts)} instructions and {len(lut)} 32-bit words of LUT.')