mirror of
https://github.com/espressif/esp-idf
synced 2025-03-08 23:59:08 -05:00
1012 lines
40 KiB
Python
Executable File
1012 lines
40 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
import argparse
|
|
import copy
|
|
import json
|
|
import math
|
|
import re
|
|
import struct
|
|
import sys
|
|
from typing import Any
|
|
from typing import cast
|
|
from typing import Dict
|
|
from typing import List
|
|
from typing import Tuple
|
|
from typing import Type
|
|
from typing import TypedDict
|
|
|
|
# Version of the on-disk binary format produced by this assembler. Bump it whenever
# the output format changes in a way that is not compatible with previous versions.
BITSCRAMBLER_BINARY_VER = 1

# Hardware revision the generated program is expected to run on; this tells
# BitScrambler versions apart should there ever be more than one.
BITSCRAMBLER_HW_REV = 0
|
|
|
|
|
|
class Element(TypedDict, total=False):
    """A single parsed unit of bsasm source.

    An element is a label, a meta-instruction or one sub-instruction of an
    instruction bundle. Every key is optional (``total=False``) because an
    element starts out empty and is filled in step by step.
    """

    # True when another sub-instruction of the same bundle follows this element.
    more_in_instruction: bool
    # True when the element is a label.
    is_label: bool
    # True when the element is part of a meta-instruction ('cfg ...' / 'lut ...').
    is_meta: bool
    # Element text without leading/trailing whitespace; whitespace between
    # words is collapsed to a single space.
    text: str
    # Zero-based line on which the element starts (used for error reporting).
    line: int
    # Zero-based column on which the element starts (used for error reporting).
    column: int
|
|
|
|
|
|
class InputFlags(TypedDict, total=False):
    """Instruction-wide selector bits that a mux input depends on.

    A key that is present means the bit must have that value for the input to
    be usable; an absent key means the input does not care about that bit.
    """

    # Whether the input is addressed relatively (written with a '+a' suffix).
    rel_addr: int
    # Required state of the counter/reg_mem1 select bit.
    ctrsel: int
    # Required state of the LUT select bit.
    lutsel: int
|
|
|
|
|
|
class Input(TypedDict, total=False):
    """Description of one resolved mux input selection."""

    # Input name as written in the source.
    text: str
    # Source element the input was found in (kept for error reporting).
    ele: Element
    # Input selection number as resolved from a single input name.
    input: int
    # Input selection number of one entry of an expanded input range.
    muxsel: int
    # Selector bits that must be set or cleared for this input to be valid.
    flags: InputFlags
|
|
|
|
|
|
class Opcode(TypedDict, total=False):
    """Decoded opcode part of an instruction; which keys are set depends on the opcode."""

    # Source element the opcode came from (kept for error reporting).
    ele: Element
    # One of the OP_* opcode constants.
    op: int
    # Counter selection: 1 for counter 'b', 0 for counter 'a'.
    c: int
    # End value of a loop opcode, masked to 16 bits.
    end_val: int
    # NOTE(review): not written by the assembler code in view - confirm usage.
    ctr_val: int
    # Instruction pointer of the jump/loop target label.
    tgt: int
    # Set when the opcode applies to the 'h' half (no suffix sets both h and l).
    h: int
    # Set when the opcode applies to the 'l' half (no suffix sets both h and l).
    l: int
    # Value added to (or loaded into) the counter, masked to the field width.
    ctr_add: int
    # Input used as the condition source of a conditional opcode.
    ctl_cond_src: Input
|
|
|
|
|
|
class Inst(TypedDict, total=False):
    """One assembled instruction bundle."""

    # The (at most one) opcode of the instruction.
    op: Opcode
    # Mux routing: output bit number -> input feeding that output.
    mux: Dict[int, Input]
    # Number of bits to write to the output fifo.
    write: int
    # Number of bits to read from the input fifo.
    read: int
|
|
|
|
|
|
class Chipcfg(TypedDict, total=False):
    """Capabilities of the chip a program is assembled for."""

    # Chip name, used in error messages.
    chipname: str
    # Names of the optional instruction groups the chip implements.
    extra_instruction_groups: List[str]
    # When true, every instruction is accepted without consulting the groups.
    support_all: bool
|
|
|
|
|
|
# Parser.
|
|
# A bsasm file consists of labels, instruction bundles, meta-instructions
|
|
# and comments. Comments start at a # and run to a newline and will be
|
|
# ignored. Labels are the first word in a line followed by a colon.
|
|
# Meta-instructions start with 'cfg' and end at the end of the line;
|
|
# they cannot contain commas. Instruction bundles consist of multiple
|
|
# sub-instructions separated by a comma. An instruction bundle ends
|
|
# when there is a newline and the last non-whitespace character was
|
|
# not a comma. An element is defined as either a meta-instruction or a
|
|
# sub-instruction.
|
|
# This routine uses a state machine to keep track of what it has parsed.
|
|
# It handles comments separately as they essentially can be ignored by
|
|
# the rest of the state machine; that only needs to see the newline at
|
|
# the end of the sentence.
|
|
# The output of this routine is an array with element descriptions:
|
|
# line -> line the element starts on
|
|
# column -> column the element starts on
|
|
# text -> text of the element; no whitespace at the start or end and
|
|
# all whitespace between words changed to one single space
|
|
# more_in_instruction -> false if this element is a meta-instruction,
|
|
# label or at the end of an instruction bundle.
|
|
# is_label -> true if element is a label
|
|
# Note that this parser can't see the difference between a
|
|
# meta-instruction and an instruction in an instruction bundle: errors
|
|
# like a bundle containing a meta-instruction need to be detected
|
|
# elsewhere.
|
|
|
|
# (Note that this handrolled parser might not be the best option wrt ease
|
|
# to understand and maintain, and that has been brought up by Espressif
|
|
# colleagues. I've spent some time trying to implement it as a PyParsing
|
|
# syntax, but it's very hard to get it to agree with the
|
|
# newline-is-sometimes-an-end-of-statement-and-sometimes-not. Antlr might
|
|
# be a better choice for that. However, rewriting this likely is more
|
|
# work than the ease of fixing bugs in the current parser warrants. If
|
|
# that assumption turns out to be very wrong and you're the unlucky
|
|
# soul tasked with fixing up this code, feel free to create an issue to
|
|
# rewrite this and assign it to me - Jeroen)
|
|
|
|
|
|
def bsasm_parse(src: str) -> List[Element]:
    """Split bsasm source text into a flat list of elements.

    The text is scanned one character at a time by a small state machine.
    Comments (``#`` up to the end of the line) are skipped, a colon turns the
    current element into a label, a comma ends a sub-instruction and announces
    another one in the same bundle, and a newline that does not follow a comma
    ends the bundle.

    Args:
        src: Complete text of the assembly source.

    Returns:
        The elements in source order. Each element carries ``line`` and
        ``column`` (zero-based position where it starts), ``text`` (no
        surrounding whitespace, inner whitespace collapsed to one space),
        ``more_in_instruction`` and ``is_label``.

    Raises:
        RuntimeError: On a stray comma, an empty sub-instruction or a colon
            in a place where no label can be defined.
    """
    # Small hack: processing is triggered by a newline. Without a newline after
    # the last instruction we would erroneously ignore the last element, so
    # make sure the source always ends in one.
    src = src + '\n'

    # The various states of the state machine
    ST_WH_PRE = 0       # Whitespace before an instruction.
    ST_ELEMENT = 1      # Inside a subinstruction or meta-instruction
    ST_WH_IN_EL = 2     # Whitespace, but we're unsure if this is at the end of the element
    ST_AFTER_COMMA = 3  # Encountered a comma, plus possibly whitespace
    state = ST_WH_PRE

    # Row/column are tracked for error reporting
    line = 0
    column = 0
    elements: List[Element] = []
    curr_element: Element = {}
    in_comment = False  # True if we're anywhere between a # and a newline.

    for ch in src:
        # Flags telling the code at the end of the loop to start/finish an element.
        start_element = False
        finish_element = False

        # A comment ends at the newline. The newline itself still has to go
        # through the state machine, hence this is handled before the big
        # if statement below.
        if in_comment and ch == '\n':
            in_comment = False

        if in_comment:
            # Ignore any character in a comment
            pass
        elif ch == '#':
            # Start of a comment.
            in_comment = True
        elif ch in (' ', '\t'):
            # Whitespace. Ignored before an element; inside an element it may
            # need to become a single space if the element continues.
            if state == ST_ELEMENT:
                state = ST_WH_IN_EL
        elif ch in ('\n', '\r'):
            # Newline. If not after a comma, this finishes the element. After a
            # comma it is merely whitespace before the next element.
            if state in (ST_ELEMENT, ST_WH_IN_EL):
                finish_element = True
                state = ST_WH_PRE
            elif state == ST_AFTER_COMMA:
                state = ST_WH_PRE
        elif ch == ',':
            # A comma at the end of an element finishes it and prepares for
            # more elements in the same instruction.
            if state in (ST_ELEMENT, ST_WH_IN_EL):
                curr_element['more_in_instruction'] = True
                finish_element = True
                state = ST_AFTER_COMMA
            elif state == ST_AFTER_COMMA:
                raise RuntimeError(
                    f'Line {line} column {column}: Empty subinstruction found'
                )
            elif state == ST_WH_PRE:
                raise RuntimeError(f'Line {line} column {column}: Stray comma found')
        elif ch == ':':
            # The current element is a label; a colon is not used anywhere else.
            # A label is only allowed directly after its name and not in the
            # middle of an instruction bundle.
            label_allowed = state == ST_ELEMENT and (
                len(elements) == 0 or not elements[-1]['more_in_instruction']
            )
            if not label_allowed:
                raise RuntimeError(f'Line {line} column {column}: Stray colon found')
            curr_element['is_label'] = True
            finish_element = True
            state = ST_WH_PRE
        else:
            # Any other character is part of an element.
            if state == ST_ELEMENT:
                curr_element['text'] += ch
            elif state in (ST_WH_PRE, ST_AFTER_COMMA):
                start_element = True
                state = ST_ELEMENT
            elif state == ST_WH_IN_EL:
                # The whitespace was inside the element: collapse it to one space.
                curr_element['text'] += ' ' + ch
                state = ST_ELEMENT

        # Handle starting and finishing of elements
        if start_element:
            if 'line' in curr_element:
                raise RuntimeError(
                    f'Line {line} column {column}: Internal error: Element started twice!'
                )
            curr_element['line'] = line
            curr_element['column'] = column
            curr_element['text'] = ch
            curr_element['more_in_instruction'] = False
            curr_element['is_label'] = False
        if finish_element:
            if 'line' not in curr_element:
                raise RuntimeError(
                    f'Line {line} column {column}: Internal error: Element finished while none started'
                )
            elements.append(curr_element)
            curr_element = {}

        # Handle line and column counts
        if ch == '\n':
            line = line + 1
            column = 0
        else:
            column = column + 1
    return elements
|
|
|
|
|
|
# Specific syntax error exception. Reports details about the element[s] to make debugging
|
|
# assembly sources easier.
|
|
|
|
class bsasm_syntax_error(Exception):
    """Syntax error in an assembly source, tagged with the offending element(s).

    Construct it either as ``bsasm_syntax_error(ele, message)`` or, for an
    error involving two elements, as ``bsasm_syntax_error(ele1, ele2, message)``.
    Each element must provide the ``line``, ``column`` and ``text`` keys.
    ``str()`` of the exception yields the formatted message.
    """

    def __new__(cls: Type['bsasm_syntax_error'], *args: str, **kwargs: str) -> 'bsasm_syntax_error':  # noqa: F821
        # The constructor arguments are deliberately not handed to the base class.
        return cast(bsasm_syntax_error, super().__new__(cls))

    def __init__(self, *args: Any) -> None:  # noqa: F821
        if len(args) == 2:
            ele, message = args
            self.msg = 'Line {} column {}: "{}": {}'.format(
                ele['line'], ele['column'], ele['text'], message
            )
        else:
            ele1 = args[0]
            ele2 = args[1]
            # The message is the third argument; args[1] is the second element.
            message = args[2]
            self.msg = 'Line {} col {}: "{}" and line {} col {}: "{}": {}'.format(
                ele1['line'], ele1['column'], ele1['text'],
                ele2['line'], ele2['column'], ele2['text'],
                message,
            )

    def __str__(self) -> str:  # noqa: F821
        return self.msg
|
|
|
|
|
|
# Definition of possible meta 'cfg' commands
|
|
class Meta_inst_def(TypedDict, total=False):
    """Definition of one setting that a 'cfg' meta-instruction can change.

    A definition has either an ``enum`` mapping or a ``min``/``max`` range.
    """

    # Name of the setting, as written after 'cfg'.
    op: str
    # Value used when the source does not configure the setting.
    default: int
    # Allowed textual values and the number each one stands for.
    enum: Dict[str, int]
    # Lowest allowed value for a numerical setting.
    min: int
    # Highest allowed value for a numerical setting.
    max: int
|
|
|
|
|
|
meta_inst_defs: List[Meta_inst_def] = [
    # RX_FETCH_MODE: 0 - on startup fill M0/M1, 1 - don't
    {
        'op': 'prefetch',
        'default': 1,
        'enum': {'true': 1, 'false': 0, '1': 1, '0': 0},
    },
    # Amount of bytes read from input or written to output (depending on eof_on)
    # after EOF on input before we send an EOF to the output.
    {
        'op': 'trailing_bytes',
        'default': 0,
        'min': 0,
        'max': 8192,
    },
    # Source where 'trailing' counts the bytes after input EOF before generating an output EOF
    {
        'op': 'eof_on',
        'default': 1,
        'enum': {'upstream': 1, 'downstream': 0},
    },
    # Width, in bits, of the LUT memory
    {
        'op': 'lut_width_bits',
        'default': 32,
        'enum': {'8': 8, '16': 16, '32': 32},
    },
]
|
|
|
|
|
|
# Check if element is a meta element
|
|
def is_meta(ele: Element) -> bool:
    """Return True when the element text starts with 'cfg ' or 'lut ' (in any letter case)."""
    prefix = ele['text'].lower()[0:4]
    return prefix in ('cfg ', 'lut ')
|
|
|
|
|
|
# Parse a config meta-instruction: check if the values are within range and convert from enums to values
|
|
def parse_meta_cfg(ele: Element) -> Tuple[str, int]:
    """Parse a ``cfg <key> <value>`` meta-instruction.

    The key is looked up in ``meta_inst_defs``. Enum settings have their value
    translated through the enum table; numerical settings are parsed and
    checked against the allowed range.

    Args:
        ele: Element holding the cfg statement.

    Returns:
        A ``(key, value)`` tuple with the setting name and its numerical value.

    Raises:
        bsasm_syntax_error: If the statement does not consist of exactly three
            words, the key is unknown or the value is not allowed for the key.
    """
    words = ele['text'].lower().split(' ')
    if len(words) != 3:
        problem = 'too few' if len(words) < 3 else 'too many'
        raise bsasm_syntax_error(ele, f'{problem} arguments to cfg statement')
    key = words[1]
    value = words[2]

    for meta_inst_def in meta_inst_defs:
        if meta_inst_def['op'] != key:
            continue
        if 'enum' in meta_inst_def:
            if value not in meta_inst_def['enum']:
                raise bsasm_syntax_error(
                    ele, f'{value} is not an allowed value for {key}'
                )
            return (key, meta_inst_def['enum'][value])
        # Not an enum, so the setting is a number with a min/max range.
        return (key, parse_val(ele, value, meta_inst_def['min'], meta_inst_def['max']))

    raise bsasm_syntax_error(
        ele, f'{key} is not a recognized meta-instruction'
    )
|
|
|
|
|
|
# Check the number of arguments an element has vs what it should have
|
|
def check_arg_ct(ele: Element, words: list, ct: int) -> None:
    """Raise bsasm_syntax_error unless `words` holds exactly `ct` entries.

    `ele` is only used to tag the error with a source position.
    """
    if len(words) == ct:
        return
    raise bsasm_syntax_error(ele, 'invalid number of arguments')
|
|
|
|
|
|
# Parses a textual range like '1..10' into an actual Python range
|
|
def parse_output_range(ele: Element, text: str) -> range:
    """Turn an output selection like '5' or '1..10' into a Python range.

    Args:
        ele: Element the text comes from; used for error reporting.
        text: A single number or two numbers separated by '..'.

    Returns:
        A range running from the first to the last number, both included.
        It counts down when the first number is the larger one.

    Raises:
        bsasm_syntax_error: If the text is malformed or a number exceeds 31.
    """
    match = re.match(r'^([0-9]+)(?:\.\.([0-9]+))?$', text)
    if not match:
        raise bsasm_syntax_error(ele, f"'{text}' is not a valid integer or range")
    start = int(match.group(1))
    # Without a '..' part, the range covers just the one number.
    end = start if match.group(2) is None else int(match.group(2))
    # The pattern only admits digits, so only the upper bound needs checking.
    if start > 31 or end > 31:
        raise bsasm_syntax_error(
            ele, f"'{text}' is out of range; outputs are numbered 0..31"
        )
    if start <= end:
        return range(start, end + 1)
    return range(start, end - 1, -1)
|
|
|
|
|
|
# Resolve an input to a mux selection number and CTL_LUT_SEL/CTL_SRC_SEL/rel_addr
|
|
# settings, if those need those to be in a specific state.
|
|
def parse_input(ele: Element, text: str, meta: Dict[str, int]) -> Input:
    """Resolve an input name to an input selection number plus required flags.

    A trailing '+a' on the name requests relative addressing. Depending on the
    region the input lives in, the result records the state the ctrsel and
    lutsel bits must have for the input to be reachable.

    Args:
        ele: Element the input name comes from; used for error reporting.
        text: Lower-case input name, optionally with a '+a' suffix.
        meta: Meta settings; ``lut_width_bits`` decides where the LUT bits sit.

    Returns:
        An Input with ``text``, ``ele``, ``input`` (the selection number) and
        ``flags`` filled in.

    Raises:
        bsasm_syntax_error: If the name is unknown or empty, refers to a LUT
            bit beyond the configured LUT width, or asks for relative
            addressing of an input that does not support it.
    """
    # Note that the names in these tuples need to be lower case.
    inputs = (
        # REG_MEM0: '0'..'31'
        *(str(i) for i in range(32)),
        # This region is multiplexed using SRC_SEL and LUT_SEL
        *([''] * 32),
        # AUX
        'bl<=o0', 'bl>o0', 'bl=o0', 'bl<=o8', 'bl>o8', 'bl=o8', 'bl<=o16', 'bl>o16',
        'bl=o16', 'bl<=o24', 'bl>o24', 'bl=o24', 'bh<=o0', 'bh>o0', 'bh=o0', 'bh<=o8',
        'bh>o8', 'bh=o8', 'bh<=o16', 'bh>o16', 'bh=o16', 'bh<=o24', 'bh>o24', 'bh=o24',
        'b<=o0', 'b>o0', 'b=o0', 'b<=o16', 'b>o16', 'b=o16', 'l', 'h',
        # Reg_last: 'o0'..'o31'
        *(f'o{i}' for i in range(32)),
    )
    # Counter reg: 'a0'..'a15', 'b0'..'b15'
    inputs_ctrsel_set = (
        *(f'a{i}' for i in range(16)),
        *(f'b{i}' for i in range(16)),
    )
    # REG_MEM1: '32'..'63'
    inputs_ctrsel_clr = tuple(str(i) for i in range(32, 64))
    # 'l0'..'l31'. Note that the resulting input number depends on the selected LUT width.
    input_lut_bits = tuple(f'l{i}' for i in range(32))

    # Where in the counter reg / mem1 reg region the LUT starts, if enabled
    lut_starts = {8: 24, 16: 16, 32: 0}
    lut_start = lut_starts[meta['lut_width_bits']]

    ret: Input = {'text': text, 'ele': ele, 'flags': {}}

    # Handle relative addressing
    rel_addr = False
    if text[-2:] == '+a':
        rel_addr = True
        text = text[:-2]  # chop off the '+a'

    # An empty name would otherwise match the '' placeholders in `inputs`.
    if text == '':
        raise bsasm_syntax_error(ele, 'Input name is missing.')

    # Find in what list the input is, and process accordingly.
    if text in inputs:
        # These inputs are always accessible regardless of ctrsel/lutsel
        ret['input'] = inputs.index(text)
    elif text in inputs_ctrsel_set:
        # These inputs need ctrsel to be 1
        i = inputs_ctrsel_set.index(text)
        ret['input'] = i + 32
        ret['flags']['ctrsel'] = 1
        if i >= lut_start:
            # These overlap the LUT. lutsel cannot be 1 when these are addressed.
            ret['flags']['lutsel'] = 0
    elif text in inputs_ctrsel_clr:
        # These inputs need ctrsel to be 0.
        i = inputs_ctrsel_clr.index(text)
        ret['input'] = i + 32
        ret['flags']['ctrsel'] = 0
        if i >= lut_start:
            # These overlap the LUT. lutsel cannot be 1 when these are addressed.
            ret['flags']['lutsel'] = 0
    elif text in input_lut_bits:
        # These inputs need lutsel to be 1.
        i = input_lut_bits.index(text)
        # An N-bit wide LUT only has bits 0..N-1.
        if i >= meta['lut_width_bits']:
            raise bsasm_syntax_error(
                ele,
                f"LUT input '{text}' referenced, but LUT is configured to be only {meta['lut_width_bits']} bits wide.",
            )
        ret['input'] = i + 32 + lut_start
        ret['flags']['lutsel'] = 1
    else:
        raise bsasm_syntax_error(ele, f"Input '{text}' is not valid.")

    # Only the first 64 inputs can be addressed relatively.
    if ret['input'] >= 64 and rel_addr:
        raise bsasm_syntax_error(
            ele, f"Input '{text}' cannot be relatively addressed."
        )

    if ret['input'] < 64:
        ret['flags']['rel_addr'] = rel_addr

    return ret
|
|
|
|
|
|
# Raises an error if two inputs can't be used at the same time
|
|
def check_input_compatible(in1: Input, in2: Input) -> None:
    """Raise bsasm_syntax_error if two inputs cannot be used in the same instruction.

    Two inputs clash when both demand a value for the same flag (rel_addr,
    ctrsel or lutsel) and the demanded values differ. Inputs that carry no
    'flags' entry are always considered compatible.
    """
    if 'flags' not in in1 or 'flags' not in in2:
        return
    flags_a = cast(Dict[str, int], in1['flags'])
    flags_b = cast(Dict[str, int], in2['flags'])

    # Checked in this order; when several flags clash, the last one is reported.
    clash_texts = (
        ('rel_addr', 'Cannot have these inputs as both relative and not-relative in the same instruction'),
        ('ctrsel', 'Cannot have both counters/LUT as well as reg_mem1 inputs in the same instruction'),
        ('lutsel', 'With the selected LUT width, the LUT input overlaps the selected counter input'),
    )
    problem = ''
    for flag, description in clash_texts:
        if flag in flags_a and flag in flags_b and flags_a[flag] != flags_b[flag]:
            problem = description

    if problem:
        raise bsasm_syntax_error(in1['ele'], in2['ele'], problem)
|
|
|
|
|
|
# returns the range as a list of mux selections, plus any ctrsel/lutsel/rel_addr bits needed
|
|
# Returns a dictionary with the selected input in 'muxsel' plus a 'flags' dictionary. If a
|
|
# rel_addr/lutsel/ctrsel key is in the 'flags' field, that bit must be set or cleared in
|
|
# the instruction; if it's not set, the value of that bit doesn't matter for that input.
|
|
def parse_input_range(ele: Element, text: str, meta: Dict[str, int]) -> List[Input]:
    """Expand an input selection like 'a3' or '0..31' into a list of inputs.

    Args:
        ele: Element the text comes from; used for error reporting.
        text: One input name, or two input names separated by '..'.
        meta: Meta settings, passed on to parse_input().

    Returns:
        One Input per selection in the range, in range order (descending if
        the range is written backwards). Each has 'muxsel', 'ele' and a
        'flags' dictionary that is shared by all entries of the list.

    Raises:
        bsasm_syntax_error: If the text is malformed, an input name is
            invalid, the range crosses a region boundary it may not cross, or
            the two ends have conflicting flag requirements.
    """
    # Validate the range and split into start and optionally end fields
    match = re.match(r'^([a-z0-9><=+]+)(?:\.\.([a-z0-9<>=+]+))?$', text)
    if not match:
        raise bsasm_syntax_error(
            ele, f"'{text}' is not a valid input selection or range of input selections"
        )
    start = parse_input(ele, match.group(1), meta)
    end = start if match.group(2) is None else parse_input(ele, match.group(2), meta)

    def is_plain_reg_mem1(inp: Input) -> bool:
        # True for an input in 32..63 that does not need ctrsel or lutsel set.
        flags = inp.get('flags', {})
        return (
            32 <= inp['input'] < 64
            and flags.get('ctrsel', 0) == 0
            and flags.get('lutsel', 0) == 0
        )

    # Note: this function cannot parse 'weird' ranges, like 31..L1. That's why ranges are
    # limited to not cross a 32-input boundary, except when running between reg_mem0 and
    # reg_mem1 (in either direction, e.g. 5..35 or 35..5). This will generally match what
    # the user tries to do.
    crosses_mem0_mem1 = (
        (start['input'] < 32 and is_plain_reg_mem1(end))
        or (end['input'] < 32 and is_plain_reg_mem1(start))
    )
    if not crosses_mem0_mem1 and start['input'] // 32 != end['input'] // 32:
        errtxt = f'{text} is not a valid range of input selections. '
        if (
            start['flags'].get('lutsel') == 1
            and 'lutsel' not in end['flags']
            and end['input'] < 32
        ):
            errtxt += 'Did you forget an L at the end of the range? (e.g. L0..31 instead of L0..L31)'
        else:
            errtxt += 'Try splitting up the range.'
        raise bsasm_syntax_error(ele, errtxt)

    # The start and end *should* be guaranteed to be compatible by now. Check anyway
    # to catch any errors. If this triggers an exception, we have a bug...
    check_input_compatible(start, end)

    # Merge the requirements of both ends; where both set a flag, the end's value is used.
    flags: InputFlags = {}
    flags.update(start['flags'])
    flags.update(end['flags'])

    if start['input'] <= end['input']:
        selections = range(start['input'], end['input'] + 1)
    else:
        selections = range(start['input'], end['input'] - 1, -1)

    ret: List[Input] = []
    for sel in selections:
        entry: Input = {'muxsel': sel, 'flags': flags, 'ele': ele}
        ret.append(entry)
    return ret
|
|
|
|
|
|
# Parse a numerical field into an int, between a given minimum and maximum.
|
|
def parse_val(ele: Element, text: str, minimum: int, maximum: int) -> int:
    """Parse a numerical field and check that it lies within a range.

    Accepted notations are decimal, hexadecimal ('0x' prefix) and binary
    ('0b' prefix). The prefix is matched case-insensitively and may be
    preceded by a sign (e.g. '-0x10').

    Args:
        ele: Element the text comes from; used for error reporting.
        text: Text of the number.
        minimum: Lowest value accepted.
        maximum: Highest value accepted.

    Returns:
        The parsed value.

    Raises:
        bsasm_syntax_error: If the text is not an integer or the value is out
            of range.
    """
    digits = text.lower()
    sign = ''
    if digits[:1] in ('-', '+'):
        sign = digits[0]
        digits = digits[1:]
    try:
        if digits[:2] == '0x':
            n = int(sign + digits[2:], 16)
        elif digits[:2] == '0b':
            n = int(sign + digits[2:], 2)
        else:
            n = int(sign + digits)
    except ValueError:
        # The ValueError itself holds no extra information for the user.
        raise bsasm_syntax_error(ele, f"'{text}' is not an integer") from None
    if n < minimum or n > maximum:
        raise bsasm_syntax_error(
            ele, f"'{text}' is out of range [{minimum}..{maximum}]"
        )
    return n
|
|
|
|
|
|
# Return an IP for a label text
|
|
def resolve_label(ele: Element, text: str, labels: Dict[str, int]) -> int:
    """Return the instruction pointer recorded for the label named `text`.

    Raises bsasm_syntax_error (tagged with `ele`) if no such label is known.
    """
    ip = labels.get(text)
    if ip is None:
        # No match. We could technically also see if the label is a direct IP, but
        # that is more likely to be used erroneously than on purpose. If you read
        # this and disagree, feel free to file an issue :)
        raise bsasm_syntax_error(ele, f"'{text}': Label not found.")
    return ip
|
|
|
|
|
|
# Bitfields defining the instructions
|
|
OP_LOOP   = 0x2000000  # LOOPc end_val ctr_add tgt
OP_ADD    = 0x0000000  # ADDc[h|l] ctr_add (also used for the NOP pseudo-op)
OP_IF     = 0x0010000  # IF ctl_cond_src tgt (also used for the JMP pseudo-op)
OP_IFN    = 0x0020000  # IFN ctl_cond_src tgt
OP_LDCTD  = 0x0030000  # LDCTDc[h|l] ctr_set
OP_LDCTI  = 0x0040000  # LDCTIc[h|l]
OP_ADDCTI = 0x0050000  # ADDCTIc[h|l]
|
|
|
|
|
|
def check_chip_supports_inst(chipcfg: Chipcfg, instgroup: str, ele: Element) -> None:
    """Raise bsasm_syntax_error if the chip lacks the given instruction group.

    A chip configuration with a true 'support_all' entry accepts everything;
    otherwise `instgroup` has to be listed in 'extra_instruction_groups'.
    `ele` is only used to tag the error with a source position.
    """
    if chipcfg.get('support_all'):
        return
    if instgroup in chipcfg['extra_instruction_groups']:
        return

    name = chipcfg['chipname']
    raise bsasm_syntax_error(
        ele, f'Chip {name} does not support this instruction'
    )
|
|
|
|
|
|
def add_op_to_inst(inst: Inst, op: Opcode, ele: Element) -> None:
    """Store `op` as the opcode of `inst` and record `ele` as its origin.

    Raises bsasm_syntax_error, naming both elements, if the instruction
    already has an opcode.
    """
    existing = inst.get('op')
    if existing is not None:
        raise bsasm_syntax_error(
            existing['ele'], ele, 'Cannot have multiple opcodes in one instruction'
        )
    op['ele'] = ele
    inst['op'] = op
|
|
|
|
|
|
# Takes the elements generated by the parse routine and converts it to a
|
|
# representation of the bits in the Bitscrambler program.
|
|
def bsasm_assemble(elements: List[Element], chipcfg: Chipcfg) -> Tuple[List[Inst], Dict[str, int], List[int]]:
    """Convert parsed elements into instructions, meta settings and LUT data.

    Assembly happens in passes: pass 1a finds the instruction pointer of every
    label and marks meta elements, pass 1b evaluates the 'cfg' statements,
    pass 1c collects the 'lut' data and pass 2 decodes the instructions.
    Mnemonics, labels and meta keywords are matched case-insensitively.

    Args:
        elements: Elements as returned by bsasm_parse(). Every element gets an
            'is_meta' key added.
        chipcfg: Capabilities of the chip that is being assembled for.

    Returns:
        A tuple ``(insts, meta, lut)``: the decoded instructions, the values of
        the meta settings (defaults where not configured) and the LUT values.

    Raises:
        bsasm_syntax_error: On any error in the source.
    """
    # Set the meta-instruction values to their defaults
    meta: Dict[str, int] = {d['op']: d['default'] for d in meta_inst_defs}

    # Pass 1a: find IPs for labels, mark meta instructions
    # ToDo: also resolve 'def' symbols here once we implement them
    ip = 0
    ip_for_label: Dict[str, int] = {}
    inst_is_meta = False
    inst_start = True
    for ele in elements:
        if inst_start and is_meta(ele):
            # Start of meta-instruction (can only occur at first ele in instruction)
            inst_is_meta = True

        ele['is_meta'] = inst_is_meta
        if not inst_is_meta and ele['is_label']:
            # Label. Record its IP. Instructions are lowercased before their
            # label operand is looked up, so store the label the same way.
            ip_for_label[ele['text'].lower()] = ip

        if ele['more_in_instruction']:
            inst_start = False
        else:
            # End of an instruction
            inst_start = True  # mark next element as start of inst
            if (not ele['is_meta']) and (not ele['is_label']):
                ip += 1
            inst_is_meta = False

    # Pass 1B: Collate and parse meta instructions
    inst_start = True
    for ele in elements:
        if inst_start and ele['is_meta'] and ele['text'].lower().startswith('cfg '):
            (key, val) = parse_meta_cfg(ele)
            meta[key] = val
            if ele['more_in_instruction']:
                raise bsasm_syntax_error(
                    ele, 'garbage after cfg statement detected'
                )
        inst_start = not ele['more_in_instruction']

    # Pass 1C: parse LUT data instructions. We do this after the meta instructions pass
    # as it requires the size of the LUT to figure out min/max boundaries.
    # Note a lut can be written both as 'lut 1 2 3' as well as 'lut 1,2,3' so we need
    # to account for both cases.
    # Each entry may be given either as a signed or as an unsigned number.
    lut_minmax_vals = {
        8: (-128, 255),
        16: (-32768, 65535),
        32: (-2147483648, 4294967296 - 1),
    }
    lut_min, lut_max = lut_minmax_vals[meta['lut_width_bits']]
    lut: List[int] = []
    is_lut = False
    for ele in elements:
        if not ele['is_meta']:
            continue
        words = ele['text'].lower().split(' ')
        if is_lut:
            # Continuation of a comma-separated lut statement: all words are data.
            for w in words:
                lut.append(parse_val(ele, w, lut_min, lut_max))
        if words[0] == 'lut' and len(words) > 1:
            # Start of a lut statement: everything after the keyword is data.
            is_lut = True
            for w in words[1:]:
                lut.append(parse_val(ele, w, lut_min, lut_max))
        if not ele['more_in_instruction']:
            is_lut = False

    # Pass 2: Parse any instructions
    valid_read_write = [0, 8, 16, 32]
    insts: List[Inst] = []
    def_inst: Inst = {'mux': {}}
    inst = copy.deepcopy(def_inst)
    op: Opcode
    for ele in elements:
        if ele['is_meta'] or ele['is_label']:
            continue

        words = ele['text'].lower().split(' ')
        mnemonic = words[0]
        # Mnemonics are matched in full so that e.g. 'nopx' is not taken for 'nop'.
        if mnemonic == 'set':
            # set (output) (mux input)
            check_arg_ct(ele, words, 3)
            outs = parse_output_range(ele, words[1])
            ins = parse_input_range(ele, words[2], meta)
            if len(ins) != 1 and len(ins) != len(outs):
                raise bsasm_syntax_error(ele, 'ranges not the same length')
            for idx, out in enumerate(outs):
                if out in inst['mux']:
                    raise bsasm_syntax_error(
                        ele, f'output {out} already set earlier in instruction'
                    )
                # A single input is fanned out to all outputs in the range;
                # otherwise outputs and inputs are paired up one by one.
                inst['mux'][out] = ins[0] if len(ins) == 1 else ins[idx]
        elif mnemonic in ('write', 'read'):
            # Write x bits to output fifo / read x bits from input fifo
            check_arg_ct(ele, words, 2)
            no = parse_val(ele, words[1], 0, 32)
            if no not in valid_read_write:
                raise bsasm_syntax_error(
                    ele, f'{no} is not a valid amount of bits to {mnemonic}'
                )
            if mnemonic == 'write':
                inst['write'] = no
            else:
                inst['read'] = no
        elif re.fullmatch('loop[ab]', mnemonic):
            # LOOPc end_val ctr_add tgt
            check_arg_ct(ele, words, 4)
            op = {'op': OP_LOOP, 'ele': ele}
            op['c'] = 1 if mnemonic[4] == 'b' else 0
            op['end_val'] = parse_val(ele, words[1], -32768, 65535) & 0xffff
            op['ctr_add'] = parse_val(ele, words[2], -16, 15) & 31
            op['tgt'] = resolve_label(ele, words[3], ip_for_label)
            add_op_to_inst(inst, op, ele)
        elif re.fullmatch('add[ab]([hl])?', mnemonic):
            # ADDc[h|l] ctr_add
            check_arg_ct(ele, words, 2)
            op = {'op': OP_ADD, 'ele': ele}
            op['c'] = 1 if mnemonic[3] == 'b' else 0
            op['h'], op['l'] = _bsasm_half_flags(mnemonic, 4)
            op['ctr_add'] = parse_val(ele, words[1], -32768, 65535) & 0xffff
            add_op_to_inst(inst, op, ele)
        elif re.fullmatch('if(n)?', mnemonic):
            # IF[N] ctl_cond_src tgt
            check_arg_ct(ele, words, 3)
            op = {'op': OP_IF if mnemonic == 'if' else OP_IFN, 'ele': ele}
            op['ctl_cond_src'] = parse_input(ele, words[1], meta)
            op['tgt'] = resolve_label(ele, words[2], ip_for_label)
            add_op_to_inst(inst, op, ele)
        elif re.fullmatch('ldctd[ab]([hl])?', mnemonic):
            # LDCTDc[h|l] ctr_set
            check_arg_ct(ele, words, 2)
            op = {'op': OP_LDCTD}
            op['c'] = 1 if mnemonic[5] == 'b' else 0
            op['h'], op['l'] = _bsasm_half_flags(mnemonic, 6)
            op['ctr_add'] = parse_val(ele, words[1], -32768, 65535) & 0xffff
            add_op_to_inst(inst, op, ele)
        elif re.fullmatch('ldcti[ab]([hl])?', mnemonic):
            # LDCTIc[h|l]
            check_arg_ct(ele, words, 1)
            op = {'op': OP_LDCTI}
            op['c'] = 1 if mnemonic[5] == 'b' else 0
            op['h'], op['l'] = _bsasm_half_flags(mnemonic, 6)
            add_op_to_inst(inst, op, ele)
        elif re.fullmatch('addcti[ab]([hl])?', mnemonic):
            # ADDCTIc[h|l]
            check_chip_supports_inst(chipcfg, 'addcti', ele)
            check_arg_ct(ele, words, 1)
            op = {'op': OP_ADDCTI}
            op['c'] = 1 if mnemonic[6] == 'b' else 0
            op['h'], op['l'] = _bsasm_half_flags(mnemonic, 7)
            add_op_to_inst(inst, op, ele)
        elif mnemonic == 'jmp':
            # JMP tgt. Pseudo-op, translates to 'IF h tgt'
            check_arg_ct(ele, words, 2)
            op = {'op': OP_IF}
            op['ctl_cond_src'] = parse_input(ele, 'h', meta)
            op['tgt'] = resolve_label(ele, words[1], ip_for_label)
            add_op_to_inst(inst, op, ele)
        elif mnemonic == 'nop':
            # NOP. Pseudo-op, translates to ADDA 0
            check_arg_ct(ele, words, 1)
            op = {'op': OP_ADD}
            op['h'] = 1
            op['l'] = 1
            op['ctr_add'] = 0
            add_op_to_inst(inst, op, ele)
        else:
            raise bsasm_syntax_error(ele, 'unknown instruction')

        if not ele['more_in_instruction']:
            # Last sub-instruction of the bundle: the instruction is complete.
            insts.append(inst)
            inst = copy.deepcopy(def_inst)
    return (insts, meta, lut)


def _bsasm_half_flags(mnemonic: str, base_len: int) -> Tuple[int, int]:
    """Return the (h, l) flags for a counter opcode with an optional h/l suffix.

    `base_len` is the length of the mnemonic without suffix. No suffix selects
    both halves; an 'h' or 'l' suffix selects only that half.
    """
    if len(mnemonic) == base_len:
        return (1, 1)
    suffix = mnemonic[base_len]
    return (1 if suffix == 'h' else 0, 1 if suffix == 'l' else 0)
|
|
|
|
|
|
# Quick and dirty way to assemble a bytearray from a bunch of bitfields.
|
|
# The implementation is not optimal as it handles data bit-by-bit, but it works fine.
|
|
|
|
|
|
class bitstream:
    """Accumulates bitfields, least significant bit first, into a list of bytes.

    Bits are gathered in `curbyte`; each time eight bits have been collected
    the byte is moved to `data`. The implementation handles data bit-by-bit,
    which is not optimal but works fine.
    """

    bitpos: int   # number of bits collected so far in `curbyte`
    data: list    # completed bytes
    curbyte: int  # byte that is currently being filled

    def __init__(self) -> None:
        self.data = []
        self.curbyte = 0
        self.bitpos = 0

    def add_bits(self, val: int, bits: int) -> None:
        """Append the lowest `bits` bits of `val` to the end of the bitstream."""
        remaining = val
        for _ in range(bits):
            # Make room at the top of the byte and put the next bit there; after
            # eight bits the first one added has arrived at bit 0.
            self.curbyte >>= 1
            if remaining & 1:
                self.curbyte |= 0x80
            remaining >>= 1
            self.bitpos += 1
            if self.bitpos == 8:
                self.data.append(self.curbyte)
                self.curbyte = 0
                self.bitpos = 0

    def to_bytearray(self) -> bytearray:
        """Return the completed bytes; bits of an unfinished byte are not included."""
        return bytearray(self.data)

    def size(self) -> int:
        """Return the size in bits, including the bits of an unfinished byte."""
        return 8 * len(self.data) + self.bitpos
|
|
|
|
|
|
# This encodes all the instructions into binary.
|
|
def insts_to_binary(insts: List[Inst], meta: Dict[str, int], lut: list) -> bytearray:
    """Encode an assembled program into the on-disk binary format.

    Layout of the returned data:
      * Header (12 bytes) with a self-described length. Any fields that are known
        to the firmware past this length will be assumed to be 0.
      * Instructions, padded to 36 bytes per instruction line. The amount of
        instructions is stored in the header.
      * LUT data, in 32-bit little-endian words. The length is stored in the header.

    Neither `insts` nor `lut` is modified; padding and defaults are applied to
    local copies only.

    Args:
        insts: instruction lines to encode; at most eight are accepted.
        meta: program settings; reads 'lut_width_bits', 'prefetch',
            'trailing_bytes' and 'eof_on'.
        lut: LUT entries, each 'lut_width_bits' wide.

    Returns:
        The complete binary image.

    Raises:
        RuntimeError: if there are more than eight instructions, or if an encoded
            instruction does not come out at exactly 257 bits (internal error).
    """
    if len(insts) > 8:
        raise RuntimeError('Program has more than eight instructions.')
    ret = bytearray()

    # We need to reformat the LUT into 32-bit values, if not already in that format.
    # Work on a copy so the zero padding never leaks into the caller's list.
    lut_width = meta['lut_width_bits']
    if lut_width in (8, 16):
        entries_per_word = 32 // lut_width
        entry_mask = (1 << lut_width) - 1
        padded = list(lut)
        padded.extend([0] * (-len(padded) % entries_per_word))
        lut_reformatted = []
        for start in range(0, len(padded), entries_per_word):
            word = 0
            # The first entry of each group lands in the least significant bits.
            for pos, entry in enumerate(padded[start:start + entries_per_word]):
                word |= (entry & entry_mask) << (pos * lut_width)
            lut_reformatted.append(word)
    else:  # 32-bit
        lut_reformatted = list(lut)

    # Header. Note this should always be a multiple of 32 bits, as its length is
    # stored in 32-bit words.
    lut_width_vals = {8: 0, 16: 1, 32: 2}
    ret += struct.pack(
        '<BBBBHBBHBB',
        BITSCRAMBLER_BINARY_VER,  # byte
        BITSCRAMBLER_HW_REV,  # byte
        3,  # byte: Length of header in 32-bit words
        len(insts),  # byte: Instruction count
        len(lut_reformatted),  # short: Length of LUT in 32-bit words
        lut_width_vals[lut_width],  # byte: LUT width setting (0, 1, 2)
        meta['prefetch'],  # byte: prefetch enabled/disabled
        meta['trailing_bytes'] * 8,  # short: number of trailing *bits* after eof
        meta['eof_on'],  # byte
        0,  # byte: unused for now
    )

    val_for_read_write = {0: 0, 8: 1, 16: 2, 32: 3}

    for inst in insts:
        bits = bitstream()

        # Default to NOP, which is encoded as ADDA 0
        op = inst.get('op', {'op': OP_ADD, 'h': 1, 'l': 1, 'ctr_add': 0})

        # If the opcode also needs a source, we add it to a local copy of the mux
        # list as input 32 (one past the 32 real mux lines) so that it takes part
        # in the compatibility check and flag collection below.
        mux = dict(inst['mux'])
        if 'ctl_cond_src' in op:
            mux[32] = op['ctl_cond_src']

        # Check if mux bits are compatible and figure out flags needed
        # Also set unused mux lines to 'l'
        # Finally, insert mux bits into the bitstream.
        flags = {'rel_addr': 0, 'ctrsel': 0, 'lutsel': 0}

        for i in range(0, 33):
            if i in mux:
                # This could be optimized, but checking each input against each other input
                # allows us to easily tell the user exactly which inputs clash.
                for j in range(i + 1, 33):
                    if j in mux:
                        check_input_compatible(mux[i], mux[j])
                input_flags = mux[i].get('flags', {})
                for flag_name in ('rel_addr', 'ctrsel', 'lutsel'):
                    if flag_name in input_flags:
                        flags[flag_name] = input_flags[flag_name]
                # Entry 32 is the opcode's condition source; it is encoded in the
                # opcode field, not as a mux selector.
                if i < 32:
                    bits.add_bits(mux[i]['muxsel'], 7)
            else:
                # Input mux bit is undefined in the program. Set it to a
                # fixed-low input.
                low_input = parse_input({}, 'l', {'lut_width_bits': 8})
                if i < 32:
                    bits.add_bits(low_input['input'], 7)

        # Encode the opcode
        opcode = op.get('op', 0)
        if op.get('c'):
            opcode |= 1 << 24
        if op.get('h'):
            opcode |= 1 << 23
        if op.get('l'):
            opcode |= 1 << 22
        # NOTE(review): 'tgt' starts at bit 21, right below the l/h/c bits, so a
        # multi-bit target shares bit positions with them; presumably opcodes use
        # either a target or those flags, never both - confirm against the
        # hardware reference.
        if 'tgt' in op:
            opcode |= op['tgt'] << 21
        if 'end_val' in op:
            opcode |= op['end_val'] << 5
        if 'ctr_add' in op:  # also aliased to ctr_set
            opcode |= op['ctr_add'] << 0
        if 'ctl_cond_src' in op:
            opcode |= op['ctl_cond_src']['input'] << 0
        bits.add_bits(opcode, 26)

        # Add the rest of the fields: read, write, source sel and reladr
        bits.add_bits(val_for_read_write[inst.get('read', 0)], 2)
        bits.add_bits(val_for_read_write[inst.get('write', 0)], 2)
        bits.add_bits(flags['rel_addr'], 1)
        bits.add_bits(flags['ctrsel'], 1)
        bits.add_bits(flags['lutsel'], 1)
        # 32 * 7 mux bits + 26 opcode bits + 2 + 2 + 3 = 257
        if bits.size() != 257:
            raise RuntimeError(f'Internal error: instruction size is {bits.size()}!')
        # Pad instruction field to 36 bytes = 9 32-bit words
        bits.add_bits(0, 31)
        ret += bits.to_bytearray()

    for word in lut_reformatted:
        ret += struct.pack('<I', word & 0xffffffff)

    return ret
|
|
|
|
|
|
# Return the contents of a file
|
|
def read_file(filename: str) -> str:
    """Return the entire contents of the text file `filename`.

    Raises:
        OSError: if the file cannot be opened or read. A one-line message naming
            the file is printed to stderr before the exception propagates, so the
            caller sees the real failure instead of an unrelated
            UnboundLocalError.
    """
    try:
        with open(filename, 'r') as f:
            return f.read()
    except OSError as e:
        print(f'Error opening {filename}: {e}', file=sys.stderr)
        raise
|
|
|
|
|
|
# Write a bytestring to a file
|
|
def write_file(filename: str, data: bytearray) -> None:
    """Create (or overwrite) `filename` and store the raw bytes of `data` in it."""
    with open(filename, 'wb') as out_fh:
        out_fh.write(data)
|
|
|
|
|
|
# Command-line entry point: assemble a .bsasm source file into a .bsbin binary.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description='BitScrambler program assembler')
    parser.add_argument('infile', help='File name of assembly source to be assembled into a binary')
    parser.add_argument('outfile', help='File name of output binary', nargs='?', default=argparse.SUPPRESS)
    parser.add_argument(
        '-c',
        help='Set chip capabilities json file; if set, returns an error when '
             'an unsupported instruction is assembled',
        default=argparse.SUPPRESS)
    args = parser.parse_args()

    # Without a capabilities file, fall back to a generic chip that accepts everything.
    chipcfg = Chipcfg()
    if 'c' in args:
        with open(args.c) as chipcfg_json:
            chipcfg = json.load(chipcfg_json)
    else:
        chipcfg = {'chipname': 'chip', 'extra_instruction_groups': [], 'support_all': True}

    if 'outfile' in args:
        outfile = args.outfile
    else:
        # Only strip a literal '.bsasm' extension at the very end of the name; an
        # unescaped, unanchored pattern would also eat matches elsewhere in the path.
        outfile = re.sub(r'\.bsasm$', '', args.infile) + '.bsbin'

    asm = read_file(args.infile)
    tokens = bsasm_parse(asm)
    insts, meta, lut = bsasm_assemble(tokens, chipcfg)
    out_data = insts_to_binary(insts, meta, lut)
    write_file(outfile, out_data)

    # `lut` holds one entry per LUT element of `lut_width_bits` bits; convert that
    # to the number of 32-bit words actually written (rounded up).
    lut_words = (len(lut) * meta['lut_width_bits'] + 31) // 32
    print(f'Written {len(insts)} instructions and {lut_words} 32-bit words of LUT.')
|