# Extracted from patch e935ee319d32732b8404230388f26205279afc3b
# Author:  Eyck-Alexander Jentzsch
# Date:    Wed, 1 Oct 2025 15:17:27 +0200
# Subject: adds script for turning json output into asm macros
# File:    process_json.py
"""Generate assembler macros from CoreDSL2JSON output.

The input JSON lists instructions, each with a ``name`` and a ``decoding``
string.  A decoding is a ``|``-separated list, most significant part first,
of either binary literals (``0010011``) or operand bit slices
(``imm[11:0]``, ``rd[4:0]``, ``flag[3]``).  For every instruction a
``.macro`` is printed that assembles the encoding from its operands.
"""
import argparse
import json
import sys
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Union

# NOTE: ``jsonschema`` is imported inside ``load_and_validate`` so that the
# encoding classes below stay importable without the third-party package.

INSTRUCTION_SCHEMA = {
    "type": "object",
    "properties": {
        "instructions": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "decoding": {"type": "string"}
                },
                "required": ["name", "decoding"]
            }
        }
    },
    "required": ["instructions"]
}

# Data directive used for a given total encoding width (in bits).  Widths
# not listed here fall back to ``.word``.
_DATA_DIRECTIVES = {16: ".half", 32: ".word", 64: ".dword"}


class EncodingPart(ABC):
    """One contiguous piece of an instruction encoding."""

    # Bit position of this part's least significant bit in the instruction.
    pos: int = 0
    # Width of this part in bits.
    size: int = 0

    @abstractmethod
    def coredsl(self) -> str:
        """Return this part in the notation used by the decoding string."""


class Field(EncodingPart):
    """A bit slice of a named operand, e.g. ``imm[11:0]`` or ``flag[3]``."""

    def get_bounds(self, s: str) -> tuple[int, int]:
        """Return ``(start, end)`` (lowest, highest bit) of the slice in *s*.

        Raises:
            ValueError: if the brackets are missing or the bounds are not
                integers.
        """
        inner = s[s.index("[") + 1:s.rindex("]")]
        if ":" in inner:
            # Written as [end:start], i.e. most significant bit first.
            end, start = inner.split(":")
        else:
            start = end = inner
        return int(start), int(end)

    def __init__(self, s: str) -> None:
        """Parse a specifier of the form ``name[end:start]`` or ``name[bit]``.

        Raises:
            ValueError: if *s* does not contain exactly one ``[`` and one
                ``]``, or if the bounds are negative or reversed.
        """
        s = s.strip()
        # Explicit checks instead of assert: asserts vanish under ``-O``.
        if s.count("[") != 1 or s.count("]") != 1:
            raise ValueError(f"Malformed field specifier: {s!r}")
        self.start, self.end = self.get_bounds(s)
        if not 0 <= self.start <= self.end:
            raise ValueError(f"Invalid bit range in field specifier: {s!r}")
        self.size = self.end - self.start + 1
        self.name = s[:s.index("[")].strip()

    def __str__(self) -> str:
        """Return the assembler expression placing this slice at ``pos``.

        The operand (referenced as a backslash-prefixed macro argument) is
        masked to bits ``[end:start]`` and then shifted by the distance
        between ``start`` and ``pos``.
        """
        mask = "1" * self.size + "0" * self.start
        value_str = f"(\\{self.name} & 0b{mask})"
        offset = self.pos - self.start
        if offset > 0:
            return f"{value_str} << {offset}"
        if offset < 0:
            return f"{value_str} >> {-offset}"
        return value_str

    def coredsl(self) -> str:
        """Return the slice as ``name[end:start]`` (also for single bits)."""
        return f"{self.name}[{self.end}:{self.start}]"

    def __repr__(self) -> str:
        return "Field: " + self.coredsl()


class Literal(EncodingPart):
    """A run of constant bits given as a binary string such as ``0010011``."""

    def __init__(self, s: str) -> None:
        """Parse a non-empty string consisting only of ``0`` and ``1``.

        Raises:
            ValueError: if *s* is empty or contains other characters.
        """
        s = s.strip()
        if not s or not set(s) <= {"0", "1"}:
            raise ValueError(f"Literal must be a binary string, got {s!r}")
        self.value = int(s, 2)
        self.size = len(s)

    def __str__(self) -> str:
        """Return the zero-padded binary constant shifted to ``pos``."""
        value_str = self.coredsl()
        return f"{value_str} << {self.pos}" if self.pos else value_str

    def coredsl(self) -> str:
        """Return the constant as ``0b...`` padded to the literal's width."""
        return format(self.value, f"#0{self.size + 2}b")

    def __repr__(self) -> str:
        return "Literal: " + self.coredsl()


class Encoding:
    """The parsed encoding of a single instruction."""

    name: str
    parts: tuple[EncodingPart, ...]

    def __init__(self, d: dict) -> None:
        """Build the encoding from a dict with ``name`` and ``decoding``.

        Parts are stored most significant first; each part's ``pos`` is the
        bit index of its least significant bit.
        """
        raw_parts = [raw.strip() for raw in d["decoding"].split("|")]
        next_pos = 0
        buf: list[EncodingPart] = []
        # Walk from the least significant part so positions accumulate.
        for raw in reversed(raw_parts):
            typed = Field(raw) if "[" in raw else Literal(raw)
            typed.pos = next_pos
            next_pos += typed.size
            buf.append(typed)
        self.parts = tuple(reversed(buf))
        self.name = d["name"].upper()

    def _get_masked_enc(self) -> str:
        """Return the encoding as a bit string with ``x`` for operand bits."""
        return "".join(
            format(elem.value, f"0{elem.size}b")
            if isinstance(elem, Literal) else elem.size * "x"
            for elem in self.parts
        )

    def _collapsed_parts(self) -> tuple[EncodingPart, ...]:
        """Return the parts with runs of adjacent literals merged into one."""
        merged: list[EncodingPart] = []
        for part in self.parts:
            prev = merged[-1] if merged else None
            if isinstance(part, Literal) and isinstance(prev, Literal):
                bits = format((prev.value << part.size) | part.value,
                              f"0{prev.size + part.size}b")
                combined = Literal(bits)
                # The merged literal ends where its lowest member ends;
                # without this it would silently sit at bit 0.
                combined.pos = part.pos
                merged[-1] = combined
            else:
                merged.append(part)
        return tuple(merged)

    def _collapse_literals(self):
        """Replace ``self.parts`` by its literal-collapsed form (in place)."""
        self.parts = self._collapsed_parts()

    def valid_size(self, target_size: int = -1) -> bool:
        """Check that the parts add up to the expected instruction width.

        Args:
            target_size: Expected width in bits.  With ``-1`` the width is
                derived from the lowest encoding bits: anything not ending
                in ``11`` is 16 bit, bits [4:2] equal to ``111`` mark an
                instruction longer than 32 bit, everything else is 32 bit.

        Returns:
            ``True`` if the summed part sizes equal the expected width.
            Encodings detected as longer than 32 bit are never valid unless
            ``target_size`` is given explicitly.
        """
        total_size = sum(part.size for part in self.parts)
        if target_size == -1:
            enc_str = self._get_masked_enc()
            if enc_str[-2:] != "11":
                target_size = 16
            elif enc_str[-5:-2] == "111":
                # ILEN >32: width cannot be derived here.
                return False
            else:
                target_size = 32
        return total_size == target_size

    def create_macro(self) -> str:
        """Return the ``.macro`` ... ``.endm`` text for this instruction.

        Macro arguments are the unique operand names, ordered ``rd`` first,
        then ``rsN`` by number, then all others alphabetically.  The body
        is a comment listing the encoding parts followed by one data
        directive OR-ing all parts together.  ``self.parts`` is not
        modified, so repeated calls return the same text.
        """
        def riscv_sort_key(name: str) -> tuple[int, Union[int, str]]:
            if name == "rd":
                return (0, 0)
            if name.startswith("rs") and name[2:].isdigit():
                return (1, int(name[2:]))
            return (2, name)

        field_names = [
            elem.name for elem in self.parts if isinstance(elem, Field)]
        # dict.fromkeys de-duplicates operands split over several slices.
        unique_field_names = list(dict.fromkeys(field_names))
        header = f".macro {self.name}"
        if unique_field_names:
            header += ", " + ", ".join(
                sorted(unique_field_names, key=riscv_sort_key))
        indent = " "

        comment = "# Encoding parts: " + \
            " ".join(elem.coredsl() for elem in self.parts)
        # Pick the directive matching the encoding width so that e.g. a
        # 16 bit instruction does not emit 32 bits of data.
        total_size = sum(part.size for part in self.parts)
        directive = _DATA_DIRECTIVES.get(total_size, ".word")
        content = directive + " " + \
            " | ".join(str(elem) for elem in self._collapsed_parts())

        tail = ".endm"
        return "\n".join([header, indent + comment, indent + content, tail])


def parse_args(argv=None):
    """Parse the command line (``sys.argv`` when *argv* is ``None``)."""
    parser = argparse.ArgumentParser(
        description="Generate assembler macros from CoreDSL2JSON output."
    )
    parser.add_argument(
        "path",
        type=Path,
        help="Path to the JSON file generated by CoreDSL2JSON."
    )
    parser.add_argument(
        "--name",
        type=str,
        help="Name of the instruction to generate the macro for (optional)."
    )
    parser.add_argument(
        "--size",
        type=int,
        default=-1,
        help="Instruction size in bits. If not set, checks lowest bits of the instruction and determines size according to default RISC-V specification."
    )
    return parser.parse_args(argv)


def load_and_validate(path: Path):
    """Load the JSON file at *path* and check it against the schema.

    Raises:
        ValueError: if the document does not match ``INSTRUCTION_SCHEMA``.
    """
    from jsonschema import ValidationError, validate

    data = json.loads(path.read_text(encoding="utf-8"))
    try:
        validate(instance=data, schema=INSTRUCTION_SCHEMA)
    except ValidationError as e:
        raise ValueError(f"Invalid JSON format: {e.message}") from e
    return data


def main(argv=None):
    """Print a macro for every (or the selected) instruction in the file."""
    args = parse_args(argv)
    data = load_and_validate(args.path)
    # Encoding upper-cases names, so the filter must compare the same way.
    wanted = args.name.upper() if args.name else None
    for instruction in data["instructions"]:
        if wanted and wanted != instruction["name"].upper():
            continue
        enc = Encoding(instruction)
        if not enc.valid_size(args.size):
            # Diagnostics go to stderr so stdout stays valid assembly.
            print(f"Invalid size for {enc.name}", file=sys.stderr)
            continue
        print(enc.create_macro())
        print()


if __name__ == "__main__":
    main()