This commit is contained in:
Waylon Walker 2022-03-31 20:20:07 -05:00
commit 38355d2442
No known key found for this signature in database
GPG key ID: 66E2BF2B4190EFE4
9083 changed files with 1225834 additions and 0 deletions

View file

@ -0,0 +1,427 @@
"""Intermediate representation of classes."""
from typing import List, Optional, Set, Tuple, Dict, NamedTuple
from mypy.backports import OrderedDict
from mypyc.common import JsonDict
from mypyc.ir.ops import Value, DeserMaps
from mypyc.ir.rtypes import RType, RInstance, deserialize_type
from mypyc.ir.func_ir import FuncIR, FuncDecl, FuncSignature
from mypyc.namegen import NameGenerator, exported_name
from mypyc.common import PROPSET_PREFIX
# Some notes on the vtable layout: Each concrete class has a vtable
# that contains function pointers for its methods. So that subclasses
# may be efficiently used when their parent class is expected, the
# layout of child vtables must be an extension of their base class's
# vtable.
#
# This makes multiple inheritance tricky, since obviously we cannot be
# an extension of multiple parent classes. We solve this by requiring
# all but one parent to be "traits", which we can operate on in a
# somewhat less efficient way. For each trait implemented by a class,
# we generate a separate vtable for the methods in that trait.
# We then store an array of (trait type, trait vtable) pointers alongside
# a class's main vtable. When we want to call a trait method, we
# (at runtime!) search the array of trait vtables to find the correct one,
# then call through it.
# Trait vtables additionally need entries for attribute getters and setters,
# since they can't always be in the same location.
#
# To keep down the number of indirections necessary, we store the
# array of trait vtables in the memory *before* the class vtable, and
# search it backwards. (This is a trick we can only do once---there
# are only two directions to store data in---but I don't think we'll
# need it again.)
# There are some tricks we could try in the future to store the trait
# vtables inline in the trait table (which would cut down one indirection),
# but this seems good enough for now.
#
# As an example:
# Imagine that we have a class B that inherits from a concrete class A
# and traits T1 and T2, and that A has methods foo() and
# bar() and B overrides bar() with a more specific type and also
# defines a new method baz().
# Then B's vtable will look something like:
#
#      T1 type object
#      ptr to B's T1 trait vtable
#      T2 type object
#      ptr to B's T2 trait vtable
# -> | A.foo
#    | Glue function that converts between A.bar's type and B.bar
#      B.bar
#      B.baz
#
# The arrow points to the "start" of the vtable (what vtable pointers
# point to) and the bars indicate which parts correspond to the parent
# class A's vtable layout.
#
# Classes that allow interpreted code to subclass them also have a
# "shadow vtable" that contains implementations that delegate to
# making a pycall, so that overridden methods in interpreted children
# will be called. (A better strategy could dynamically generate these
# vtables based on which methods are overridden in the children.)
# Descriptions of method and attribute entries in class vtables.
# The 'cls' field is the class that the method/attr was defined in,
# which might be a parent class.
# The 'shadow_method', if present, contains the method that should be
# placed in the class's shadow vtable (if it has one).
class VTableMethod(NamedTuple):
    cls: 'ClassIR'
    name: str
    method: FuncIR
    shadow_method: Optional[FuncIR]


# The entries of one vtable, in order.
VTableEntries = List[VTableMethod]
class ClassIR:
    """Intermediate representation of a class.

    This also describes the runtime structure of native instances.
    """

    def __init__(self, name: str, module_name: str, is_trait: bool = False,
                 is_generated: bool = False, is_abstract: bool = False,
                 is_ext_class: bool = True) -> None:
        """Create a class IR with empty attribute, method and vtable tables.

        The arguments are stored unchanged on the instance. Everything else
        starts out empty/False/None and is expected to be filled in later.
        """
        self.name = name
        self.module_name = module_name
        self.is_trait = is_trait
        self.is_generated = is_generated
        self.is_abstract = is_abstract
        self.is_ext_class = is_ext_class
        # An augmented class has additional methods separate from what mypyc generates.
        # Right now the only one is dataclasses.
        self.is_augmented = False
        # Does this inherit from a Python class?
        self.inherits_python = False
        # Do instances of this class have __dict__?
        self.has_dict = False
        # Do we allow interpreted subclasses? Derived from a mypyc_attr.
        self.allow_interpreted_subclasses = False
        # Does this class need getseters to be generated for its attributes? (getseters are also
        # added if is_generated is False)
        self.needs_getseters = False
        # If this is a subclass of some built-in python class, the name
        # of the object for that class. We currently only support this
        # in a few ad-hoc cases.
        self.builtin_base: Optional[str] = None
        # Default empty constructor
        self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))

        self.attributes: OrderedDict[str, RType] = OrderedDict()
        # Deletable attributes
        self.deletable: List[str] = []
        # We populate method_decls with the signatures of every method before
        # we generate methods, and we rely on this information being present.
        self.method_decls: OrderedDict[str, FuncDecl] = OrderedDict()
        # Map of methods that are actually present in an extension class
        self.methods: OrderedDict[str, FuncIR] = OrderedDict()
        # Glue methods for boxing/unboxing when a class changes the type
        # while overriding a method. Maps from (parent class overridden, method)
        # to IR of glue method.
        self.glue_methods: Dict[Tuple[ClassIR, str], FuncIR] = OrderedDict()

        # Properties are accessed like attributes, but have behavior like method calls.
        # They don't belong in the methods dictionary, since we don't want to expose them to
        # Python's method API. But we want to put them into our own vtable as methods, so that
        # they are properly handled and overridden. The property dictionary values are a tuple
        # containing a property getter and an optional property setter.
        self.properties: OrderedDict[str, Tuple[FuncIR, Optional[FuncIR]]] = OrderedDict()
        # We generate these in prepare_class_def so that we have access to them when generating
        # other methods and properties that rely on these types.
        self.property_types: OrderedDict[str, RType] = OrderedDict()

        self.vtable: Optional[Dict[str, int]] = None
        self.vtable_entries: VTableEntries = []
        self.trait_vtables: OrderedDict[ClassIR, VTableEntries] = OrderedDict()
        # N.B: base might not actually quite be the direct base.
        # It is the nearest concrete base, but we allow a trait in between.
        self.base: Optional[ClassIR] = None
        self.traits: List[ClassIR] = []
        # Supply a working mro for most generated classes. Real classes will need to
        # fix it up.
        self.mro: List[ClassIR] = [self]
        # base_mro is the chain of concrete (non-trait) ancestors
        self.base_mro: List[ClassIR] = [self]

        # Direct subclasses of this class (use subclasses() to also include non-direct ones)
        # None if separate compilation prevents this from working
        self.children: Optional[List[ClassIR]] = []

    def __repr__(self) -> str:
        """Return a debug representation showing the name and the constructor flags."""
        return (
            "ClassIR("
            "name={self.name}, module_name={self.module_name}, "
            "is_trait={self.is_trait}, is_generated={self.is_generated}, "
            "is_abstract={self.is_abstract}, is_ext_class={self.is_ext_class}"
            ")".format(self=self))

    @property
    def fullname(self) -> str:
        """The module-qualified class name, '<module_name>.<name>'."""
        return "{}.{}".format(self.module_name, self.name)

    def real_base(self) -> Optional['ClassIR']:
        """Return the actual concrete base class, if there is one."""
        if len(self.mro) > 1 and not self.mro[1].is_trait:
            return self.mro[1]
        return None

    def vtable_entry(self, name: str) -> int:
        """Return the vtable index recorded for 'name'.

        Asserts that the vtable has been computed and contains 'name'.
        """
        assert self.vtable is not None, "vtable not computed yet"
        assert name in self.vtable, '%r has no attribute %r' % (self.name, name)
        return self.vtable[name]

    def attr_details(self, name: str) -> Tuple[RType, 'ClassIR']:
        """Return the type of attribute or property 'name' and the class defining it.

        Classes are searched in MRO order; within a class, plain attributes
        take precedence over property types. Raises KeyError if not found.
        """
        for ir in self.mro:
            if name in ir.attributes:
                return ir.attributes[name], ir
            if name in ir.property_types:
                return ir.property_types[name], ir
        raise KeyError('%r has no attribute %r' % (self.name, name))

    def attr_type(self, name: str) -> RType:
        """Return the type of attribute or property 'name' (KeyError if missing)."""
        return self.attr_details(name)[0]

    def method_decl(self, name: str) -> FuncDecl:
        """Return the first declaration of method 'name' along the MRO.

        Raises KeyError if no class in the MRO declares it.
        """
        for ir in self.mro:
            if name in ir.method_decls:
                return ir.method_decls[name]
        raise KeyError('%r has no attribute %r' % (self.name, name))

    def method_sig(self, name: str) -> FuncSignature:
        """Return the signature of method 'name' (KeyError if missing)."""
        return self.method_decl(name).sig

    def has_method(self, name: str) -> bool:
        """Return True if some class in the MRO declares method 'name'."""
        try:
            self.method_decl(name)
        except KeyError:
            return False
        return True

    def is_method_final(self, name: str) -> bool:
        """Return True if no known subclass changes the method 'name'.

        If the set of subclasses cannot be determined, conservatively
        return False.
        """
        subs = self.subclasses()
        if subs is None:
            # TODO: Look at the final attribute!
            return False

        if self.has_method(name):
            # Final only if every subclass resolves to the very same declaration.
            method_decl = self.method_decl(name)
            return not any(subc.method_decl(name) != method_decl for subc in subs)
        else:
            return not any(subc.has_method(name) for subc in subs)

    def has_attr(self, name: str) -> bool:
        """Return True if some class in the MRO has attribute or property 'name'."""
        try:
            self.attr_type(name)
        except KeyError:
            return False
        return True

    def is_deletable(self, name: str) -> bool:
        """Return True if some class in the MRO lists 'name' as deletable."""
        return any(name in ir.deletable for ir in self.mro)

    def name_prefix(self, names: NameGenerator) -> str:
        """Return the private name generated by 'names' for this module/class pair."""
        return names.private_name(self.module_name, self.name)

    def struct_name(self, names: NameGenerator) -> str:
        """Return the exported form of the full name followed by 'Object'.

        The 'names' argument is not used by this implementation.
        """
        return '{}Object'.format(exported_name(self.fullname))

    def get_method_and_class(self, name: str) -> Optional[Tuple[FuncIR, 'ClassIR']]:
        """Return the first method IR for 'name' along the MRO and its class, or None."""
        for ir in self.mro:
            if name in ir.methods:
                return ir.methods[name], ir

        return None

    def get_method(self, name: str) -> Optional[FuncIR]:
        """Return the first method IR for 'name' along the MRO, or None."""
        res = self.get_method_and_class(name)
        return res[0] if res else None

    def subclasses(self) -> Optional[Set['ClassIR']]:
        """Return all subclasses of this class, both direct and indirect.

        Return None if it is impossible to identify all subclasses, for example
        because we are performing separate compilation.
        """
        if self.children is None or self.allow_interpreted_subclasses:
            return None
        result = set(self.children)
        for child in self.children:
            if child.children:
                child_subs = child.subclasses()
                if child_subs is None:
                    return None
                result.update(child_subs)
        return result

    def concrete_subclasses(self) -> Optional[List['ClassIR']]:
        """Return all concrete (i.e. non-trait and non-abstract) subclasses.

        Include both direct and indirect subclasses. Place classes with no children first.
        """
        subs = self.subclasses()
        if subs is None:
            return None
        concrete = {c for c in subs if not (c.is_trait or c.is_abstract)}
        # We place classes with no children first because they are more likely
        # to appear in various isinstance() checks. We then sort leaves by name
        # to get stable order.
        return sorted(concrete, key=lambda c: (len(c.children or []), c.name))

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict describing this class.

        Other classes and functions are referenced by full name / id rather
        than embedded.
        """
        return {
            'name': self.name,
            'module_name': self.module_name,
            'is_trait': self.is_trait,
            'is_ext_class': self.is_ext_class,
            'is_abstract': self.is_abstract,
            'is_generated': self.is_generated,
            'is_augmented': self.is_augmented,
            'inherits_python': self.inherits_python,
            'has_dict': self.has_dict,
            'allow_interpreted_subclasses': self.allow_interpreted_subclasses,
            'needs_getseters': self.needs_getseters,
            'builtin_base': self.builtin_base,
            'ctor': self.ctor.serialize(),
            # We serialize dicts as lists to ensure order is preserved
            'attributes': [(k, t.serialize()) for k, t in self.attributes.items()],
            # We try to serialize a name reference, but if the decl isn't in methods
            # then we can't be sure that will work so we serialize the whole decl.
            'method_decls': [(k, d.id if k in self.methods else d.serialize())
                             for k, d in self.method_decls.items()],
            # We serialize method fullnames out and put methods in a separate dict
            'methods': [(k, m.id) for k, m in self.methods.items()],
            'glue_methods': [
                ((cir.fullname, k), m.id)
                for (cir, k), m in self.glue_methods.items()
            ],

            # We serialize properties and property_types separately out of an
            # abundance of caution about preserving dict ordering...
            'property_types': [(k, t.serialize()) for k, t in self.property_types.items()],
            'properties': list(self.properties),

            'vtable': self.vtable,
            'vtable_entries': serialize_vtable(self.vtable_entries),
            'trait_vtables': [
                (cir.fullname, serialize_vtable(v)) for cir, v in self.trait_vtables.items()
            ],

            # References to class IRs are all just names
            'base': self.base.fullname if self.base else None,
            'traits': [cir.fullname for cir in self.traits],
            'mro': [cir.fullname for cir in self.mro],
            'base_mro': [cir.fullname for cir in self.base_mro],
            'children': [
                cir.fullname for cir in self.children
            ] if self.children is not None else None,
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'ClassIR':
        """Populate and return the ClassIR registered in ctx for the serialized class.

        The ClassIR object must already exist in ctx.classes under its full
        name; it is filled in from 'data' rather than created here. Classes
        and functions referenced by name/id are looked up in ctx.
        """
        fullname = data['module_name'] + '.' + data['name']
        assert fullname in ctx.classes, "Class %s not in deser class map" % fullname
        ir = ctx.classes[fullname]

        ir.is_trait = data['is_trait']
        ir.is_generated = data['is_generated']
        ir.is_abstract = data['is_abstract']
        ir.is_ext_class = data['is_ext_class']
        ir.is_augmented = data['is_augmented']
        ir.inherits_python = data['inherits_python']
        ir.has_dict = data['has_dict']
        ir.allow_interpreted_subclasses = data['allow_interpreted_subclasses']
        ir.needs_getseters = data['needs_getseters']
        ir.builtin_base = data['builtin_base']
        ir.ctor = FuncDecl.deserialize(data['ctor'], ctx)
        ir.attributes = OrderedDict(
            (k, deserialize_type(t, ctx)) for k, t in data['attributes']
        )
        # A string entry is a function id; anything else is a full serialized decl.
        ir.method_decls = OrderedDict((k, ctx.functions[v].decl
                                       if isinstance(v, str) else FuncDecl.deserialize(v, ctx))
                                      for k, v in data['method_decls'])
        ir.methods = OrderedDict((k, ctx.functions[v]) for k, v in data['methods'])
        ir.glue_methods = OrderedDict(
            ((ctx.classes[c], k), ctx.functions[v]) for (c, k), v in data['glue_methods']
        )
        ir.property_types = OrderedDict(
            (k, deserialize_type(t, ctx)) for k, t in data['property_types']
        )
        # The getter is stored under the property name, the optional setter
        # under the PROPSET_PREFIX-ed name.
        ir.properties = OrderedDict(
            (k, (ir.methods[k], ir.methods.get(PROPSET_PREFIX + k))) for k in data['properties']
        )

        ir.vtable = data['vtable']
        ir.vtable_entries = deserialize_vtable(data['vtable_entries'], ctx)
        ir.trait_vtables = OrderedDict(
            (ctx.classes[k], deserialize_vtable(v, ctx)) for k, v in data['trait_vtables']
        )

        base = data['base']
        ir.base = ctx.classes[base] if base else None
        ir.traits = [ctx.classes[s] for s in data['traits']]
        ir.mro = [ctx.classes[s] for s in data['mro']]
        ir.base_mro = [ctx.classes[s] for s in data['base_mro']]
        # None means "children unknown". Always build a fresh list otherwise,
        # so that an empty list is not shared with the raw serialized data.
        children = data['children']
        ir.children = None if children is None else [ctx.classes[s] for s in children]

        return ir
class NonExtClassInfo:
    """Information needed to construct a non-extension class (Python class).

    Includes the class dictionary, a tuple of base classes,
    the class annotations dictionary, and the metaclass.
    """

    def __init__(self, dict: Value, bases: Value, anns: Value, metaclass: Value) -> None:
        # Each argument is stored unchanged under the attribute of the same name.
        self.dict, self.bases = dict, bases
        self.anns, self.metaclass = anns, metaclass
def serialize_vtable_entry(entry: VTableMethod) -> JsonDict:
    """Return a JSON-compatible dict for one vtable entry.

    The class is referenced by full name and the methods by declaration id.
    """
    shadow = entry.shadow_method
    shadow_id = shadow.decl.id if shadow else None
    result = {
        '.class': 'VTableMethod',
        'cls': entry.cls.fullname,
        'name': entry.name,
        'method': entry.method.decl.id,
        'shadow_method': shadow_id,
    }
    return result
def serialize_vtable(vtable: VTableEntries) -> List[JsonDict]:
    """Serialize every entry of a vtable, preserving order."""
    serialized = []
    for entry in vtable:
        serialized.append(serialize_vtable_entry(entry))
    return serialized
def deserialize_vtable_entry(data: JsonDict, ctx: 'DeserMaps') -> VTableMethod:
    """Rebuild one vtable entry, resolving class and function ids through ctx.

    Only entries tagged '.class' == 'VTableMethod' are understood; anything
    else trips an assertion.
    """
    if data['.class'] == 'VTableMethod':
        owner = ctx.classes[data['cls']]
        method = ctx.functions[data['method']]
        shadow_id = data['shadow_method']
        shadow = ctx.functions[shadow_id] if shadow_id else None
        return VTableMethod(owner, data['name'], method, shadow)
    assert False, "Bogus vtable .class: %s" % data['.class']
def deserialize_vtable(data: List[JsonDict], ctx: 'DeserMaps') -> VTableEntries:
    """Deserialize every entry of a serialized vtable, preserving order."""
    entries = []
    for item in data:
        entries.append(deserialize_vtable_entry(item, ctx))
    return entries
def all_concrete_classes(class_ir: ClassIR) -> Optional[List[ClassIR]]:
    """Return all concrete classes among the class itself and its subclasses."""
    result = class_ir.concrete_subclasses()
    if result is None:
        # The set of subclasses is unknown, so the answer is unknown too.
        return None
    is_concrete = not class_ir.is_abstract and not class_ir.is_trait
    if is_concrete:
        # The class itself goes last, after its concrete subclasses.
        result.append(class_ir)
    return result

View file

@ -0,0 +1,315 @@
"""Intermediate representation of functions."""
from typing import List, Optional, Sequence
from typing_extensions import Final
from mypy.nodes import FuncDef, Block, ArgKind, ARG_POS
from mypyc.common import JsonDict, get_id_from_name, short_id_from_name
from mypyc.ir.ops import (
DeserMaps, BasicBlock, Value, Register, Assign, AssignMulti, ControlOp, LoadAddress
)
from mypyc.ir.rtypes import RType, deserialize_type
from mypyc.namegen import NameGenerator
class RuntimeArg:
    """Description of a function argument in IR.

    Argument kind is one of ARG_* constants defined in mypy.nodes.
    """

    def __init__(
            self, name: str, typ: RType, kind: ArgKind = ARG_POS, pos_only: bool = False) -> None:
        self.name, self.type = name, typ
        self.kind, self.pos_only = kind, pos_only

    @property
    def optional(self) -> bool:
        """Whether the argument kind reports itself as optional."""
        kind = self.kind
        return kind.is_optional()

    def __repr__(self) -> str:
        fields = (self.name, self.type, self.optional, self.pos_only)
        return 'RuntimeArg(name=%s, type=%s, optional=%r, pos_only=%r)' % fields

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict; the kind is stored as its integer value."""
        type_data = self.type.serialize()
        kind_value = int(self.kind.value)
        return {'name': self.name, 'type': type_data, 'kind': kind_value,
                'pos_only': self.pos_only}

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'RuntimeArg':
        """Rebuild a RuntimeArg from the dict produced by serialize()."""
        arg_type = deserialize_type(data['type'], ctx)
        arg_kind = ArgKind(data['kind'])
        return RuntimeArg(data['name'], arg_type, arg_kind, data['pos_only'])
class FuncSignature:
    """Signature of a function in IR."""

    # TODO: Track if method?

    def __init__(self, args: Sequence[RuntimeArg], ret_type: RType) -> None:
        # Arguments are stored as a tuple regardless of the sequence passed in.
        self.args = tuple(args)
        self.ret_type = ret_type

    def __repr__(self) -> str:
        fields = (self.args, self.ret_type)
        return 'FuncSignature(args=%r, ret=%r)' % fields

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict with serialized arguments and return type."""
        arg_data = []
        for arg in self.args:
            arg_data.append(arg.serialize())
        return {'args': arg_data, 'ret_type': self.ret_type.serialize()}

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncSignature':
        """Rebuild a FuncSignature from the dict produced by serialize()."""
        arg_list = []
        for arg_data in data['args']:
            arg_list.append(RuntimeArg.deserialize(arg_data, ctx))
        ret_type = deserialize_type(data['ret_type'], ctx)
        return FuncSignature(arg_list, ret_type)
# Values for the 'kind' of a function declaration (see FuncDecl.kind).
# FUNC_NORMAL is the default kind.
FUNC_NORMAL: Final = 0
# For static methods the bound signature is the same as the full signature.
FUNC_STATICMETHOD: Final = 1
FUNC_CLASSMETHOD: Final = 2
class FuncDecl:
    """Declaration of a function in IR (without body or implementation).

    A function can be a regular module-level function, a method, a
    static method, a class method, or a property getter/setter.
    """

    def __init__(self,
                 name: str,
                 class_name: Optional[str],
                 module_name: str,
                 sig: FuncSignature,
                 kind: int = FUNC_NORMAL,
                 is_prop_setter: bool = False,
                 is_prop_getter: bool = False) -> None:
        self.name = name
        self.class_name = class_name
        self.module_name = module_name
        self.sig = sig
        self.kind = kind
        self.is_prop_setter = is_prop_setter
        self.is_prop_getter = is_prop_getter
        # bound_sig is the signature as seen through an instance/class: None for
        # non-methods, unchanged for static methods, and otherwise the
        # signature with its first argument dropped.
        if class_name is None:
            self.bound_sig: Optional[FuncSignature] = None
        elif kind == FUNC_STATICMETHOD:
            self.bound_sig = sig
        else:
            self.bound_sig = FuncSignature(sig.args[1:], sig.ret_type)

        # this is optional because this will be set to the line number when the corresponding
        # FuncIR is created
        self._line: Optional[int] = None

    @property
    def line(self) -> int:
        """The line number; asserts that it has been set."""
        current = self._line
        assert current is not None
        return current

    @line.setter
    def line(self, line: int) -> None:
        self._line = line

    @property
    def id(self) -> str:
        """The id derived from the name, full name and line (line must be set)."""
        # Reading the property first performs the "line is set" assertion.
        line = self.line
        assert line is not None
        return get_id_from_name(self.name, self.fullname, line)

    @staticmethod
    def compute_shortname(class_name: Optional[str], name: str) -> str:
        """Return '<class_name>.<name>' for methods, or just '<name>' otherwise."""
        if class_name:
            return class_name + '.' + name
        return name

    @property
    def shortname(self) -> str:
        """The name qualified by the class name, if any."""
        return FuncDecl.compute_shortname(self.class_name, self.name)

    @property
    def fullname(self) -> str:
        """The short name qualified by the module name."""
        return self.module_name + '.' + self.shortname

    def cname(self, names: NameGenerator) -> str:
        """Return the private name generated by 'names' for this function."""
        return names.private_name(
            self.module_name,
            short_id_from_name(self.name, self.shortname, self._line))

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict of the constructor arguments."""
        result = {
            'name': self.name,
            'class_name': self.class_name,
            'module_name': self.module_name,
            'sig': self.sig.serialize(),
            'kind': self.kind,
            'is_prop_setter': self.is_prop_setter,
            'is_prop_getter': self.is_prop_getter,
        }
        return result

    # TODO: move this to FuncIR?
    @staticmethod
    def get_id_from_json(func_ir: JsonDict) -> str:
        """Get the id from the serialized FuncIR associated with this FuncDecl"""
        decl_data = func_ir['decl']
        name = decl_data['name']
        qualified = (decl_data['module_name'] + '.'
                     + FuncDecl.compute_shortname(decl_data['class_name'], name))
        return get_id_from_name(name, qualified, func_ir['line'])

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncDecl':
        """Rebuild a FuncDecl from the dict produced by serialize()."""
        signature = FuncSignature.deserialize(data['sig'], ctx)
        return FuncDecl(
            data['name'],
            data['class_name'],
            data['module_name'],
            signature,
            data['kind'],
            data['is_prop_setter'],
            data['is_prop_getter'],
        )
class FuncIR:
    """Intermediate representation of a function with contextual information.

    Unlike FuncDecl, this includes the IR of the body (basic blocks).
    """

    def __init__(self,
                 decl: FuncDecl,
                 arg_regs: List[Register],
                 blocks: List[BasicBlock],
                 line: int = -1,
                 traceback_name: Optional[str] = None) -> None:
        # Declaration of the function, including the signature
        self.decl = decl
        # Registers for all the arguments to the function
        self.arg_regs = arg_regs
        # Body of the function
        self.blocks = blocks
        # The line number is stored on the declaration, not on this object.
        decl.line = line
        # The name that should be displayed for tracebacks that
        # include this function. Function will be omitted from
        # tracebacks if None.
        self.traceback_name = traceback_name

    # The properties and cname() below simply forward to the declaration.

    @property
    def line(self) -> int:
        decl = self.decl
        return decl.line

    @property
    def args(self) -> Sequence[RuntimeArg]:
        signature = self.decl.sig
        return signature.args

    @property
    def ret_type(self) -> RType:
        signature = self.decl.sig
        return signature.ret_type

    @property
    def class_name(self) -> Optional[str]:
        decl = self.decl
        return decl.class_name

    @property
    def sig(self) -> FuncSignature:
        decl = self.decl
        return decl.sig

    @property
    def name(self) -> str:
        decl = self.decl
        return decl.name

    @property
    def fullname(self) -> str:
        decl = self.decl
        return decl.fullname

    @property
    def id(self) -> str:
        decl = self.decl
        return decl.id

    def cname(self, names: NameGenerator) -> str:
        decl = self.decl
        return decl.cname(names)

    def __repr__(self) -> str:
        if not self.class_name:
            return '<FuncIR {}>'.format(self.name)
        return '<FuncIR {}.{}>'.format(self.class_name, self.name)

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict with the declaration, line and traceback name."""
        # We don't include blocks in the serialized version
        result = {
            'decl': self.decl.serialize(),
            'line': self.line,
            'traceback_name': self.traceback_name,
        }
        return result

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'FuncIR':
        """Rebuild a FuncIR with empty argument registers and no blocks."""
        decl = FuncDecl.deserialize(data['decl'], ctx)
        arg_regs: List[Register] = []
        blocks: List[BasicBlock] = []
        return FuncIR(decl, arg_regs, blocks, data['line'], data['traceback_name'])
# A FuncDef named "<INVALID_FUNC_DEF>", built with an empty list and an empty Block.
# NOTE(review): presumably a placeholder for places that need a FuncDef but
# have no real one -- confirm against its users.
INVALID_FUNC_DEF: Final = FuncDef("<INVALID_FUNC_DEF>", [], Block([]))
def all_values(args: List[Register], blocks: List[BasicBlock]) -> List[Value]:
    """Return the set of all values that may be initialized in the blocks.

    This omits registers that are only read.

    The result is a list without duplicate registers: the arguments come
    first, followed by values in the order they are encountered.
    """
    result: List[Value] = list(args)
    seen = set(args)

    for block in blocks:
        for op in block.ops:
            if isinstance(op, ControlOp):
                continue

            if isinstance(op, (Assign, AssignMulti)):
                # Assignments contribute their destination register, once.
                target = op.dest
                if target not in seen:
                    result.append(target)
                    seen.add(target)
                continue

            if op.is_void:
                continue

            # If we take the address of a register, it might get initialized.
            if isinstance(op, LoadAddress):
                source = op.src
                if isinstance(source, Register) and source not in seen:
                    result.append(source)
                    seen.add(source)
            result.append(op)

    return result
def all_values_full(args: List[Register], blocks: List[BasicBlock]) -> List[Value]:
    """Return set of all values that are initialized or accessed.

    The result is a list without duplicate registers: the arguments come
    first, followed by values in the order they are encountered.
    """
    result: List[Value] = list(args)
    seen = set(args)

    for block in blocks:
        for op in block.ops:
            # Look for uninitialized registers that are accessed. Ignore
            # non-registers since we don't allow ops outside basic blocks.
            for source in op.sources():
                if not isinstance(source, Register) or source in seen:
                    continue
                result.append(source)
                seen.add(source)

            if isinstance(op, ControlOp):
                continue

            if isinstance(op, (Assign, AssignMulti)):
                # Assignments contribute their destination register, once.
                target = op.dest
                if target not in seen:
                    result.append(target)
                    seen.add(target)
            elif not op.is_void:
                result.append(op)

    return result

View file

@ -0,0 +1,84 @@
"""Intermediate representation of modules."""
from typing import List, Tuple, Dict
from mypyc.common import JsonDict
from mypyc.ir.ops import DeserMaps
from mypyc.ir.rtypes import RType, deserialize_type
from mypyc.ir.func_ir import FuncIR, FuncDecl
from mypyc.ir.class_ir import ClassIR
class ModuleIR:
    """Intermediate representation of a module."""

    def __init__(
            self,
            fullname: str,
            imports: List[str],
            functions: List[FuncIR],
            classes: List[ClassIR],
            final_names: List[Tuple[str, RType]]) -> None:
        self.fullname = fullname
        # The import list is copied; the other lists are stored as given.
        self.imports = imports[:]
        self.functions, self.classes = functions, classes
        self.final_names = final_names

    def serialize(self) -> JsonDict:
        """Return a JSON-compatible dict with serialized functions, classes and final names."""
        function_data = [func.serialize() for func in self.functions]
        class_data = [class_ir.serialize() for class_ir in self.classes]
        final_data = [(name, typ.serialize()) for name, typ in self.final_names]
        return {
            'fullname': self.fullname,
            'imports': self.imports,
            'functions': function_data,
            'classes': class_data,
            'final_names': final_data,
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> 'ModuleIR':
        """Rebuild a ModuleIR from serialized data.

        Functions are looked up in ctx.functions by id rather than rebuilt.
        """
        functions = []
        for func_data in data['functions']:
            functions.append(ctx.functions[FuncDecl.get_id_from_json(func_data)])
        classes = []
        for class_data in data['classes']:
            classes.append(ClassIR.deserialize(class_data, ctx))
        final_names = []
        for name, type_data in data['final_names']:
            final_names.append((name, deserialize_type(type_data, ctx)))
        return ModuleIR(data['fullname'], data['imports'], functions, classes, final_names)
def deserialize_modules(data: Dict[str, JsonDict], ctx: DeserMaps) -> Dict[str, ModuleIR]:
    """Deserialize a collection of modules.

    The modules can contain dependencies on each other.

    Arguments:
        data: A dict containing the modules to deserialize.
        ctx: The deserialization maps to use and to populate.
             They are populated with information from the deserialized
             modules and as a precondition must have been populated by
             deserializing any dependencies of the modules being deserialized
             (outside of dependencies between the modules themselves).

    Returns a map containing the deserialized modules.
    """
    # First create ClassIRs for every class so that we can construct types and whatnot
    for module_data in data.values():
        for class_data in module_data['classes']:
            class_ir = ClassIR(class_data['name'], class_data['module_name'])
            class_name = class_ir.fullname
            assert class_name not in ctx.classes, "Class %s already in map" % class_name
            ctx.classes[class_name] = class_ir

    # Then deserialize all of the functions so that methods are available
    # to the class deserialization.
    for module_data in data.values():
        for func_data in module_data['functions']:
            func_ir = FuncIR.deserialize(func_data, ctx)
            func_id = func_ir.decl.id
            assert func_id not in ctx.functions, (
                "Method %s already in map" % func_ir.decl.fullname)
            ctx.functions[func_id] = func_ir

    # Finally build the modules themselves, now that classes and functions resolve.
    modules = {}
    for key, module_data in data.items():
        modules[key] = ModuleIR.deserialize(module_data, ctx)
    return modules
# ModuleIRs should also always be an *OrderedDict*, but if we
# declared it that way we would need to put it in quotes everywhere...
ModuleIRs = Dict[str, ModuleIR]

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,401 @@
"""Utilities for pretty-printing IR in a human-readable form."""
from collections import defaultdict
from typing import Any, Dict, List, Union, Sequence, Tuple
from typing_extensions import Final
from mypyc.common import short_name
from mypyc.ir.ops import (
Goto, Branch, Return, Unreachable, Assign, Integer, LoadErrorValue, GetAttr, SetAttr,
LoadStatic, InitStatic, TupleGet, TupleSet, IncRef, DecRef, Call, MethodCall, Cast, Box, Unbox,
RaiseStandardError, CallC, Truncate, LoadGlobal, IntOp, ComparisonOp, LoadMem, SetMem,
GetElementPtr, LoadAddress, Register, Value, OpVisitor, BasicBlock, ControlOp, LoadLiteral,
AssignMulti, KeepAlive, Op
)
from mypyc.ir.func_ir import FuncIR, all_values_full
from mypyc.ir.module_ir import ModuleIRs
from mypyc.ir.rtypes import is_bool_rprimitive, is_int_rprimitive, RType
# Type alias: either a basic block or a single op.
# NOTE(review): presumably identifies where an IR problem was found -- confirm against users.
ErrorSource = Union[BasicBlock, Op]
class IRPrettyPrintVisitor(OpVisitor[str]):
"""Internal visitor that pretty-prints ops."""
def __init__(self, names: Dict[Value, str]) -> None:
    """Store the mapping from IR values to the names shown in the output."""
    # This should contain a name for all values that are shown as
    # registers in the output. This is not just for Register
    # instances -- all Ops that produce values need (generated) names.
    self.names = names
def visit_goto(self, op: Goto) -> str:
    """Render an unconditional jump to the op's label."""
    target = op.label
    return self.format('goto %l', target)
# Maps a branch kind to (format of the condition, type annotation suffix).
branch_op_names: Final = {
    Branch.BOOL: ('%r', 'bool'),
    Branch.IS_ERROR: ('is_error(%r)', ''),
}

def visit_branch(self, op: Branch) -> str:
    """Render a conditional branch.

    The output has the form
    'if <cond> goto <true>[ (error at <name>:<line>)] else goto <false>[ :: <type>]'.
    """
    cond_fmt, typ = self.branch_op_names[op.op]
    if op.negated:
        cond_fmt = 'not {}'.format(cond_fmt)
    cond = self.format(cond_fmt, op.value)
    tb = ''
    if op.traceback_entry:
        tb = ' (error at %s:%d)' % op.traceback_entry
    suffix = ' :: {}'.format(typ) if typ else ''
    # The condition and traceback note are already rendered text, so hand
    # them to format() as %s arguments instead of splicing them into the
    # format string, where a '%' in them would be parsed as a directive.
    return self.format('if %s goto %l%s else goto %l%s',
                       cond, op.true, tb, op.false, suffix)
def visit_return(self, op: Return) -> str:
    """Render a return of the op's value."""
    returned = op.value
    return self.format('return %r', returned)
def visit_unreachable(self, op: Unreachable) -> str:
    """Render an unreachable op; the op itself carries nothing that is printed."""
    return "unreachable"
def visit_assign(self, op: Assign) -> str:
    """Render an assignment as '<dest> = <src>'."""
    target, source = op.dest, op.src
    return self.format('%r = %r', target, source)
def visit_assign_multi(self, op: AssignMulti) -> str:
    """Render a multi-value assignment as '<dest> = [<src>, ...]'."""
    rendered = []
    for value in op.src:
        rendered.append(self.format('%r', value))
    return self.format('%r = [%s]', op.dest, ', '.join(rendered))
def visit_load_error_value(self, op: LoadErrorValue) -> str:
    """Render loading of an error value, annotated with the op's type."""
    result_type = op.type
    return self.format('%r = <error> :: %s', op, result_type)
def visit_load_literal(self, op: LoadLiteral) -> str:
    """Render loading of a literal using the repr() of its value."""
    literal = op.value
    # For values that have a potential unboxed representation, make
    # it explicit that this is a Python object.
    prefix = 'object ' if isinstance(literal, int) else ''
    return self.format('%r = %s%s', op, prefix, repr(literal))
def visit_get_attr(self, op: GetAttr) -> str:
    """Render an attribute read as '<result> = <obj>.<attr>'."""
    receiver, attr = op.obj, op.attr
    return self.format('%r = %r.%s', op, receiver, attr)
def visit_set_attr(self, op: SetAttr) -> str:
    """Render an attribute write followed by '<result> = is_error'."""
    receiver, attr, source = op.obj, op.attr, op.src
    return self.format('%r.%s = %r; %r = is_error', receiver, attr, source, op)
def visit_load_static(self, op: LoadStatic) -> str:
    """Render a static load, with module-qualified name, namespace and optional annotation."""
    if op.ann:
        ann = ' ({})'.format(repr(op.ann))
    else:
        ann = ''
    if op.module_name is None:
        name = op.identifier
    else:
        name = '{}.{}'.format(op.module_name, op.identifier)
    return self.format('%r = %s :: %s%s', op, name, op.namespace, ann)
def visit_init_static(self, op: InitStatic) -> str:
    """Render a static initialization as '<name> = <value> :: <namespace>'."""
    if op.module_name is None:
        name = op.identifier
    else:
        name = '{}.{}'.format(op.module_name, op.identifier)
    return self.format('%s = %r :: %s', name, op.value, op.namespace)
def visit_tuple_get(self, op: TupleGet) -> str:
    """Render a tuple item read as '<result> = <src>[<index>]'."""
    source, index = op.src, op.index
    return self.format('%r = %r[%d]', op, source, index)
def visit_tuple_set(self, op: TupleSet) -> str:
    """Render tuple construction as '<result> = (<item>, ...)'."""
    rendered = []
    for item in op.items:
        rendered.append(self.format('%r', item))
    return self.format('%r = (%s)', op, ', '.join(rendered))
def visit_inc_ref(self, op: IncRef) -> str:
    """Render a reference count increment; bool/int operands also show their type."""
    source = op.src
    text = self.format('inc_ref %r', source)
    # TODO: Remove bool check (it's unboxed)
    show_type = is_bool_rprimitive(source.type) or is_int_rprimitive(source.type)
    if show_type:
        text += ' :: {}'.format(short_name(source.type.name))
    return text
def visit_dec_ref(self, op: DecRef) -> str:
    """Render a reference count decrement ('xdec_ref' when is_xdec is set).

    Bool/int operands also show their type.
    """
    source = op.src
    x_prefix = 'x' if op.is_xdec else ''
    text = self.format('%sdec_ref %r', x_prefix, source)
    # TODO: Remove bool check (it's unboxed)
    show_type = is_bool_rprimitive(source.type) or is_int_rprimitive(source.type)
    if show_type:
        text += ' :: {}'.format(short_name(source.type.name))
    return text
def visit_call(self, op: Call) -> str:
    """Render a native call; non-void calls are prefixed with '<result> = '."""
    rendered_args = ', '.join(self.format('%r', arg) for arg in op.args)
    # TODO: Display long name?
    fn_name = op.fn.shortname
    call_text = '%s(%s)' % (fn_name, rendered_args)
    if op.is_void:
        return call_text
    return self.format('%r = ', op) + call_text
def visit_method_call(self, op: MethodCall) -> str:
    """Render a method call; non-void calls are prefixed with '<result> = '."""
    rendered_args = ', '.join(self.format('%r', arg) for arg in op.args)
    call_text = self.format('%r.%s(%s)', op.obj, op.method, rendered_args)
    if op.is_void:
        return call_text
    return self.format('%r = ', op) + call_text
def visit_cast(self, op: Cast) -> str:
    """Render a cast as '<result> = cast(<type>, <src>)'."""
    target_type, source = op.type, op.src
    return self.format('%r = cast(%s, %r)', op, target_type, source)
def visit_box(self, op: Box) -> str:
    """Render a box op, showing the type of the source value."""
    source = op.src
    return self.format('%r = box(%s, %r)', op, source.type, source)
def visit_unbox(self, op: Unbox) -> str:
    """Render an unbox op, showing the type of the result."""
    target_type, source = op.type, op.src
    return self.format('%r = unbox(%s, %r)', op, target_type, source)
def visit_raise_standard_error(self, op: RaiseStandardError) -> str:
    """Render raising of a standard error, with its optional str or Value argument."""
    value = op.value
    if value is None:
        return self.format('%r = raise %s', op, op.class_name)
    if isinstance(value, str):
        # String arguments are shown via repr().
        return self.format('%r = raise %s(%s)', op, op.class_name, repr(value))
    if isinstance(value, Value):
        return self.format('%r = raise %s(%r)', op, op.class_name, value)
    assert False, 'value type must be either str or Value'
def visit_call_c(self, op: CallC) -> str:
    """Render a C function call; non-void calls show the result being assigned."""
    rendered_args = ', '.join(self.format('%r', arg) for arg in op.args)
    if not op.is_void:
        return self.format('%r = %s(%s)', op, op.function_name, rendered_args)
    return self.format('%s(%s)', op.function_name, rendered_args)
def visit_truncate(self, op: Truncate) -> str:
    """Render a Truncate as 'dest = truncate src: src_type to dest_type'."""
    template = "%r = truncate %r: %t to %t"
    return self.format(template, op, op.src, op.src_type, op.type)
def visit_load_global(self, op: LoadGlobal) -> str:
    """Render a LoadGlobal; a truthy op.ann is appended as ' (<repr>)'."""
    if op.ann:
        annotation = ' ({})'.format(repr(op.ann))
    else:
        annotation = ''
    return self.format('%r = load_global %s :: static%s', op, op.identifier, annotation)
def visit_int_op(self, op: IntOp) -> str:
    """Render an IntOp as 'dest = lhs <operator> rhs' using IntOp.op_str."""
    operator_text = IntOp.op_str[op.op]
    return self.format('%r = %r %s %r', op, op.lhs, operator_text, op.rhs)
def visit_comparison_op(self, op: ComparisonOp) -> str:
    """Render a ComparisonOp as 'dest = lhs <operator> rhs'.

    Ordering comparisons get a ' :: signed' or ' :: unsigned' suffix;
    all other operators get no suffix.
    """
    signed_ops = (ComparisonOp.SLT, ComparisonOp.SGT, ComparisonOp.SLE, ComparisonOp.SGE)
    if op.op in signed_ops:
        suffix = " :: signed"
    else:
        unsigned_ops = (ComparisonOp.ULT, ComparisonOp.UGT,
                        ComparisonOp.ULE, ComparisonOp.UGE)
        suffix = " :: unsigned" if op.op in unsigned_ops else ""
    operator_text = ComparisonOp.op_str[op.op]
    return self.format('%r = %r %s %r%s', op, op.lhs, operator_text, op.rhs, suffix)
def visit_load_mem(self, op: LoadMem) -> str:
    """Render a LoadMem as 'dest = load_mem src :: type*'."""
    template = "%r = load_mem %r :: %t*"
    return self.format(template, op, op.src, op.type)
def visit_set_mem(self, op: SetMem) -> str:
    """Render a SetMem as 'set_mem dest, src :: dest_type*'."""
    template = "set_mem %r, %r :: %t*"
    return self.format(template, op.dest, op.src, op.dest_type)
def visit_get_element_ptr(self, op: GetElementPtr) -> str:
    """Render a GetElementPtr as 'dest = get_element_ptr src field :: src_type'."""
    template = "%r = get_element_ptr %r %s :: %t"
    return self.format(template, op, op.src, op.field, op.src_type)
def visit_load_address(self, op: LoadAddress) -> str:
    """Render a LoadAddress op.

    A Register source is printed by its IR name; any other source is
    printed with str().
    """
    source = op.src
    if not isinstance(source, Register):
        return self.format("%r = load_address %s", op, source)
    return self.format("%r = load_address %r", op, source)
def visit_keep_alive(self, op: KeepAlive) -> str:
    """Render a KeepAlive op as 'keep_alive a, b, ...'.

    Each value in op.src is rendered with the '%r' sequence and the
    results are joined with ', '.
    """
    rendered = ', '.join(self.format('%r', v) for v in op.src)
    # Pass the rendered names as a '%s' argument instead of splicing them
    # into the format string, so a '%' inside a name is never re-parsed as
    # a format sequence.
    return self.format('keep_alive %s', rendered)
# Helpers
def format(self, fmt: str, *args: Any) -> str:
    """Helper for formatting strings.

    These format sequences are supported in fmt:

      %s: arbitrary object converted to string using str()
      %r: name of IR value/register
      %d: int
      %f: float
      %l: BasicBlock (formatted as label 'Ln')
      %t: RType

    Arguments are consumed left to right, one per format sequence.
    Surplus arguments are ignored.

    Raises:
        ValueError: if fmt contains an unknown format sequence, or ends
            with a bare '%'.
        IndexError: if there are fewer arguments than format sequences.
    """
    result = []
    i = 0
    next_arg = 0  # index of the next unconsumed entry of args
    while i < len(fmt):
        n = fmt.find('%', i)
        if n < 0:
            n = len(fmt)
        result.append(fmt[i:n])
        if n < len(fmt):
            if n + 1 >= len(fmt):
                # A lone '%' at the very end has no type specifier.
                raise ValueError('Incomplete format sequence at end of format string')
            typespec = fmt[n + 1]
            arg = args[next_arg]
            next_arg += 1
            if typespec == 'r':
                # Register/value
                assert isinstance(arg, Value)
                if isinstance(arg, Integer):
                    # Integer values are printed as their literal value.
                    result.append(str(arg.value))
                else:
                    result.append(self.names[arg])
            elif typespec == 'd':
                # Integer
                result.append('%d' % arg)
            elif typespec == 'f':
                # Float
                result.append('%f' % arg)
            elif typespec == 'l':
                # Basic block (label)
                assert isinstance(arg, BasicBlock)
                result.append('L%s' % arg.label)
            elif typespec == 't':
                # RType
                assert isinstance(arg, RType)
                result.append(arg.name)
            elif typespec == 's':
                # String
                result.append(str(arg))
            else:
                raise ValueError('Invalid format sequence %{}'.format(typespec))
            # Skip past the '%' and its one-character type specifier.
            i = n + 2
        else:
            i = n
    return ''.join(result)
def format_registers(func_ir: FuncIR,
                     names: Dict[Value, str]) -> List[str]:
    """Return 'name, name, ... :: type' declaration lines for a function's values.

    Consecutive values (in the order produced by all_values_full) whose
    types compare equal share one line.
    """
    regs = all_values_full(func_ir.arg_regs, func_ir.blocks)
    result = []
    group: List[str] = []
    group_head = None  # first value of the run currently being collected

    for reg in regs:
        if group_head is not None and reg.type == group_head.type:
            group.append(names[reg])
            continue
        if group_head is not None:
            # The run ended: emit it before starting a new one.
            result.append('%s :: %s' % (', '.join(group), group_head.type))
        group_head = reg
        group = [names[reg]]

    if group_head is not None:
        result.append('%s :: %s' % (', '.join(group), group_head.type))
    return result
def format_blocks(blocks: List[BasicBlock],
                  names: Dict[Value, str],
                  source_to_error: Dict[ErrorSource, List[str]]) -> List[str]:
    """Format a list of IR basic blocks into a human-readable form.

    Side effect: each block's ``label`` is overwritten with its index in
    ``blocks``.

    Args:
        blocks: the basic blocks to print, in output order
        names: printable name for each IR value (used by the op visitor)
        source_to_error: error messages keyed by the block or op they
            belong to; each is printed on an 'ERR:' line after its source

    Returns:
        The output lines, without trailing newlines. A block with no ops,
        or whose last op is not Goto/Branch/Return/Unreachable, is flagged
        with a '[MISSING BLOCK EXIT OPCODE]' line.
    """
    # First label all of the blocks
    for i, block in enumerate(blocks):
        block.label = i

    # Map each error handler block to the blocks that use it.
    handler_map: Dict[BasicBlock, List[BasicBlock]] = {}
    for b in blocks:
        if b.error_handler:
            handler_map.setdefault(b.error_handler, []).append(b)

    visitor = IRPrettyPrintVisitor(names)

    lines = []
    for i, block in enumerate(blocks):
        handler_msg = ''
        if block in handler_map:
            labels = sorted('L%d' % b.label for b in handler_map[block])
            handler_msg = ' (handler for {})'.format(', '.join(labels))

        lines.append('L%d:%s' % (block.label, handler_msg))
        if block in source_to_error:
            for error in source_to_error[block]:
                lines.append(f" ERR: {error}")

        ops = block.ops
        # 'ops and ...' guards against an empty block (e.g. IR that is
        # still under construction), which would otherwise raise IndexError.
        if (ops and isinstance(ops[-1], Goto) and i + 1 < len(blocks)
                and ops[-1].label == blocks[i + 1]
                and not source_to_error.get(ops[-1], [])):
            # Hide the last goto if it just goes to the next basic block,
            # and there are no associated errors with the op.
            ops = ops[:-1]
        for op in ops:
            line = ' ' + op.accept(visitor)
            lines.append(line)
            if op in source_to_error:
                for error in source_to_error[op]:
                    lines.append(f" ERR: {error}")

        if not block.ops or not isinstance(block.ops[-1],
                                           (Goto, Branch, Return, Unreachable)):
            # Each basic block needs to exit somewhere.
            lines.append(' [MISSING BLOCK EXIT OPCODE]')
    return lines
def format_func(fn: FuncIR, errors: Sequence[Tuple[ErrorSource, str]] = ()) -> List[str]:
    """Pretty-print one function: header, value declarations, then basic blocks.

    Args:
        fn: the function IR to print
        errors: (source, message) pairs; messages are grouped per source
            and handed to format_blocks
    """
    owner = fn.class_name
    qualifier = owner + '.' if owner else ''
    arg_list = ', '.join(arg.name for arg in fn.args)
    lines = [f'def {qualifier}{fn.name}({arg_list}):']

    names = generate_names_for_ir(fn.arg_regs, fn.blocks)
    lines.extend(' ' + declaration for declaration in format_registers(fn, names))

    # Group the error messages by the block/op they are attached to.
    source_to_error = defaultdict(list)
    for source, error in errors:
        source_to_error[source].append(error)

    lines.extend(format_blocks(fn.blocks, names, source_to_error))
    return lines
def format_modules(modules: ModuleIRs) -> List[str]:
    """Pretty-print every function of every module, one empty line after each function."""
    output: List[str] = []
    for module in modules.values():
        for fn in module.functions:
            output += format_func(fn)
            output += ['']
    return output
def generate_names_for_ir(args: List[Register], blocks: List[BasicBlock]) -> Dict[Value, str]:
    """Generate unique names for IR values.

    Give names such as 'r5' to temp values in IR which are useful when
    pretty-printing or generating C. Ensure generated names are unique.

    Argument registers keep their own names. Registers with a non-empty
    name keep it (with a '_2', '_3', ... suffix on collision). Integer
    values get no name. Everything else gets the next 'rN' temp name.
    """
    names: Dict[Value, str] = {}
    taken = set()
    next_temp = 0

    for arg in args:
        names[arg] = arg.name
        taken.add(arg.name)

    for block in blocks:
        for op in block.ops:
            pending = [source for source in op.sources() if source not in names]

            if isinstance(op, (Assign, AssignMulti)):
                pending.append(op.dest)
            elif isinstance(op, ControlOp) or op.is_void:
                # Nothing is named while visiting these ops, not even
                # their still-unnamed sources.
                continue
            elif op not in names:
                pending.append(op)

            for value in pending:
                if value in names:
                    continue
                if isinstance(value, Register) and value.name:
                    base = value.name
                elif isinstance(value, Integer):
                    continue
                else:
                    base = 'r%d' % next_temp
                    next_temp += 1

                # Append _2, _3, ... if needed to make the name unique.
                unique = base
                suffix = 2
                while unique in taken:
                    unique = '%s_%d' % (base, suffix)
                    suffix += 1

                names[value] = unique
                taken.add(unique)

    return names

View file

@ -0,0 +1,829 @@
"""Types used in the intermediate representation.
These are runtime types (RTypes), as opposed to mypy Type objects.
The latter are only used during type checking and not directly used at
runtime. Runtime types are derived from mypy types, but there's no
simple one-to-one correspondence. (Here 'runtime' means 'runtime
checked'.)
The generated IR ensures some runtime type safety properties based on
RTypes. Compiled code can assume that the runtime value matches the
static RType of a value. If the RType of a register is 'builtins.str'
(str_rprimitive), for example, the generated IR will ensure that the
register will have a 'str' object.
RTypes are simpler and less expressive than mypy (or PEP 484)
types. For example, all mypy types of form 'list[T]' (for arbitrary T)
are erased to the single RType 'builtins.list' (list_rprimitive).
mypyc.irbuild.mapper.Mapper.type_to_rtype converts mypy Types to mypyc
RTypes.
"""
from abc import abstractmethod
from typing import Optional, Union, List, Dict, Generic, TypeVar, Tuple
from typing_extensions import Final, ClassVar, TYPE_CHECKING
from mypyc.common import JsonDict, short_name, IS_32_BIT_PLATFORM, PLATFORM_SIZE
from mypyc.namegen import NameGenerator
if TYPE_CHECKING:
from mypyc.ir.ops import DeserMaps
from mypyc.ir.class_ir import ClassIR
T = TypeVar('T')
class RType:
    """Abstract base class for runtime types (erased, only concrete; no generics).

    Subclasses are expected to set ``name``, ``c_undefined`` and ``_ctype``
    and to implement ``accept``. Note that this class has no ABC metaclass,
    so ``@abstractmethod`` is documentation only here.
    """

    # Name of the type; str() shows its short form.
    name: str
    # If True, the type has a special unboxed representation. If False, the
    # type is represented as PyObject *. Even if True, the representation
    # may contain pointers.
    is_unboxed = False
    # This is the C undefined value for this type. It's used for initialization
    # if there's no value yet, and for function return value on error/exception.
    c_undefined: str
    # If unboxed: does the unboxed version use reference counting?
    is_refcounted = True
    # C type; use Emitter.ctype() to access
    _ctype: str

    @abstractmethod
    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to the visitor method matching this type."""
        raise NotImplementedError

    def short_name(self) -> str:
        """Return the name shortened by mypyc.common.short_name()."""
        shortened = short_name(self.name)
        return shortened

    def __str__(self) -> str:
        shortened = short_name(self.name)
        return shortened

    def __repr__(self) -> str:
        return '<{}>'.format(self.__class__.__name__)

    def serialize(self) -> Union[JsonDict, str]:
        """Serialize to JSON-compatible data; the base class cannot be serialized."""
        class_name = self.__class__.__name__
        raise NotImplementedError(f'Cannot serialize {class_name} instance')
def deserialize_type(data: Union[JsonDict, str], ctx: 'DeserMaps') -> 'RType':
    """Deserialize a JSON-serialized RType.

    A string is looked up, in order, as a class name in ctx.classes, as a
    primitive name in RPrimitive.primitive_map, and as the literal "void".
    A dict is dispatched on its '.class' entry ('RTuple' or 'RUnion').

    Arguments:
        data: The decoded JSON of the serialized type
        ctx: The deserialization maps to use
    """
    # Since there are so few types, we just case on them directly.  If
    # more get added we should switch to a system like mypy.types
    # uses.
    if isinstance(data, str):
        if data in ctx.classes:
            return RInstance(ctx.classes[data])
        if data in RPrimitive.primitive_map:
            return RPrimitive.primitive_map[data]
        if data == "void":
            return RVoid()
        assert False, "Can't find class {}".format(data)
    else:
        kind = data['.class']
        if kind == 'RTuple':
            return RTuple.deserialize(data, ctx)
        if kind == 'RUnion':
            return RUnion.deserialize(data, ctx)
    raise NotImplementedError('unexpected .class {}'.format(data['.class']))
class RTypeVisitor(Generic[T]):
    """Generic visitor over RTypes (uses the visitor design pattern).

    T is the result type of every visit method. There is one method per
    concrete RType subclass; each RType.accept() implementation calls the
    method that matches its own class.
    """

    @abstractmethod
    def visit_rprimitive(self, typ: 'RPrimitive') -> T:
        """Visit an RPrimitive."""
        raise NotImplementedError

    @abstractmethod
    def visit_rinstance(self, typ: 'RInstance') -> T:
        """Visit an RInstance."""
        raise NotImplementedError

    @abstractmethod
    def visit_runion(self, typ: 'RUnion') -> T:
        """Visit an RUnion."""
        raise NotImplementedError

    @abstractmethod
    def visit_rtuple(self, typ: 'RTuple') -> T:
        """Visit an RTuple."""
        raise NotImplementedError

    @abstractmethod
    def visit_rstruct(self, typ: 'RStruct') -> T:
        """Visit an RStruct."""
        raise NotImplementedError

    @abstractmethod
    def visit_rarray(self, typ: 'RArray') -> T:
        """Visit an RArray."""
        raise NotImplementedError

    @abstractmethod
    def visit_rvoid(self, typ: 'RVoid') -> T:
        """Visit an RVoid."""
        raise NotImplementedError
class RVoid(RType):
    """The void type (no value).

    This is a singleton -- use void_rtype (below) to refer to this instead of
    constructing a new instance.

    All instances compare equal to each other and share one hash value.
    """

    is_unboxed = False
    name = 'void'
    ctype = 'void'
    # RType declares the C type under the private name '_ctype' and every
    # other RType sets it there; provide it too so generic code that reads
    # '_ctype' also works for void. 'ctype' is kept for existing users.
    _ctype = 'void'

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_rvoid()."""
        return visitor.visit_rvoid(self)

    def serialize(self) -> str:
        """Serialize as the string 'void'."""
        return 'void'

    def __eq__(self, other: object) -> bool:
        return isinstance(other, RVoid)

    def __hash__(self) -> int:
        return hash(RVoid)
# Singleton instance of RVoid; use this instead of constructing RVoid() directly.
void_rtype: Final = RVoid()
class RPrimitive(RType):
    """Primitive type such as 'object' or 'int'.

    These often have custom ops associated with them. The 'object'
    primitive type can be used to hold arbitrary Python objects.

    Different primitive types have different representations, and
    primitives may be unboxed or boxed. Primitive types don't need to
    directly correspond to Python types, but most do.

    Two primitives compare equal when their names are equal.

    NOTE: All supported primitive types are defined below
    (e.g. object_rprimitive).
    """

    # Map from primitive names to primitive types and is used by deserialization
    primitive_map: ClassVar[Dict[str, "RPrimitive"]] = {}

    def __init__(self,
                 name: str,
                 is_unboxed: bool,
                 is_refcounted: bool,
                 ctype: str = 'PyObject *',
                 size: int = PLATFORM_SIZE) -> None:
        """Create the primitive and register it in primitive_map under its name."""
        RPrimitive.primitive_map[name] = self

        self.name = name
        self.is_unboxed = is_unboxed
        self._ctype = ctype
        self.is_refcounted = is_refcounted
        self.size = size

        # TODO: For low-level integers, they actually don't have undefined values
        #       we need to figure out some way to represent here.
        undefined_by_ctype = {
            'CPyTagged': 'CPY_INT_TAG',
            'int32_t': '0',
            'int64_t': '0',
            'CPyPtr': '0',
            'uint32_t': '0',
            'uint64_t': '0',
            # Boxed types use the null pointer as the error value.
            'PyObject *': 'NULL',
            'char': '2',
            'PyObject **': 'NULL',
        }
        undefined = undefined_by_ctype.get(ctype)
        if undefined is None:
            assert False, 'Unrecognized ctype: %r' % ctype
        else:
            self.c_undefined = undefined

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_rprimitive()."""
        return visitor.visit_rprimitive(self)

    def serialize(self) -> str:
        """Serialize as the primitive's name."""
        return self.name

    def __repr__(self) -> str:
        return '<RPrimitive {}>'.format(self.name)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RPrimitive):
            return False
        return other.name == self.name

    def __hash__(self) -> int:
        return hash(self.name)
# NOTE: All the supported instances of RPrimitive are defined
# below. Use these instead of creating new instances.
# (Each RPrimitive registers itself in RPrimitive.primitive_map under
# its name when it is constructed.)

# Used to represent arbitrary objects and dynamically typed (Any)
# values. There are various ops that let you perform generic, runtime
# checked operations on these (that match Python semantics). See the
# ops in mypyc.primitives.misc_ops, including py_getattr_op,
# py_call_op, and many others.
#
# If there is no more specific RType available for some value, we fall
# back to using this type.
#
# NOTE: Even though this is very flexible, this type should be used as
# little as possible, as generic ops are typically slow. Other types,
# including other primitive types and RInstance, are usually much
# faster.
object_rprimitive: Final = RPrimitive("builtins.object", is_unboxed=False, is_refcounted=True)

# Represents a low-level pointer to an object (C type 'PyObject **').
object_pointer_rprimitive: Final = RPrimitive(
    "object_ptr", is_unboxed=False, is_refcounted=False, ctype="PyObject **"
)

# Arbitrary-precision integer (corresponds to Python 'int'). Small
# enough values are stored unboxed, while large integers are
# represented as a tagged pointer to a Python 'int' PyObject. The
# lowest bit is used as the tag to decide whether it is a signed
# unboxed value (shifted left by one) or a PyObject * pointing to an
# 'int' object. Pointers have the least significant bit set.
#
# The undefined/error value is the null pointer (1 -- only the least
# significant bit is set).
#
# This cannot represent a subclass of int. An instance of a subclass
# of int is coerced to the corresponding 'int' value.
int_rprimitive: Final = RPrimitive(
    "builtins.int", is_unboxed=True, is_refcounted=True, ctype="CPyTagged"
)

# An unboxed integer. The representation is the same as for unboxed
# int_rprimitive (shifted left by one). These can be used when an
# integer is known to be small enough to fit size_t (CPyTagged).
short_int_rprimitive: Final = RPrimitive(
    "short_int", is_unboxed=True, is_refcounted=False, ctype="CPyTagged"
)

# Low level integer types (correspond to C integer types)

int32_rprimitive: Final = RPrimitive(
    "int32", is_unboxed=True, is_refcounted=False, ctype="int32_t", size=4
)
int64_rprimitive: Final = RPrimitive(
    "int64", is_unboxed=True, is_refcounted=False, ctype="int64_t", size=8
)
uint32_rprimitive: Final = RPrimitive(
    "uint32", is_unboxed=True, is_refcounted=False, ctype="uint32_t", size=4
)
uint64_rprimitive: Final = RPrimitive(
    "uint64", is_unboxed=True, is_refcounted=False, ctype="uint64_t", size=8
)

# The C 'int' type
c_int_rprimitive = int32_rprimitive

# Pick the 32-bit or 64-bit representation for c_size_t_rprimitive and
# c_pyssize_t_rprimitive depending on IS_32_BIT_PLATFORM. The signed
# variant is a distinct primitive named 'native_int' in both cases.
if IS_32_BIT_PLATFORM:
    c_size_t_rprimitive = uint32_rprimitive
    c_pyssize_t_rprimitive = RPrimitive('native_int', is_unboxed=True, is_refcounted=False,
                                        ctype='int32_t', size=4)
else:
    c_size_t_rprimitive = uint64_rprimitive
    c_pyssize_t_rprimitive = RPrimitive('native_int', is_unboxed=True, is_refcounted=False,
                                        ctype='int64_t', size=8)

# Low level pointer, represented as integer in C backends
pointer_rprimitive: Final = RPrimitive("ptr", is_unboxed=True, is_refcounted=False, ctype="CPyPtr")

# Floats are represented as 'float' PyObject * values. (In the future
# we'll likely switch to a more efficient, unboxed representation.)
float_rprimitive: Final = RPrimitive("builtins.float", is_unboxed=False, is_refcounted=True)

# An unboxed Python bool value. This actually has three possible values
# (0 -> False, 1 -> True, 2 -> error). If you only need True/False, use
# bit_rprimitive instead.
bool_rprimitive: Final = RPrimitive(
    "builtins.bool", is_unboxed=True, is_refcounted=False, ctype="char", size=1
)

# A low-level boolean value with two possible values: 0 and 1. Any
# other value results in undefined behavior. Undefined or error values
# are not supported.
bit_rprimitive: Final = RPrimitive(
    "bit", is_unboxed=True, is_refcounted=False, ctype="char", size=1
)

# The 'None' value. The possible values are 0 -> None and 2 -> error.
none_rprimitive: Final = RPrimitive(
    "builtins.None", is_unboxed=True, is_refcounted=False, ctype="char", size=1
)

# Python list object (or an instance of a subclass of list).
list_rprimitive: Final = RPrimitive("builtins.list", is_unboxed=False, is_refcounted=True)

# Python dict object (or an instance of a subclass of dict).
dict_rprimitive: Final = RPrimitive("builtins.dict", is_unboxed=False, is_refcounted=True)

# Python set object (or an instance of a subclass of set).
set_rprimitive: Final = RPrimitive("builtins.set", is_unboxed=False, is_refcounted=True)

# Python str object. At the C layer, str is referred to as unicode
# (PyUnicode).
str_rprimitive: Final = RPrimitive("builtins.str", is_unboxed=False, is_refcounted=True)

# Python bytes object.
bytes_rprimitive: Final = RPrimitive('builtins.bytes', is_unboxed=False, is_refcounted=True)

# Tuple of an arbitrary length (corresponds to Tuple[t, ...], with
# explicit '...').
tuple_rprimitive: Final = RPrimitive("builtins.tuple", is_unboxed=False, is_refcounted=True)

# Python range object.
range_rprimitive: Final = RPrimitive("builtins.range", is_unboxed=False, is_refcounted=True)
def is_tagged(rtype: RType) -> bool:
    """Return True if rtype is int_rprimitive or short_int_rprimitive (identity check)."""
    if rtype is int_rprimitive:
        return True
    return rtype is short_int_rprimitive
def is_int_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the int_rprimitive object itself (identity check)."""
    return rtype is int_rprimitive
def is_short_int_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the short_int_rprimitive object itself (identity check)."""
    return rtype is short_int_rprimitive
def is_int32_rprimitive(rtype: RType) -> bool:
    """Return True for int32_rprimitive, or for c_pyssize_t_rprimitive when its C type is int32_t."""
    if rtype is int32_rprimitive:
        return True
    return rtype is c_pyssize_t_rprimitive and rtype._ctype == 'int32_t'
def is_int64_rprimitive(rtype: RType) -> bool:
    """Return True for int64_rprimitive, or for c_pyssize_t_rprimitive when its C type is int64_t."""
    if rtype is int64_rprimitive:
        return True
    return rtype is c_pyssize_t_rprimitive and rtype._ctype == 'int64_t'
def is_uint32_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the uint32_rprimitive object itself (identity check)."""
    return rtype is uint32_rprimitive
def is_uint64_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the uint64_rprimitive object itself (identity check)."""
    return rtype is uint64_rprimitive
def is_c_py_ssize_t_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the c_pyssize_t_rprimitive object itself (identity check)."""
    return rtype is c_pyssize_t_rprimitive
def is_pointer_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the pointer_rprimitive object itself (identity check)."""
    return rtype is pointer_rprimitive
def is_float_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.float'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.float'
def is_bool_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.bool'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.bool'
def is_bit_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'bit'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'bit'
def is_object_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.object'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.object'
def is_none_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.None'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.None'
def is_list_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.list'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.list'
def is_dict_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.dict'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.dict'
def is_set_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.set'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.set'
def is_str_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.str'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.str'
def is_bytes_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.bytes'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.bytes'
def is_tuple_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.tuple'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.tuple'
def is_range_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is an RPrimitive named 'builtins.range'."""
    if not isinstance(rtype, RPrimitive):
        return False
    return rtype.name == 'builtins.range'
def is_sequence_rprimitive(rtype: RType) -> bool:
    """Return True if rtype is the list, tuple or str primitive."""
    if not isinstance(rtype, RPrimitive):
        return False
    checks = (is_list_rprimitive, is_tuple_rprimitive, is_str_rprimitive)
    return any(check(rtype) for check in checks)
class TupleNameVisitor(RTypeVisitor[str]):
    """Produce a tuple name based on the concrete representations of types.

    Codes: 'I' for CPyTagged primitives, 'C' for char primitives, 'O' for
    boxed primitives, instances and unions, and 'T<n><codes>' for a
    nested tuple with n items.
    """

    def visit_rinstance(self, t: 'RInstance') -> str:
        return "O"

    def visit_runion(self, t: 'RUnion') -> str:
        return "O"

    def visit_rprimitive(self, t: 'RPrimitive') -> str:
        code = {'CPyTagged': 'I', 'char': 'C'}.get(t._ctype)
        if code is not None:
            return code
        # Any other primitive must be boxed to be usable as a tuple item.
        assert not t.is_unboxed, "{} unexpected unboxed type".format(t)
        return 'O'

    def visit_rtuple(self, t: 'RTuple') -> str:
        item_codes = [item.accept(self) for item in t.types]
        return 'T' + str(len(item_codes)) + ''.join(item_codes)

    def visit_rstruct(self, t: 'RStruct') -> str:
        assert False, 'RStruct not supported in tuple'

    def visit_rarray(self, t: 'RArray') -> str:
        assert False, 'RArray not supported in tuple'

    def visit_rvoid(self, t: 'RVoid') -> str:
        assert False, "rvoid in tuple?"
class RTuple(RType):
    """Fixed-length unboxed tuple (represented as a C struct).

    These are used to represent mypy TupleType values (fixed-length
    Python tuples). Since this is unboxed, the identity of a tuple
    object is not preserved within compiled code. If the identity of a
    tuple is important, or there is a need to have multiple references
    to a single tuple object, a variable-length tuple should be used
    (tuple_rprimitive or Tuple[T, ...]  with explicit '...'), as they
    are boxed.

    These aren't immutable. However, user code won't be able to mutate
    individual tuple items.

    Two RTuples are equal when their item types are equal.
    """

    is_unboxed = True

    def __init__(self, types: List[RType]) -> None:
        self.name = 'tuple'
        item_types = tuple(types)
        self.types = item_types
        # Reference counted as soon as any item is.
        self.is_refcounted = any(item.is_refcounted for item in item_types)
        # Generate a unique id which is used in naming corresponding C identifiers.
        # This is necessary since C does not have anonymous structural type equivalence
        # in the same way python can just assign a Tuple[int, bool] to a Tuple[int, bool].
        self.unique_id = self.accept(TupleNameVisitor())
        # Nominally the max c length is 31 chars, but I'm not honestly worried about this.
        self.struct_name = 'tuple_{}'.format(self.unique_id)
        # The C type is the struct name itself.
        self._ctype = self.struct_name

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_rtuple()."""
        return visitor.visit_rtuple(self)

    def __str__(self) -> str:
        items = ', '.join([str(item) for item in self.types])
        return 'tuple[' + items + ']'

    def __repr__(self) -> str:
        items = ', '.join([repr(item) for item in self.types])
        return '<RTuple ' + items + '>'

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RTuple):
            return False
        return self.types == other.types

    def __hash__(self) -> int:
        return hash((self.name, self.types))

    def serialize(self) -> JsonDict:
        """Serialize as {'.class': 'RTuple', 'types': [...]}."""
        return {
            '.class': 'RTuple',
            'types': [item.serialize() for item in self.types],
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RTuple':
        """Rebuild an RTuple from the output of serialize()."""
        item_types = [deserialize_type(item, ctx) for item in data['types']]
        return RTuple(item_types)
# Exception tuple: (exception class, exception instance, traceback object)
# All three items use the generic object type.
exc_rtuple = RTuple([object_rprimitive, object_rprimitive, object_rprimitive])

# Dictionary iterator tuple: (should continue, internal offset, key, value)
# See mypyc.irbuild.for_helpers.ForDictionaryCommon for more details.
dict_next_rtuple_pair = RTuple(
    [bool_rprimitive, short_int_rprimitive, object_rprimitive, object_rprimitive]
)
# Same as above but just for key or value.
dict_next_rtuple_single = RTuple(
    [bool_rprimitive, short_int_rprimitive, object_rprimitive]
)
def compute_rtype_alignment(typ: RType) -> int:
    """Compute alignment of a given type based on platform alignment rule.

    Primitives align to their own size. Instances and unions align to
    PLATFORM_SIZE. An array aligns like its item type. A tuple or struct
    aligns like its most strictly aligned item; one with no items gets
    alignment 1.

    Any other type fails an assertion.
    """
    platform_alignment = PLATFORM_SIZE
    if isinstance(typ, RPrimitive):
        return typ.size
    elif isinstance(typ, RInstance):
        return platform_alignment
    elif isinstance(typ, RUnion):
        return platform_alignment
    elif isinstance(typ, RArray):
        return compute_rtype_alignment(typ.item_type)
    else:
        if isinstance(typ, RTuple):
            items = list(typ.types)
        elif isinstance(typ, RStruct):
            items = typ.types
        else:
            assert False, "invalid rtype for computing alignment"
        # default=1 keeps an empty tuple/struct from raising ValueError in max().
        return max((compute_rtype_alignment(item) for item in items), default=1)
def compute_rtype_size(typ: RType) -> int:
    """Compute unaligned size of rtype.

    Primitives report their own size. Unions and instances take
    PLATFORM_SIZE. Tuples and structs take the total size computed by
    compute_aligned_offsets_and_size. An array takes its length times the
    item size rounded up to the array's alignment.

    Any other type fails an assertion.
    """
    if isinstance(typ, RPrimitive):
        return typ.size
    if isinstance(typ, RTuple):
        _, total = compute_aligned_offsets_and_size(list(typ.types))
        return total
    if isinstance(typ, RUnion):
        return PLATFORM_SIZE
    if isinstance(typ, RStruct):
        _, total = compute_aligned_offsets_and_size(typ.types)
        return total
    if isinstance(typ, RInstance):
        return PLATFORM_SIZE
    if isinstance(typ, RArray):
        alignment = compute_rtype_alignment(typ)
        item_size = compute_rtype_size(typ.item_type)
        # Round the item size up to the next multiple of the alignment.
        padded_item_size = (item_size + (alignment - 1)) & ~(alignment - 1)
        return padded_item_size * typ.length
    assert False, "invalid rtype for computing size"
def compute_aligned_offsets_and_size(types: List[RType]) -> Tuple[List[int], int]:
    """Compute offsets and total size of a list of types after alignment

    Note that the types argument are types of values that are stored
    sequentially with platform default alignment.

    Returns (offsets, size): the byte offset of each item, and the total
    size rounded up to the largest item alignment. An empty list gives
    ([], 0).
    """
    sizes = [compute_rtype_size(typ) for typ in types]
    alignments = [compute_rtype_alignment(typ) for typ in types]

    offsets = []
    offset = 0
    total = 0
    last_index = len(sizes) - 1
    for index, size in enumerate(sizes):
        offsets.append(offset)
        if index < last_index:
            # Place the next item at the first offset past this one that
            # satisfies the next item's alignment,
            # check https://en.wikipedia.org/wiki/Data_structure_alignment for more information
            following = alignments[index + 1]
            offset = (offset + size + (following - 1)) & -following
        else:
            # Last item: pad the total to the strictest alignment.
            widest = max(alignments)
            total = (offset + size + (widest - 1)) & -widest
    return offsets, total
class RStruct(RType):
    """C struct type

    Attributes set by __init__:
        name: name of the struct; also used as its C type
        names: field names; padded with '_item0', '_item1', ... when fewer
            names than types are given
        types: field types, in order
        offsets: byte offset of each field
        size: total size, from compute_aligned_offsets_and_size
    """

    def __init__(self,
                 name: str,
                 names: List[str],
                 types: List[RType]) -> None:
        self.name = name
        # Work on a copy so that padding with dummy names below does not
        # modify the list object the caller passed in.
        self.names = list(names)
        self.types = types
        # generate dummy names
        missing = len(self.types) - len(self.names)
        if missing > 0:
            self.names.extend('_item' + str(i) for i in range(missing))
        self.offsets, self.size = compute_aligned_offsets_and_size(types)
        self._ctype = name

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_rstruct()."""
        return visitor.visit_rstruct(self)

    def __str__(self) -> str:
        # if not tuple(unnamed structs)
        return '%s{%s}' % (self.name, ', '.join(name + ":" + str(typ)
                                                for name, typ in zip(self.names, self.types)))

    def __repr__(self) -> str:
        return '<RStruct %s{%s}>' % (self.name, ', '.join(name + ":" + repr(typ) for name, typ
                                                          in zip(self.names, self.types)))

    def __eq__(self, other: object) -> bool:
        return (isinstance(other, RStruct) and self.name == other.name
                and self.names == other.names and self.types == other.types)

    def __hash__(self) -> int:
        return hash((self.name, tuple(self.names), tuple(self.types)))

    def serialize(self) -> JsonDict:
        # Serialization is not supported for structs.
        assert False

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RStruct':
        # Deserialization is not supported for structs.
        assert False
class RInstance(RType):
    """Instance of user-defined class (compiled to C extension class).

    The runtime representation is 'PyObject *', and these are always
    boxed and thus reference-counted.

    These support fast method calls and fast attribute access using
    vtables, and they usually use a dict-free, struct-based
    representation of attributes. Method calls and attribute access
    can skip the vtable if we know that there is no overriding.

    These are also sometimes called 'native' types, since these have
    the most efficient representation and ops (along with certain
    RPrimitive types and RTuple).

    Two RInstances are equal when their names (class full names) are equal.
    """

    is_unboxed = False

    def __init__(self, class_ir: 'ClassIR') -> None:
        self.class_ir = class_ir
        # name is used for formatting the name in messages and debug output
        # so we want the fullname for precision.
        self.name = class_ir.fullname
        self._ctype = 'PyObject *'

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_rinstance()."""
        return visitor.visit_rinstance(self)

    def struct_name(self, names: NameGenerator) -> str:
        """Return the class IR's struct name for the given name generator."""
        class_ir = self.class_ir
        return class_ir.struct_name(names)

    def getter_index(self, name: str) -> int:
        """Return the vtable entry index for attribute 'name'."""
        class_ir = self.class_ir
        return class_ir.vtable_entry(name)

    def setter_index(self, name: str) -> int:
        """Return the index one past getter_index(name)."""
        getter = self.getter_index(name)
        return getter + 1

    def method_index(self, name: str) -> int:
        """Return the vtable entry index for method 'name'."""
        class_ir = self.class_ir
        return class_ir.vtable_entry(name)

    def attr_type(self, name: str) -> RType:
        """Return the type the class IR reports for attribute 'name'."""
        class_ir = self.class_ir
        return class_ir.attr_type(name)

    def __repr__(self) -> str:
        return '<RInstance {}>'.format(self.name)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RInstance):
            return False
        return other.name == self.name

    def __hash__(self) -> int:
        return hash(self.name)

    def serialize(self) -> str:
        """Serialize as the class full name."""
        return self.name
class RUnion(RType):
    """union[x, ..., y]

    Items keep the order they were given in (used for printing and
    serialization). Equality and hashing use the set of items.
    """

    is_unboxed = False

    def __init__(self, items: List[RType]) -> None:
        self.name = 'union'
        self._ctype = 'PyObject *'
        self.items = items
        self.items_set = frozenset(items)

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_runion()."""
        return visitor.visit_runion(self)

    def __repr__(self) -> str:
        rendered = ', '.join(map(str, self.items))
        return '<RUnion ' + rendered + '>'

    def __str__(self) -> str:
        rendered = ', '.join(map(str, self.items))
        return 'union[' + rendered + ']'

    # We compare based on the set because order in a union doesn't matter
    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RUnion):
            return False
        return self.items_set == other.items_set

    def __hash__(self) -> int:
        return hash(('union', self.items_set))

    def serialize(self) -> JsonDict:
        """Serialize as {'.class': 'RUnion', 'types': [...]}."""
        return {
            '.class': 'RUnion',
            'types': [item.serialize() for item in self.items],
        }

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RUnion':
        """Rebuild an RUnion from the output of serialize()."""
        item_types = [deserialize_type(item, ctx) for item in data['types']]
        return RUnion(item_types)
def optional_value_type(rtype: RType) -> Optional[RType]:
    """If rtype is the union of none_rprimitive and another type X, return X.

    Otherwise return None. Only unions with exactly two items qualify.
    """
    if not isinstance(rtype, RUnion) or len(rtype.items) != 2:
        return None
    first, second = rtype.items
    if first == none_rprimitive:
        return second
    if second == none_rprimitive:
        return first
    return None
def is_optional_type(rtype: RType) -> bool:
    """Is rtype an optional type with exactly two union items?"""
    value_type = optional_value_type(rtype)
    return value_type is not None
class RArray(RType):
    """Fixed-length C array type (for example, int[5]).

    Note that the implementation is a bit limited, and these can basically
    be only used for local variables that are initialized in one location.

    Two RArrays are equal when both item type and length are equal.
    """

    def __init__(self,
                 item_type: RType,
                 length: int) -> None:
        self.item_type = item_type
        # Number of items
        self.length = length
        self.is_refcounted = False

    def accept(self, visitor: 'RTypeVisitor[T]') -> T:
        """Dispatch to visitor.visit_rarray()."""
        return visitor.visit_rarray(self)

    def __str__(self) -> str:
        return '{}[{}]'.format(self.item_type, self.length)

    def __repr__(self) -> str:
        return '<RArray {!r}[{}]>'.format(self.item_type, self.length)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, RArray):
            return False
        return self.item_type == other.item_type and self.length == other.length

    def __hash__(self) -> int:
        return hash((self.item_type, self.length))

    def serialize(self) -> JsonDict:
        # Serialization is not supported for arrays.
        assert False

    @classmethod
    def deserialize(cls, data: JsonDict, ctx: 'DeserMaps') -> 'RArray':
        # Deserialization is not supported for arrays.
        assert False
# Struct layouts described as RStructs. Field offsets and total sizes are
# computed by RStruct from the field types listed here.
# NOTE(review): the names suggest these mirror the CPython C structs of
# the same name -- confirm the field lists against the CPython headers
# for the targeted Python versions.

PyObject = RStruct(
    name='PyObject',
    names=['ob_refcnt', 'ob_type'],
    types=[c_pyssize_t_rprimitive, pointer_rprimitive])

# Embeds a full PyObject as its first field.
PyVarObject = RStruct(
    name='PyVarObject',
    names=['ob_base', 'ob_size'],
    types=[PyObject, c_pyssize_t_rprimitive])

setentry = RStruct(
    name='setentry',
    names=['key', 'hash'],
    types=[pointer_rprimitive, c_pyssize_t_rprimitive])

# Eight setentry items; no names are given, so RStruct generates the
# dummy field names '_item0' .. '_item7'.
smalltable = RStruct(
    name='smalltable',
    names=[],
    types=[setentry] * 8)

PySetObject = RStruct(
    name='PySetObject',
    names=['ob_base', 'fill', 'used', 'mask', 'table', 'hash', 'finger',
           'smalltable', 'weakreflist'],
    types=[PyObject, c_pyssize_t_rprimitive, c_pyssize_t_rprimitive, c_pyssize_t_rprimitive,
           pointer_rprimitive, c_pyssize_t_rprimitive, c_pyssize_t_rprimitive, smalltable,
           pointer_rprimitive])

PyListObject = RStruct(
    name='PyListObject',
    names=['ob_base', 'ob_item', 'allocated'],
    types=[PyVarObject, pointer_rprimitive, c_pyssize_t_rprimitive]
)