Image courtesy Ned Batchelder
"""Docstring for example.py""" def sum(a, b): """Return a * 2 + b * 3""" a = a * 2 c = b * 3 return a + c if __name__ == '__main__': print(sum(15, 4))
Happens implicit
Explicit:
python3 -m py_compile example.py
py_compile is the reference for how to write .pyc files
Creates __pycache__/example.cpython-32.pyc (CPython 3.2)
See imp.cache_from_source() & Co
Lazy developers search the web rather then read the code.
http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
created using od -Ad -N 16 -w4 -tx2c example.cpython-32.pyc
0000000 0c6c 0a0d l \f \r \n 0000004 d667 4e6f g 326 o N 0000008 0063 0000 c \0 \0 \0 0000012 0000 0000 \0 \0 \0 \0 0000016
4 bytes -> b'l\x0c\r\n'
CR LF to detect encoding corruption
struct.unpack('<H', b'l\x0c') -> (3180,)
Python/import.c:
Python 3.2a2 3180 (add DELETE_DEREF)
imp.get_magic()
Unix timestamp, 4 bytes:
b'\xe4\xd7\xfdM'
Unsigned long, little endian
time.ctime(struct.unpack('<L', b'xe4xd7xfdM')[0]):
'Sun Jun 19 12:05:08 2011'
Marshal module:
marshal.load(fp):
<code object <module> at 0x1a3bbe0, file "example.py", line 1>
with open('__pycache__/example.cpython-32.pyc', 'rb') as fp: magic = fp.read(4) assert magic == imp.get_magic() timestamp = struct.unpack('<L', fp.read(4)) timestamp = timestamp[0] code = marshal.load(fp) # Use code here...
Compile your own code object:
compile(source, filename, mode, flags=0, dont_inherit=False, optimize=-1)
source: open('example.py').read()
filename: 'example.py', '<string>'
mode: "exec", "eval" or "single"
exec: | for a module |
---|---|
eval: | for an expression |
single: | automatically prints to the terminal for you etc. |
co_filename = 'example.py'
co_firstlineno = 1
co_lnotab = b'x06x03tx07x0cx01' (Objects/lnotab_notes.txt)
co_flags = 0x40
co_name = '<module>'
co_names = ('__doc__', 'sum', '__name__', 'print')
co_consts = ('Docstring for example.py',
<code object sum at 0x1a3bbe0, file "example.py", line 4>,
'__main__', 15, 4, None)
co_stacksize = 4
co_code = b'dx00x00Zx00x00dx01x00x84x00x00Zx01x00ex02x00dx02x00kx02x00r1x00ex03x00ex01x00dx03x00dx04x00x83x02x00x83x01x00x01nx00x00dx05x00S'
co_name = 'sum'
co_names = ()
co_consts = ('Return a * 2 + b * 3', 2, 3)
The first item would be None if there was no docstring.
co_argcount = 2
co_kwonlyargcount = 0
co_cellvars = ()
This is used when nested functions use variables from this function.
co_freevars = ()
Free variables which are variables which need to come from an enclosing context.
co_nlocals = 3
co_varnames = ('a', 'b', 'c')
co_stacksize = 2
co_code = b'|x00x00dx01x00x14}x00x00|x01x00dx02x00x14}x02x00|x00x00|x02x00x17S'
>>> dis.show_code(code_sum) Name: sum Filename: example.py Argument count: 2 Kw-only arguments: 0 Number of locals: 3 Stack size: 2 Flags: OPTIMIZED, NEWLOCALS, NOFREE Constants: 0: 'Return a * 2 + b * 3' 1: 2 2: 3 Variable names: 0: a 1: b 2: c
def sum(a, b): a = a * 2 c = b * 3 return a + c
>>> dis.dis(code_sum) 6 0 LOAD_FAST 0 (a) 3 LOAD_CONST 1 (2) 6 BINARY_MULTIPLY 7 STORE_FAST 0 (a) 7 10 LOAD_FAST 1 (b) 13 LOAD_CONST 2 (3) 16 BINARY_MULTIPLY 17 STORE_FAST 2 (c) 8 20 LOAD_FAST 0 (a) 23 LOAD_FAST 2 (c) 26 BINARY_ADD 27 RETURN_VALUE
LOAD_FAST(var_num):
Pushes a reference to the local co_varnames[var_num] onto the stack.
LOAD_CONST(consti):
Pushes co_consts[consti] onto the stack.
BINARY_MULTIPLY:
Implements TOS = TOS1 * TOS.
STORE_FAST(var_num):
Stores TOS into the local co_varnames[var_num].
BINARY_ADD:
Implements TOS = TOS1 + TOS.
RETURN_VALUE:
Returns with TOS to the caller of the function.
class VirtualMachine: """Very simple and incomplete python virtual machine""" def __init__(self): self.valuestack = [] def run_pyc_as_main(self, fname): with open(imp.cache_from_source(fname), 'rb') as fp: assert fp.read(4) == imp.get_magic() fp.seek(4, io.SEEK_CUR) code = marshal.load(fp) module = imp.new_module('__main__') # check out it's __dict__! self.eval_code(code, globals=module.__dict__, locals=module.__dict__) return module def eval_code(self, code, globals=None, locals=None): frame = Frame(self.valuestack, code, globals=globals, locals=locals, builtins=builtins.__dict__) frame.eval()
class Frame: def __init__(self, stack, code, prev_frame=None, globals=None, locals=None, builtins=builtins.__dict__): self.stack = stack self.f_code = code self.f_back = prev_frame self.f_globals = globals if globals else {} self.f_locals = locals if locals else copy.copy(self.f_globals) self.f_builtins = builtins self.f_lasti = None def eval(self): pass
co_code = b'|x00x00dx01x00x14}x00x00|x01x00dx02x00x14}x02x00|x00x00|x02x00x17S'
' '.join(format(i, 'd') for i in cobj_sum.co_code)
'124 0 0 100 1 0 20 125 0 0 124 1 0 100 2 0 20 125 2 0 124 0 0 124 2 0 23 83'
dis.opname[124] = 'LOAD_FAST'
dis.opmap['LOAD_FAST'] = 124
Bytecode arguments:
def eval(self): co_code = self.f_code.co_code self.f_lasti = 0 while True: opcode = co_code[self.f_lasti] if opcode == dis.opmap['RETURN_VALUE']: break meth = getattr(self, dis.opname[opcode]) if opcode >= dis.HAVE_ARGUMENT: arg = co_code[self.f_lasti+1] + (co_code[self.f_lasti+2] << 8) meth(arg) self.f_lasti += 3 else: meth() self.f_lasti += 1
Here implemented as Frame methods
Only 6 so far:
>>> dis.dis(code_sum) 6 0 LOAD_FAST 0 (a) 3 LOAD_CONST 1 (2) 6 BINARY_MULTIPLY 7 STORE_FAST 0 (a) 7 10 LOAD_FAST 1 (b) 13 LOAD_CONST 2 (3) 16 BINARY_MULTIPLY 17 STORE_FAST 2 (c) 8 20 LOAD_FAST 0 (a) 23 LOAD_FAST 2 (c) 26 BINARY_ADD 27 RETURN_VALUE
Move variables between the frame and the stack
def LOAD_FAST(self, var_num): varname = self.f_code.co_varnames[var_num] self.stack.append(self.f_locals[varname])
def STORE_FAST(self, var_num): varname = self.f_code.co_varnames[var_num] self.f_locals[varname] = self.stack.pop()
def LOAD_CONST(self, consti): self.stack.append(self.f_code.co_consts[consti])
obj.__mul__(other)
Can return NotImplemented
other.__rmul__(obj)
def BINARY_MULTIPLY(self): right = self.stack.pop() left = self.stack.pop() res = NotImplemented if hasattr(left, '__mul__'): res = left.__mul__(right) if res is NotImplemented and hasattr(right, '__rmul__'): res = right.__rmul__(left) if res is NotImplemented: raise TypeError( "unsuppored oprand type(s) for *: '{}' and '{}'".format( left.__class__.__name__, right.__class__.__name__)) self.stack.append(res)
other python code executed as part of this one bytecode
obj.__add__(other)
Can return NotImplemented
other.__radd__(obj)
def BINARY_ADD(self): right = self.stack.pop() left = self.stack.pop() res = NotImplemented if hasattr(left, '__add__'): res = left.__add__(right) if res is NotImplemented and hasattr(right, '__add__'): res = right.__add__(left) if res is NotImplemented: raise TypeError( "usuppored operand type(s) for +: '{}' and '{}'".format( left.__class__.__name__, right.__class__.__name__)) self.stack.append(res)
other python code executed as part of this one bytecode
Leave return value on TOS
def RETURN_VALUE(self): pass
def eval(self, *args): ... while True: ... if opcode == dis.opmap['RETURN_VALUE']: break
Can execute sum()!
>>> valuestack = [] >>> f_sum = Frame(valuestack, code_sum, locals={'a': 15, 'b': 4}) >>> f_sum.eval() >>> valuestack [42]
XXX Verify this!
>>> dis.dis(code_mod) 1 0 LOAD_CONST 0 ('Docstring for example.py') 3 STORE_NAME 0 (__doc__) 4 6 LOAD_CONST 1 (<code object sum at 0x12b3470, file "example.py", line 4>) 9 MAKE_FUNCTION 0 12 STORE_NAME 1 (sum) 11 15 LOAD_NAME 2 (__name__) 18 LOAD_CONST 2 ('__main__') 21 COMPARE_OP 2 (==) 24 POP_JUMP_IF_FALSE 49 12 27 LOAD_NAME 3 (print) 30 LOAD_NAME 1 (sum) 33 LOAD_CONST 3 (15) 36 LOAD_CONST 4 (4) 39 CALL_FUNCTION 2 42 CALL_FUNCTION 1 45 POP_TOP 46 JUMP_FORWARD 0 (to 49) >> 49 LOAD_CONST 5 (None) 52 RETURN_VALUE
def STORE_NAME(self, namei): name = self.f_code.co_names[namei] self.f_locals[name] = self.stack.pop()
def LOAD_NAME(self, namei): name = self.f_code.co_names[namei] if name in self.f_locals: val = self.f_locals[name] elif name in self.f_globals: val = self.f_globals[name] elif name in self.f_builtins: val = self.f_builtins[name] else: raise NameError("name '{}' is not defined".format(name)) self.stack.append(val)
def MAKE_FUNCTION(self, argc): # XXX Doesn't handle default argument values cobj = self.stack.pop() func = types.FunctionType(cobj, self.f_globals) self.stack.append(func)
Re-using CPython's function type
Notice globals dict
__doc__, __name__, __module__, __defaults__, __code__, __globals__, __dict__, __closure__, __annotations__, __kwdefaults__
def CALL_FUNCTION(self, argc): # XXX Doesn't handle default values or keyword args nkwargs = argc >> 8 nargs = argc & 0xff assert nkwargs == 0 args = [self.stack.pop() for i in range(nargs)] args.reverse() func = self.stack.pop() if hasattr(func, '__code__'): frame = Frame(self.stack, func.__code__, prev_frame=self, globals=func.__globals__) for i, arg in enumerate(args): frame.f_locals[frame.f_code.co_varnames[i]] = arg frame.eval() else: ret = func.__call__(*args) # assume builtin function self.stack.append(ret)
def COMPARE_OP(self, opname): op = dis.cmp_op[opname] arg0 = self.stack.pop() arg1 = self.stack.pop() source = '__crude_hack_arg0 {} __crude_hack_arg1'.format(op) locals = copy.copy(self.f_locals) assert '__crude_hack_arg0' not in locals assert '__crude_hack_arg1' not in locals locals['__crude_hack_arg0'] = arg0 locals['__crude_hack_arg1'] = arg1 res = eval(source, self.f_globals, locals) self.stack.append(res)
def POP_JUMP_IF_FALSE(self, target): if self.stack.pop(): self.f_lasti = target - 3
-3 as eval loop will add 3:
... if opcode >= dis.HAVE_ARGUMENT: arg = co_code[self.f_lasti+1] + (co_code[self.f_lasti+2] << 8) meth(arg) self.f_lasti += 3 ...
def POP_TOP(self): self.stack.pop()
def JUMP_FORWARD(self, delta): self.f_lasti += delta
if delta == 0: advance as normal
vm = VirtualMachine() module = vm.run_pyc_as_main('example.py') assert 'sum' in module.__dict__
$ python3 vm.py 42
If opcode arg >= 2**16
Prefix to any opcode >= HAVE_ARGUMENT
Max arg now 2**32 - 1
This applies to:
i.e pretty rare
Questions?
Floris Bruynooghe
@flubdevork
Table of Contents | t |
---|---|
Exposé | ESC |
Full screen slides | e |
Presenter View | p |
Source Files | s |
Slide Numbers | n |
Toggle screen blanking | b |
Show/hide slide context | c |
Notes | 2 |
Help | h |