SlideShare a Scribd company logo
Byterun:
A (C)Python interpreter in Python
Allison Kaptur
!
github.com/akaptur
akaptur.github.io
@akaptur
Byterun
Ned Batchelder
!
Based on
# pyvm2 by Paul Swartz (z3p)
from http://www.twistedmatrix.com/users/z3p/
Why would you do such a thing
>>> if a or b:
... do_stuff()
Some things we can do
out = ""
for i in range(5):
out = out + str(i)
print(out)
Some things we can do
def fn(a, b=17, c="Hello", d=[]):
d.append(99)
print(a, b, c, d)
!
fn(1)
fn(2, 3)
fn(3, c="Bye")
fn(4, d=["What?"])
fn(5, "b", "c")
Some things we can do
def verbose(func):
def _wrapper(*args, **kwargs):
return func(*args, **kwargs)
return _wrapper
!
@verbose
def add(x, y):
return x+y
!
add(7, 3)
Some things we can do
try:
raise ValueError("oops")
except ValueError as e:
print("Caught: %s" % e)
print("All done")
Some things we can do
class NullContext(object):
def __enter__(self):
l.append('i')
return self
!
def __exit__(self, exc_type, exc_val, exc_tb):
l.append('o')
return False
!
l = []
for i in range(3):
with NullContext():
l.append('w')
if i % 2:
break
l.append('z')
l.append('e')
!
l.append('r')
s = ''.join(l)
print("Look: %r" % s)
assert s == "iwzoeiwor"
Some things we can do
g = (x*x for x in range(3))
print(list(g))
A problem
g = (x*x for x in range(5))
h = (y+1 for y in g)
print(list(h))
The Python virtual machine:
!
A bytecode interpreter
Bytecode:
the internal representation of a python
program in the interpreter
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod.func_code.co_code
Function Code
object
Bytecode
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod.func_code.co_code
'|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S'
Bytecode: it’s bytes!
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod.func_code.co_code
'|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S'
>>> [ord(b) for b in mod.func_code.co_code]
[124, 0, 0, 124, 1, 0, 22, 125, 2, 0, 124, 2, 0, 83]
dis, a bytecode disassembler
>>> import dis
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
dis, a bytecode disassembler
>>> import dis
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
Line
Number
Index in
bytecode
Instruction
name, for
humans
More bytes, the
argument to each
instruction
Hint about
arguments
whatever
some other thing
something
whatever
some other thing
something
a
b
whatever
some other thing
something
ans
Before
After
BINARY_MODULO
After
LOAD_FAST
Data stack on a frame
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
c ---------------------
a | bar Frame | -> blocks: []
l | (newest) | -> data: [1, 2]
l ---------------------
| foo Frame | -> blocks: []
s | | -> data: [<foo.<lcl>.bar, 1]
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [<foo>]
k ---------------------
dis, a bytecode disassembler
>>> import dis
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
Byterun, a Python bytecode interpreter - Allison Kaptur at NYCPython
} /*switch*/
/* Main switch on opcode
*/
READ_TIMESTAMP(inst0);
!
switch (opcode) {
#ifdef CASE_TOO_BIG
default: switch (opcode) {
#endif
/* Turn this on if your compiler chokes on the big switch: */
/* #define CASE_TOO_BIG 1 */
Back to that bytecode
!
>>> dis.dis(mod)
2 0 LOAD_FAST 0 (a)
3 LOAD_FAST 1 (b)
6 BINARY_MODULO
7 STORE_FAST 2 (ans)
!
3 10 LOAD_FAST 2 (ans)
13 RETURN_VALUE
case LOAD_FAST:
x = GETLOCAL(oparg);
if (x != NULL) {
Py_INCREF(x);
PUSH(x);
goto fast_next_opcode;
}
format_exc_check_arg(PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(co->co_varnames, oparg));
break;
case BINARY_MODULO:
w = POP();
v = TOP();
if (PyString_CheckExact(v))
x = PyString_Format(v, w);
else
x = PyNumber_Remainder(v, w);
Py_DECREF(v);
Py_DECREF(w);
SET_TOP(x);
if (x != NULL) continue;
break;
It’s “dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
“Dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
>>> mod(“%s%s”, (“NYC”, “Python”))
“Dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
>>> mod(“%s %s”, (“NYC”, “Python”))
NYC Python
“Dynamic”
>>> def mod(a, b):
... ans = a % b
... return ans
>>> mod(15, 4)
3
>>> mod(“%s %s”, (“NYC”, “Python”))
NYC Python
>>> print “%s %s” % (“NYC”, “Python”)
NYC Python
case BINARY_MODULO:
w = POP();
v = TOP();
if (PyString_CheckExact(v))
x = PyString_Format(v, w);
else
x = PyNumber_Remainder(v, w);
Py_DECREF(v);
Py_DECREF(w);
SET_TOP(x);
if (x != NULL) continue;
break;
>>> class Surprising(object):
... def __mod__(self, other):
... print “Surprise!”
!
>>> s = Surprising()
>>> t = Surprising()
>>> s % t
Surprise!
“In the general absence of type information, almost
every instruction must be treated as
INVOKE_ARBITRARY_METHOD.”
!
- Russell Power and Alex Rubinsteyn, “How Fast Can
We Make Interpreted Python?”
Back to our problem
g = (x*x for x in range(5))
h = (y+1 for y in g)
print(list(h))
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
c ---------------------
a | bar Frame | -> blocks: []
l | (newest) | -> data: [1, 2]
l ---------------------
| foo Frame | -> blocks: []
s | | -> data: [<foo.<lcl>.bar, 1]
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [<foo>]
k ---------------------
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
!
!
l ---------------------
| foo Frame | -> blocks: []
s | | -> data: [3]
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [<foo>]
k ---------------------
def foo():
x = 1
def bar(y):
z = y + 2 # <--- (3)
return z
return bar(x) # <--- (2)
foo() # <--- (1)
!
!
s
t ---------------------
a | main (module) Frame | -> blocks: []
c | (oldest) | -> data: [3]
k ---------------------
Back to our problem
g = (x*x for x in range(5))
h = (y+1 for y in g)
print(list(h))
More
Great blogs
http://tech.blog.aknin.name/category/my-projects/pythons-innards/ by @aknin
http://eli.thegreenplace.net/ by Eli Bendersky
!
Contribute! Find bugs!
https://github.com/nedbat/byterun
!
Apply to Hacker School!
www.hackerschool.com/apply

More Related Content

PDF
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
PDF
Diving into byte code optimization in python
PDF
Bytes in the Machine: Inside the CPython interpreter
PDF
"A 1,500 line (!!) switch statement powers your Python!" - Allison Kaptur, !!...
PDF
Exploring slides
PDF
Python opcodes
PPTX
TCO in Python via bytecode manipulation.
PDF
All I know about rsc.io/c2go
Allison Kaptur: Bytes in the Machine: Inside the CPython interpreter, PyGotha...
Diving into byte code optimization in python
Bytes in the Machine: Inside the CPython interpreter
"A 1,500 line (!!) switch statement powers your Python!" - Allison Kaptur, !!...
Exploring slides
Python opcodes
TCO in Python via bytecode manipulation.
All I know about rsc.io/c2go

What's hot (20)

PDF
Implementing Software Machines in C and Go
PPT
Python легко и просто. Красиво решаем повседневные задачи
PDF
Faster Python, FOSDEM
PDF
Introducción a Elixir
PDF
Go a crash course
DOCX
Wap to implement bitwise operators
PDF
PDF
Implementing Software Machines in Go and C
PDF
Functional Programming inside OOP? It’s possible with Python
PPTX
Load-time Hacking using LD_PRELOAD
PDF
Playing 44CON CTF for fun and profit
ODP
The secrets of inverse brogramming
PPT
Whats new in_csharp4
PDF
Phil Bartie QGIS PLPython
PDF
«Отладка в Python 3.6: Быстрее, Выше, Сильнее» Елизавета Шашкова, JetBrains
PDF
Metarhia KievJS 22-Feb-2018
PDF
When RV Meets CEP (RV 2016 Tutorial)
PDF
Let's golang
KEY
JavaScript @ CTK
PDF
Are we ready to Go?
Implementing Software Machines in C and Go
Python легко и просто. Красиво решаем повседневные задачи
Faster Python, FOSDEM
Introducción a Elixir
Go a crash course
Wap to implement bitwise operators
Implementing Software Machines in Go and C
Functional Programming inside OOP? It’s possible with Python
Load-time Hacking using LD_PRELOAD
Playing 44CON CTF for fun and profit
The secrets of inverse brogramming
Whats new in_csharp4
Phil Bartie QGIS PLPython
«Отладка в Python 3.6: Быстрее, Выше, Сильнее» Елизавета Шашкова, JetBrains
Metarhia KievJS 22-Feb-2018
When RV Meets CEP (RV 2016 Tutorial)
Let's golang
JavaScript @ CTK
Are we ready to Go?
Ad

Similar to Byterun, a Python bytecode interpreter - Allison Kaptur at NYCPython (20)

PDF
Building Interpreters with PyPy
PPT
python within 50 page .ppt
PDF
Understanding PyPy - PyConEs 14
PDF
Cluj.py Meetup: Extending Python in C
PPT
FALLSEM2022-23_ITA3007_ETH_VL2022230100613_Reference_Material_I_23-09-2022_py...
PDF
Python introduction
PDF
Notes about moving from python to c++ py contw 2020
PDF
PyPy's approach to construct domain-specific language runtime
PDF
Tutorial on-python-programming
PDF
Intro to Python
PPTX
Chapter 2 Python Language Basics, IPython.pptx
PPT
C463_02_python.ppt
PPT
C463_02_python.ppt
PPTX
An Introduction : Python
PPT
Python Kick Start
PDF
What is python
PPTX
Introduction to Programming.pptx ok ok ok
PDF
web programming UNIT VIII python by Bhavsingh Maloth
PPT
PYTHON
PDF
From 0 to mine sweeper in pyside
Building Interpreters with PyPy
python within 50 page .ppt
Understanding PyPy - PyConEs 14
Cluj.py Meetup: Extending Python in C
FALLSEM2022-23_ITA3007_ETH_VL2022230100613_Reference_Material_I_23-09-2022_py...
Python introduction
Notes about moving from python to c++ py contw 2020
PyPy's approach to construct domain-specific language runtime
Tutorial on-python-programming
Intro to Python
Chapter 2 Python Language Basics, IPython.pptx
C463_02_python.ppt
C463_02_python.ppt
An Introduction : Python
Python Kick Start
What is python
Introduction to Programming.pptx ok ok ok
web programming UNIT VIII python by Bhavsingh Maloth
PYTHON
From 0 to mine sweeper in pyside
Ad

Recently uploaded (20)

PPTX
FINAL REVIEW FOR COPD DIANOSIS FOR PULMONARY DISEASE.pptx
PDF
PRIZ Academy - 9 Windows Thinking Where to Invest Today to Win Tomorrow.pdf
PPTX
Simulation of electric circuit laws using tinkercad.pptx
PPTX
Sustainable Sites - Green Building Construction
PPTX
Internet of Things (IOT) - A guide to understanding
PPTX
MET 305 MODULE 1 KTU 2019 SCHEME 25.pptx
PDF
Queuing formulas to evaluate throughputs and servers
PPTX
Strings in CPP - Strings in C++ are sequences of characters used to store and...
PDF
Arduino robotics embedded978-1-4302-3184-4.pdf
PPTX
UNIT-1 - COAL BASED THERMAL POWER PLANTS
PDF
July 2025 - Top 10 Read Articles in International Journal of Software Enginee...
PPTX
bas. eng. economics group 4 presentation 1.pptx
PDF
Evaluating the Democratization of the Turkish Armed Forces from a Normative P...
PPTX
MCN 401 KTU-2019-PPE KITS-MODULE 2.pptx
PPTX
Unit 5 BSP.pptxytrrftyyydfyujfttyczcgvcd
PPTX
Lesson 3_Tessellation.pptx finite Mathematics
PPTX
Geodesy 1.pptx...............................................
PPT
Drone Technology Electronics components_1
PPTX
CYBER-CRIMES AND SECURITY A guide to understanding
DOCX
ASol_English-Language-Literature-Set-1-27-02-2023-converted.docx
FINAL REVIEW FOR COPD DIANOSIS FOR PULMONARY DISEASE.pptx
PRIZ Academy - 9 Windows Thinking Where to Invest Today to Win Tomorrow.pdf
Simulation of electric circuit laws using tinkercad.pptx
Sustainable Sites - Green Building Construction
Internet of Things (IOT) - A guide to understanding
MET 305 MODULE 1 KTU 2019 SCHEME 25.pptx
Queuing formulas to evaluate throughputs and servers
Strings in CPP - Strings in C++ are sequences of characters used to store and...
Arduino robotics embedded978-1-4302-3184-4.pdf
UNIT-1 - COAL BASED THERMAL POWER PLANTS
July 2025 - Top 10 Read Articles in International Journal of Software Enginee...
bas. eng. economics group 4 presentation 1.pptx
Evaluating the Democratization of the Turkish Armed Forces from a Normative P...
MCN 401 KTU-2019-PPE KITS-MODULE 2.pptx
Unit 5 BSP.pptxytrrftyyydfyujfttyczcgvcd
Lesson 3_Tessellation.pptx finite Mathematics
Geodesy 1.pptx...............................................
Drone Technology Electronics components_1
CYBER-CRIMES AND SECURITY A guide to understanding
ASol_English-Language-Literature-Set-1-27-02-2023-converted.docx

Byterun, a Python bytecode interpreter - Allison Kaptur at NYCPython

  • 1. Byterun: A (C)Python interpreter in Python Allison Kaptur ! github.com/akaptur akaptur.github.io @akaptur
  • 2. Byterun Ned Batchelder ! Based on # pyvm2 by Paul Swartz (z3p) from http://www.twistedmatrix.com/users/z3p/
  • 3. Why would you do such a thing >>> if a or b: ... do_stuff()
  • 4. Some things we can do out = "" for i in range(5): out = out + str(i) print(out)
  • 5. Some things we can do def fn(a, b=17, c="Hello", d=[]): d.append(99) print(a, b, c, d) ! fn(1) fn(2, 3) fn(3, c="Bye") fn(4, d=["What?"]) fn(5, "b", "c")
  • 6. Some things we can do def verbose(func): def _wrapper(*args, **kwargs): return func(*args, **kwargs) return _wrapper ! @verbose def add(x, y): return x+y ! add(7, 3)
  • 7. Some things we can do try: raise ValueError("oops") except ValueError as e: print("Caught: %s" % e) print("All done")
  • 8. Some things we can do class NullContext(object): def __enter__(self): l.append('i') return self ! def __exit__(self, exc_type, exc_val, exc_tb): l.append('o') return False ! l = [] for i in range(3): with NullContext(): l.append('w') if i % 2: break l.append('z') l.append('e') ! l.append('r') s = ''.join(l) print("Look: %r" % s) assert s == "iwzoeiwor"
  • 9. Some things we can do g = (x*x for x in range(3)) print(list(g))
  • 10. A problem g = (x*x for x in range(5)) h = (y+1 for y in g) print(list(h))
  • 11. The Python virtual machine: ! A bytecode interpreter
  • 12. Bytecode: the internal representation of a python program in the interpreter
  • 13. Bytecode: it’s bytes! >>> def mod(a, b): ... ans = a % b ... return ans
  • 14. Bytecode: it’s bytes! >>> def mod(a, b): ... ans = a % b ... return ans >>> mod.func_code.co_code Function Code object Bytecode
  • 15. Bytecode: it’s bytes! >>> def mod(a, b): ... ans = a % b ... return ans >>> mod.func_code.co_code '|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S'
  • 16. Bytecode: it’s bytes! >>> def mod(a, b): ... ans = a % b ... return ans >>> mod.func_code.co_code '|\x00\x00|\x01\x00\x16}\x02\x00|\x02\x00S' >>> [ord(b) for b in mod.func_code.co_code] [124, 0, 0, 124, 1, 0, 22, 125, 2, 0, 124, 2, 0, 83]
  • 17. dis, a bytecode disassembler >>> import dis >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE
  • 18. dis, a bytecode disassembler >>> import dis >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE Line Number Index in bytecode Instruction name, for humans More bytes, the argument to each instruction Hint about arguments
  • 19. whatever some other thing something whatever some other thing something a b whatever some other thing something ans Before After BINARY_MODULO After LOAD_FAST Data stack on a frame
  • 20. def foo(): x = 1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! c --------------------- a | bar Frame | -> blocks: [] l | (newest) | -> data: [1, 2] l --------------------- | foo Frame | -> blocks: [] s | | -> data: [<foo.<lcl>.bar, 1] t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [<foo>] k ---------------------
  • 21. dis, a bytecode disassembler >>> import dis >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE
  • 23. } /*switch*/ /* Main switch on opcode */ READ_TIMESTAMP(inst0); ! switch (opcode) {
  • 24. #ifdef CASE_TOO_BIG default: switch (opcode) { #endif /* Turn this on if your compiler chokes on the big switch: */ /* #define CASE_TOO_BIG 1 */
  • 25. Back to that bytecode ! >>> dis.dis(mod) 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_MODULO 7 STORE_FAST 2 (ans) ! 3 10 LOAD_FAST 2 (ans) 13 RETURN_VALUE
  • 26. case LOAD_FAST: x = GETLOCAL(oparg); if (x != NULL) { Py_INCREF(x); PUSH(x); goto fast_next_opcode; } format_exc_check_arg(PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG, PyTuple_GetItem(co->co_varnames, oparg)); break;
  • 27. case BINARY_MODULO: w = POP(); v = TOP(); if (PyString_CheckExact(v)) x = PyString_Format(v, w); else x = PyNumber_Remainder(v, w); Py_DECREF(v); Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; break;
  • 28. It’s “dynamic” >>> def mod(a, b): ... ans = a % b ... return ans >>> mod(15, 4) 3
  • 29. “Dynamic” >>> def mod(a, b): ... ans = a % b ... return ans >>> mod(15, 4) 3 >>> mod(“%s%s”, (“NYC”, “Python”))
  • 30. “Dynamic” >>> def mod(a, b): ... ans = a % b ... return ans >>> mod(15, 4) 3 >>> mod(“%s %s”, (“NYC”, “Python”)) NYC Python
  • 31. “Dynamic” >>> def mod(a, b): ... ans = a % b ... return ans >>> mod(15, 4) 3 >>> mod(“%s %s”, (“NYC”, “Python”)) NYC Python >>> print “%s %s” % (“NYC”, “Python”) NYC Python
  • 32. case BINARY_MODULO: w = POP(); v = TOP(); if (PyString_CheckExact(v)) x = PyString_Format(v, w); else x = PyNumber_Remainder(v, w); Py_DECREF(v); Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; break;
  • 33. >>> class Surprising(object): ... def __mod__(self, other): ... print “Surprise!” ! >>> s = Surprising() >>> t = Surprising() >>> s % t Surprise!
  • 34. “In the general absence of type information, almost every instruction must be treated as INVOKE_ARBITRARY_METHOD.” ! - Russell Power and Alex Rubinsteyn, “How Fast Can We Make Interpreted Python?”
  • 35. Back to our problem g = (x*x for x in range(5)) h = (y+1 for y in g) print(list(h))
  • 36. def foo(): x = 1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! c --------------------- a | bar Frame | -> blocks: [] l | (newest) | -> data: [1, 2] l --------------------- | foo Frame | -> blocks: [] s | | -> data: [<foo.<lcl>.bar, 1] t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [<foo>] k ---------------------
  • 37. def foo(): x = 1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! ! ! l --------------------- | foo Frame | -> blocks: [] s | | -> data: [3] t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [<foo>] k ---------------------
  • 38. def foo(): x = 1 def bar(y): z = y + 2 # <--- (3) return z return bar(x) # <--- (2) foo() # <--- (1) ! ! s t --------------------- a | main (module) Frame | -> blocks: [] c | (oldest) | -> data: [3] k ---------------------
  • 39. Back to our problem g = (x*x for x in range(5)) h = (y+1 for y in g) print(list(h))
  • 40. More Great blogs http://tech.blog.aknin.name/category/my-projects/pythons-innards/ by @aknin http://eli.thegreenplace.net/ by Eli Bendersky ! Contribute! Find bugs! https://github.com/nedbat/byterun ! Apply to Hacker School! www.hackerschool.com/apply