Benchy, python framework for performance benchmarking of Python Scripts

Benchy
Lightweight performance benchmarking framework for
Python scripts
Marcel Caraciolo
@marcelcaraciolo
Developer, scientist, contributor to the Crab recsys project,
has worked with Python for 6 years, interested in mobile,
education, machine learning and dataaaaa!
Recife, Brazil - http://aimotion.blogspot.com

About me
Co-founder of Crab - Python recsys library
Chief Scientist at Atepassar, an e-learning social network
Co-Founder and Instructor of PyCursos, teaching Python on-line
Co-Founder of Pingmind, on-line infrastructure for MOOCs
Interested in Python, mobile, e-learning and machine learning!

Solutions ?
In
[1]:
def
f(x):

...:

return
x*x

...:

In
[2]:
%timeit
for
x
in
range
(100):
f(x)
100000
loops,
best
of
3:
20.3
us

per
loop

Stop. Help is near
Performance benchmarks to compare several alternative
Python implementations
Generates graphs using matplotlib
Memory consumption and performance timing available
https://github.com/python-recsys/benchy

Writing benchmarks
$
easy_install
-U
benchy

#
pip
install
-U
benchy

Writing benchmarks
from
benchy.api
import
Benchmark
common_setup
=
""
statement
=
"lst
=
['i'
for
x
in
range(100000)]"
benchmark1
=
Benchmark(statement,
common_setup,
name=
"range")
statement
=
"lst
=
['i'
for
x
in
xrange(100000)]"
benchmark2
=
common_setup,
name=
"xrange")
statement
=
"lst
=
['i']
*
100000"
benchmark3
=
common_setup,
name=
"range")

Use them in your
workflow
[1]:
print
benchmark1.run()
{'memory':
{'repeat':
3,

'success':
True,

'units':
'MB',

'usage':
2.97265625},

'runtime':
{'loops':
100,

'repeat':
3,

'success':
True,

'timing':
7.5653696060180664,

'units':
'ms'}}
Same code as %timeit
and %memit

Beautiful reports
rst_text
=
benchmark1.to_rst(results)

Benchmark suite
from
benchy.api
import
BenchmarkSuite
suite
=
BenchmarkSuite()
suite.append(benchmark1)

Run the benchmarks
from
benchy.api
import
BenchmarkRunner
runner
=
BenchmarkRunner(benchmarks=suite,
tmp_dir='.',

name=
'List
Allocation
Benchmark')
n_benchs,
results
=
runner.run()

Which is the fastest?
{Benchmark('list
with
"*"'):

{'runtime':
{'timing':
0.47582697868347168,
'repeat':
3,
'success':
True,
'loops':
1000,

'timeBaselines':
1.0,
'units':
'ms'},

'memory':
{'usage':
0.3828125,
'units':
'MB',
'repeat':
3,
'success':
True}},
Benchmark('list
with
xrange'):

{'runtime':
{'timing':
5.623779296875,
'repeat':
3,
'success':
True,
'loops':
100,

'timeBaselines':
11.818958463504936,
'units':
'ms'},

'memory':
{'usage':
0.71484375,
'units':
'MB',
'repeat':
3,
'success':
True}},
Benchmark('list
with
range'):
{

'runtime':
{'timing':
6.5933513641357422,
'repeat':
3,
'success':
True,
'loops':
100,

'timeBaselines':
13.856615239384636,
'units':
'ms'},

'memory':
{'usage':
2.2109375,
'units':
'MB',
'repeat':
3,
'success':
True}}}

Plot relative
fig
=
runner.plot_relative(results,
horizontal=True)
plt.savefig('%s_r.png'
%
runner.name,
bbox_inches='tight')

Plot absolute
runner.plot_absolute(results,
horizontal=False)
plt.savefig('%s.png'
%
runner.name)
#
bbox_inches='tight')

Full report
rst_text
=
runner.to_rst(results,
runner.name
+
'png',

runner.name
+
'_r.png')
with
open('teste.rst',
'w')
as
f:

f.write(rst_text)

Why ?
Benchmarking pairwise functions in the Crab recsys library
http://aimotion.blogspot.com.br/2013/03/performing-runtime-benchmarks-with.html

Get involved
Create the benchmarks as TestCases
Check automatically for benchmark files and run like %nose.test()
More setup and teardown control
Group benchmarks in the same graph

Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks

db.py
import
sqlite3

class
BenchmarkDb(object):

"""

Persistence
handler
for
benchmark
results

"""

def
_create_tables(self):

self._cursor.execute("drop
table
if
exists
benchmarksuites")

table
if
exists
benchmarks")

table
if
exists
results")

...

self._cursor.execute('CREATE
TABLE

benchmarks(checksum
text
PRIMARY
KEY,

name
text,
description
text,
suite_id
integer,

FOREIGN
KEY(suite_id)
REFERENCES
benchmarksuites(id))')

self._cursor.execute('CREATE
TABLE
results(id
integer

PRIMARY
KEY
AUTOINCREMENT,
checksum
text,

timestamp
timestamp,
ncalls
text,
timing
float,
traceback
text,

FOREIGN
KEY(checksum)
REFERENCES
benchmarks(checksum))')

self._con.commit()

def
write_benchmark(self,
bm,
suite=None):

if
suite
is
not
None:

self._cursor.execute('SELECT
id
FROM
benchmarksuites

where
name
=
"%s"'
%
suite.name)

row
=
self._cursor.fetchone()

else:

row
=
None

if
row
==
None:

self._cursor.execute('INSERT
INTO
benchmarks
VALUES
(?,
?,
?,
?)',

(bm.checksum,
bm.name,
bm.description,
None))

else:

self._cursor.execute('INSERT
INTO
benchmarks
VALUES
(?,
?,
?,
?)',

(bm.checksum,
bm.name,
bm.description,
row[0]))

Git Repo
class
GitRepository(Repository):

"""

Read
some
basic
statistics
about
a
git
repository

"""

def
__init__(self,
repo_path):

self.repo_path
=
repo_path

self.git
=
_git_command(self.repo_path)

(self.shas,
self.messages,

self.timestamps,
self.authors)
=
self._parse_commit_log()
[('d87fdf2', datetime.datetime(2013, 3, 22, 16, 55, 38)), ('a90a449', datetime.datetime(2013, 3, 22, 16, 54, 36)),
('fe66a86', datetime.datetime(2013, 3, 22, 16, 51, 2)), ('bea6b21', datetime.datetime(2013, 3, 22, 13, 14, 22)),
('bde5e63', datetime.datetime(2013, 3, 22, 5, 2, 56)), ('89634f6', datetime.datetime(2013, 3, 20, 4, 16, 19))]

Git Repo
class
BenchmarkRepository(object):

"""

Manage
an
isolated
copy
of
a
repository
for
benchmarking

"""

...

def
_copy_repo(self):

if
os.path.exists(self.target_dir):

print
'Deleting
%s
first'
%
self.target_dir

#
response
=
raw_input('%s
exists,
delete?
y/n'
%
self.target_dir)

#
if
response
==
'n':

#

raise
Exception('foo')

cmd
=
'rm
-rf
%s'
%
self.target_dir

print
cmd

os.system(cmd)

self._clone(self.target_dir_tmp,
self.target_dir)

self._prep()

self._copy_benchmark_scripts_and_deps()

def
_clone(self,
source,
target):

cmd
=
'git
clone
%s
%s'
%
(source,
target)

print
cmd

os.system(cmd)

def
_copy_benchmark_scripts_and_deps(self):

pth,
_
=
os.path.split(os.path.abspath(__file__))

deps
=
[os.path.join(pth,
'run_benchmarks.py')]

if
self.dependencies
is
not
None:

deps.extend(self.dependencies)

for
dep
in
deps:

cmd
=
'cp
%s
%s'
%
(dep,
self.target_dir)

print
cmd

proc
=
subprocess.Popen(cmd,
shell=True)

proc.wait()

New Runner

class
BenchmarkGitRunner(BenchmarkRunner):

...

def
_register_benchmarks(self):

ex_benchmarks
=
self.db.get_benchmarks()

db_checksums
=
set(ex_benchmarks.index)

for
bm
in
self.benchmarks:

if
bm.checksum
in
db_checksums:

self.db.update_name(bm)

else:

print
'Writing
new
benchmark
%s,
%s'
%
(bm.name,

bm.checksum)

self.db.write_benchmark(bm)

New runner

class

...

def
_run_revision(self,
rev):

need_to_run
=
self._get_benchmarks_for_rev(rev)

if
not
need_to_run:

print
'No
benchmarks
need
running
at
%s'
%
rev

return
0,
{}

print
'Running
%d
benchmarks
for
revision
%s'
%
(len(need_to_run),
rev)

for
bm
in
need_to_run:

print
bm.name

self.bench_repo.switch_to_revision(rev)

pickle_path
=
os.path.join(self.tmp_dir,
'benchmarks.pickle')

results_path
=
os.path.join(self.tmp_dir,
'results.pickle')

if
os.path.exists(results_path):

os.remove(results_path)

pickle.dump(need_to_run,
open(pickle_path,
'w'))

#
run
the
process

cmd
=
'python
%s/run_benchmarks.py
%s
%s'
%
(pickle_path,
results_path)

print
cmd

proc
=
stdout=subprocess.PIPE,

stderr=subprocess.PIPE,

shell=True,

cwd=self.tmp_dir)

stdout,
stderr
=
proc.communicate()

New runner

class

...

def
_run_revision(self,
rev):

need_to_run
=
self._get_benchmarks_for_rev(rev)

if
not
need_to_run:

print
'No
benchmarks
need
running
at
%s'
%
rev

return
0,
{}

print
'Running
%d
benchmarks
for
revision
%s'
%
(len(need_to_run),
rev)

for
bm
in
need_to_run:

print
bm.name

self.bench_repo.switch_to_revision(rev)

#
run
the
process

cmd
=
'python
%s/run_benchmarks.py
%s
%s'
%
(pickle_path,
results_path)

print
cmd

proc
=
stdout=subprocess.PIPE,

stderr=subprocess.PIPE,

shell=True,

cwd=self.tmp_dir)

stdout,
stderr
=
proc.communicate()

if
stderr:

if
("object
has
no
attribute"
in
stderr
or

'ImportError'
in
stderr):

print
stderr

print
'HARD
CLEANING!'

self.bench_repo.hard_clean()

print
stderr

if
not
os.path.exists(results_path):

print
'Failed
for
revision
%s'
%
rev

return
len(need_to_run),
{}

results
=
pickle.load(open(results_path,
'r'))

Running
from
benchmark
import
Benchmark,
BenchmarkRepository,
BenchmarkGitRunner
try:

REPO_PATH
=
config.get('setup',
'repo_path')

REPO_URL
=
config.get('setup',
'repo_url')

DB_PATH
=
config.get('setup',
'db_path')

TMP_DIR
=
config.get('setup',
'tmp_dir')
except:

REPO_PATH
=
os.path.abspath(os.path.join(os.path.dirname(__file__),

"../"))

REPO_URL
=
'git@github.com:python-recsys/crab.git'

DB_PATH
=
os.path.join(REPO_PATH,
'suite/benchmarks.db')

TMP_DIR
=
os.path.join(HOME,
'tmp/base_benchy/')
PREPARE
=
"""
python
setup.py
clean
"""
BUILD
=
"""
python
setup.py
build_ext
--inplace
"""
repo
=
BenchmarkRepository(REPO_PATH,
REPO_URL,
DB_PATH,
TMP_DIR)

Running

common_setup
=
"""

import
numpy

from
crab.metrics
import
cosine_distances

X
=
numpy.random.uniform(1,5,(1000,))

"""

bench
=
setup_bk1,
name="Crab

Cosine")

suite
=
BenchmarkSuite()

suite.append(bench)

statement
=
"cosine_distances(X,
X)"

runner
=
BenchmarkGitRunner(suite,
'.',
'Absolute

timing
in
ms')

n_benchs,
results
=
runner.run()

runner.plot_history(results)

plt.show()

Improvements
Historical commits from version control are now
benchmarked

Working now:
Module detection
by_module
=
{}
benchmarks
=
[]
modules
=
['metrics',

'recommenders',

'similarities']
for
modname
in
modules:

ref
=
__import__(modname)

by_module[modname]
=
[v
for
v
in
ref.__dict__.values()

if
isinstance(v,
Benchmark)]

benchmarks.extend(by_module[modname])
for
bm
in
benchmarks:

assert(bm.name
is
not
None)

https://github.com/python-recsys/benchy
Forks and pull requests are welcome!

Benchy, python framework for performance benchmarking of Python Scripts

Recommandé

Recommandé

Contenu connexe

Tendances

Tendances (20)

En vedette

En vedette (20)

Similaire à Benchy, python framework for performance benchmarking of Python Scripts

Similaire à Benchy, python framework for performance benchmarking of Python Scripts (20)

Plus de Marcel Caraciolo

Plus de Marcel Caraciolo (20)

Dernier

Dernier (20)

Benchy, python framework for performance benchmarking of Python Scripts