From 566130d1b350c61d940787b44f3fd3c678ded5a0 Mon Sep 17 00:00:00 2001
From: andrei
Date: Thu, 20 Jul 2017 16:26:44 -0700
Subject: [PATCH] [perf] greatly increase hashmap performance

UserDict was the wrong way to achieve this; modern versions of Python can
just subclass dict. This provides an immense performance boost by allowing
getitem/setitem calls to be routed directly to the underlying storage
inside CPython, instead of having to route through the item's MRO and
eventually hit __dict__.

HashMap.items() iterates through dict's own implementation via super().
On Python 3, six.iteritems(d) is iter(d.items()), so passing `self` would
call the overriding items() again and recurse until RecursionError.
---
Review notes (this section is ignored by `git am`):
- Changed relative to the first revision: items() goes through
  super(HashMap, self) instead of self, and test_hashmap_builtins asserts
  `> 0` because random.randint(1, 1000000) is inclusive and may return 1.
- The commit id on the first line and the blob ids on the `index` lines
  come from the first revision and were not regenerated.
- tests/util/hashmap.py does not match pytest's default `test_*.py` /
  `*_test.py` file patterns; confirm the project sets `python_files`,
  otherwise these tests are never collected.

 disco/util/hashmap.py | 16 ++++++----------
 tests/util/hashmap.py | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 10 deletions(-)
 create mode 100644 tests/util/hashmap.py

diff --git a/disco/util/hashmap.py b/disco/util/hashmap.py
index 50cf6f4..19a6b2d 100644
--- a/disco/util/hashmap.py
+++ b/disco/util/hashmap.py
@@ -1,21 +1,17 @@
 import six
 
-from six.moves import filter, map, UserDict
+from six.moves import filter, map
 from collections import defaultdict
 
 
-class HashMap(UserDict):
+class HashMap(dict):
+    __slots__ = ()
+
     def iter(self):
-        return iter(self.data)
+        return iter(self)
 
     def items(self):
-        return six.iteritems(self.data)
-
-    def keys(self):
-        return six.iterkeys(self.data)
-
-    def values(self):
-        return six.itervalues(self.data)
+        return six.iteritems(super(HashMap, self))
 
     def find(self, predicate):
         if not callable(predicate):
diff --git a/tests/util/hashmap.py b/tests/util/hashmap.py
new file mode 100644
index 0000000..b57da6a
--- /dev/null
+++ b/tests/util/hashmap.py
@@ -0,0 +1,40 @@
+import pytest
+import random
+
+from disco.util.hashmap import HashMap
+
+
+@pytest.fixture
+def hashmap():
+    return HashMap({i: random.randint(1, 1000000) for i in range(100000)})
+
+
+def test_hashmap_insert_performance(benchmark):
+    def bench_hashmap_insert(hsh):
+        hsh[random.randint(1, 100000)] = True
+
+    benchmark(bench_hashmap_insert, HashMap())
+
+
+def test_hashmap_lookup_performance(benchmark, hashmap):
+    def bench_hashmap_lookup():
+        assert hashmap[random.randint(1, 10000)] > 0
+
+    benchmark(bench_hashmap_lookup)
+
+
+def test_hashmap_find(hashmap):
+    assert len(list(hashmap.find(lambda v: v > 0))) == len(hashmap)
+    assert hashmap.find_one(lambda v: v > 0) > 0
+
+
+def test_hashmap_filter(hashmap):
+    for item in list(hashmap.filter(lambda v: v % 2 == 0)):
+        assert item % 2 == 0
+
+
+def test_hashmap_builtins(hashmap):
+    for item in hashmap:
+        assert item in hashmap
+
+    assert hashmap.popitem()[1] > 0