Build list for simhash lazily to save memory.

This commit is contained in:
Michael Herzberg 2018-08-17 15:20:00 +01:00
parent e492f516ac
commit 873c249504
1 changed file with 1 addition and 1 deletion

View File

@@ -56,7 +56,7 @@ def get_features(s):
width = 3
s = s.lower()
s = re.sub(r'[^\w]+', '', s)
-return [s[i:i + width] for i in range(max(len(s) - width + 1, 1))]
+return (s[i:i + width] for i in range(max(len(s) - width + 1, 1)))
def get_simhash(encoding, data):