diff --git a/ExtensionCrawler/archive.py b/ExtensionCrawler/archive.py index ddea8be..1f4187d 100644 --- a/ExtensionCrawler/archive.py +++ b/ExtensionCrawler/archive.py @@ -23,7 +23,6 @@ import os import glob import re import json -from multiprocessing import Pool from concurrent.futures import TimeoutError from pebble import ProcessPool, ProcessExpired from functools import partial @@ -44,6 +43,7 @@ from ExtensionCrawler.config import ( from ExtensionCrawler.util import google_dos_protection, value_of, log_info, log_warning, log_exception, setup_logger from ExtensionCrawler.db import update_db_incremental + class Error(Exception): pass @@ -82,11 +82,11 @@ class RequestResult: class UpdateResult: - def __init__(self, id, is_new, exception, res_overview, res_crx, + def __init__(self, ext_id, is_new, exception, res_overview, res_crx, res_reviews, res_support, res_sql, sql_update, worker_exception=None): - self.id = id + self.ext_id = ext_id self.new = is_new - self.exception = exception # TODO: should be tar_exception + self.exception = exception # TODO: should be tar_exception self.res_overview = res_overview self.res_crx = res_crx self.res_reviews = res_reviews @@ -188,7 +188,7 @@ def last_modified_http_date(path): def last_crx(archivedir, extid, date=None): - last_crx = "" + last_crx_path = "" last_crx_etag = "" etag_file = os.path.join(archivedir, get_local_archive_dir(extid), @@ -198,14 +198,13 @@ def last_crx(archivedir, extid, date=None): with open(etag_file, 'r') as f: d = json.load(f) return d["last_crx"], d["last_crx_etag"] - except Exception as e: + except Exception: log_exception("Something was wrong with the etag file {}, deleting it ...".format(etag_file)) try: os.remove(etag_file) - except Exception as e: + except Exception: log_exception("Could not remove etag file {}!".format(etag_file)) - # If we do not yet have an .etag file present, open the tarfile and look # there for one. After having done that once, the crawler creates the .etag # file to avoid opening the tar file in the future. 
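Reviewer note: the hunk above tidies the `last_crx` locals and the bare `except Exception as e` handlers, and the closing comment spells out the caching scheme: a small `<extid>.etag` JSON file is consulted first, and the much larger tar archive is only opened when that file is missing or unreadable, after which the cache is rewritten. A minimal sketch of that read-through pattern, with a hypothetical `scan_tar_for_last_crx` callback standing in for the real tar-scanning code (not part of the patch):

```python
import json
import os

def cached_last_crx(etag_file, scan_tar_for_last_crx):
    """Return (last_crx_path, last_crx_etag), preferring the small JSON cache."""
    try:
        with open(etag_file, 'r') as f:
            d = json.load(f)
            return d["last_crx"], d["last_crx_etag"]
    except (OSError, ValueError, KeyError):
        # Cache missing or corrupt: drop it and fall back to the tar archive.
        try:
            os.remove(etag_file)
        except OSError:
            pass
    last_crx_path, last_crx_etag = scan_tar_for_last_crx()
    with open(etag_file, 'w') as f:
        json.dump({"last_crx": last_crx_path, "last_crx_etag": last_crx_etag}, f)
    return last_crx_path, last_crx_etag
```

In the actual patch the cache is only rewritten when no `date` filter is given, so historical look-ups never overwrite the "latest" entry.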
@@ -219,23 +218,23 @@ def last_crx(archivedir, extid, date=None): date is None or (dateutil.parser.parse( os.path.split(os.path.split(x.name)[0])[1]) <= date)) ]) - if old_crxs != []: - last_crx = old_crxs[-1] + if old_crxs: + last_crx_path = old_crxs[-1] headers_content = t.extractfile( - last_crx + ".headers").read().decode().replace( + last_crx_path + ".headers").read().decode().replace( '"', '\\"').replace("'", '"') headers_json = json.loads(headers_content) last_crx_etag = headers_json["ETag"] if date is None: with open(etag_file, 'w') as f: - json.dump({"last_crx": last_crx, "last_crx_etag": last_crx_etag}, f) + json.dump({"last_crx": last_crx_path, "last_crx_etag": last_crx_etag}, f) - return last_crx, last_crx_etag + return last_crx_path, last_crx_etag def first_crx(archivedir, extid, date=None): - first_crx = "" + first_crx_path = "" tar = os.path.join(archivedir, get_local_archive_dir(extid), extid + ".tar") if os.path.exists(tar): @@ -247,10 +246,10 @@ def first_crx(archivedir, extid, date=None): os.path.split(os.path.split(x.name)[0])[1]))) ]) t.close() - if old_crxs != []: - first_crx = old_crxs[0] + if old_crxs: + first_crx_path = old_crxs[0] - return first_crx + return first_crx_path def all_crx(archivedir, extid, date=None): @@ -283,7 +282,7 @@ def update_overview(tar, date, ext_id): def validate_crx_response(res, extid, extfilename): regex_extfilename = re.compile(r'^extension[_0-9]+\.crx$') - if not 'Content-Type' in res.headers: + if 'Content-Type' not in res.headers: raise CrawlError(extid, 'Did not find Content-Type header.', '\n'.join( res.iter_lines())) if not res.headers['Content-Type'] == 'application/x-chrome-extension': @@ -351,10 +350,12 @@ def update_crx(archivedir, tmptardir, ext_id, date): f.write(chunk) write_text(tmptardir, date, extfilename + ".etag", res.headers.get("ETag")) - etag_file = os.path.join(archivedir, get_local_archive_dir(ext_id), - ext_id + ".etag") + etag_file = os.path.join(archivedir, get_local_archive_dir(ext_id), ext_id + ".etag") with open(etag_file, 'w') as f: - json.dump({"last_crx": os.path.join(ext_id, date, extfilename), "last_crx_etag": res.headers.get("ETag")}, f) + json.dump({ + "last_crx": os.path.join(ext_id, date, extfilename), + "last_crx_etag": res.headers.get("ETag") + }, f) except Exception as e: log_exception("Exception when updating crx", 3, ext_id) write_text(tmptardir, date, extfilename + ".exception", @@ -367,9 +368,10 @@ def iterate_authors(pages): for page in pages: json_page = json.loads(page[page.index("{\""):page.rindex("}}},") + 1]) for annotation in json_page["annotations"]: - if "attributes" in annotation and "replyExists" in annotation["attributes"] and annotation["attributes"]["replyExists"]: - yield (annotation["entity"]["author"], - annotation["entity"]["groups"]) + if "attributes" in annotation: + if "replyExists" in annotation["attributes"]: + if annotation["attributes"]["replyExists"]: + yield (annotation["entity"]["author"], annotation["entity"]["groups"]) def update_reviews(tar, date, ext_id): @@ -550,7 +552,7 @@ def update_extension(archivedir, forums, ext_id): try: write_text(tardir, date, ext_id + ".sql.exception", traceback.format_exc()) - except Exception as e: + except Exception: pass try: shutil.rmtree(path=tmpdir) @@ -581,13 +583,11 @@ def init_process(verbose, start_pystuck=False): pystuck.run_server(port=((os.getpid() % 10000) + 10001)) -def execute_parallel(archivedir, max_retry, timeout, max_workers, ext_ids, forums, verbose, start_pystuck): - results=[] - with 
ProcessPool(max_workers=max_workers, max_tasks=100, initializer=init_process, initargs=(verbose, start_pystuck)) as pool: - future = pool.map(partial(update_extension, archivedir, forums), - ext_ids, - chunksize=1, - timeout=timeout) +def execute_parallel(archivedir, timeout, max_workers, ext_ids, forums, verbose, start_pystuck): + results = [] + with ProcessPool(max_workers=max_workers, max_tasks=100, initializer=init_process, + initargs=(verbose, start_pystuck)) as pool: + future = pool.map(partial(update_extension, archivedir, forums), ext_ids, chunksize=1, timeout=timeout) iterator = future.result() for ext_id in ext_ids: try: @@ -595,7 +595,7 @@ def execute_parallel(archivedir, max_retry, timeout, max_workers, ext_ids, forum except StopIteration: break except TimeoutError as error: - log_warning("WorkerException: Processing of %s took longer than %d seconds" % (ext_id,error.args[1])) + log_warning("WorkerException: Processing of %s took longer than %d seconds" % (ext_id, error.args[1])) results.append(UpdateResult(ext_id, False, None, None, None, None, None, None, None, error)) except ProcessExpired as error: log_warning("WorkerException: %s (%s), exit code: %d" % (error, ext_id, error.exitcode)) @@ -609,8 +609,6 @@ def execute_parallel(archivedir, max_retry, timeout, max_workers, ext_ids, forum def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids, timeout, verbose, start_pystuck): - ext_with_forums = [] - ext_without_forums = [] forums_ext_ids = (list(set(forums_ext_ids))) log_info("Updating {} extensions ({} including forums)".format( @@ -621,13 +619,13 @@ def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids, timeout, ve parallel_ids = ext_ids log_info("Updating {} extensions excluding forums (parallel)".format( len(parallel_ids)), 1) - ext_without_forums = execute_parallel(archivedir, 3, timeout, parallel, parallel_ids, False, verbose, start_pystuck) + ext_without_forums = execute_parallel(archivedir, timeout, parallel, parallel_ids, False, verbose, start_pystuck) # Second, update extensions with forums sequentially (and with delays) to # avoid running into Googles DDOS detection. 
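Reviewer note: `execute_parallel` now follows the consumption pattern documented for pebble's `ProcessPool.map`: the map returns a future whose `.result()` iterator re-raises a per-item `TimeoutError` (with the exceeded timeout in `error.args[1]`) or `ProcessExpired` when a worker process died, and the unused `multiprocessing.Pool` import plus the `max_retry` argument are dropped from the signature and both call sites. A condensed, self-contained sketch of that pattern (toy `work` function, illustrative only):

```python
from concurrent.futures import TimeoutError
from pebble import ProcessPool, ProcessExpired

def work(item):
    return item * item

def run_all(items, timeout=5):
    results = []
    with ProcessPool(max_workers=4) as pool:
        future = pool.map(work, items, timeout=timeout)
        iterator = future.result()
        for item in items:
            try:
                results.append(next(iterator))
            except StopIteration:
                break
            except TimeoutError as error:
                # error.args[1] holds the per-item timeout that was exceeded
                print("%s took longer than %d seconds" % (item, error.args[1]))
            except ProcessExpired as error:
                print("worker for %s died with exit code %d" % (item, error.exitcode))
    return results

if __name__ == "__main__":
    print(run_all([1, 2, 3]))
```

Retrying is left to the caller: as the crawler script below shows, extensions that fail with unknown exceptions are simply re-queued once.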
log_info("Updating {} extensions including forums (sequentially)".format( len(forums_ext_ids)), 1) - ext_with_forums = execute_parallel(archivedir, 3, timeout, 1, forums_ext_ids, True, verbose, start_pystuck) + ext_with_forums = execute_parallel(archivedir, timeout, 1, forums_ext_ids, True, verbose, start_pystuck) return ext_with_forums + ext_without_forums diff --git a/ExtensionCrawler/cdnjs_crawler.py b/ExtensionCrawler/cdnjs_crawler.py index ce20a8c..b5f7f5b 100644 --- a/ExtensionCrawler/cdnjs_crawler.py +++ b/ExtensionCrawler/cdnjs_crawler.py @@ -169,10 +169,10 @@ def update_lib(force, archive, lib): outphased = [] for lib_ver in local_lib_json['assets']: version = lib_ver['version'] - if not version in cdnjs_versions: + if version not in cdnjs_versions: logging.warning("Found outphased versions for " + name + " " + str(version) + " , preserving from archive.") - if not 'outphased' in lib_ver: + if 'outphased' not in lib_ver: lib_ver[ 'outphased'] = datetime.datetime.utcnow().isoformat() outphased.append(lib_ver) @@ -260,7 +260,7 @@ def delete_orphaned(archive, local_libs, cdnjs_current_libs): """Delete all orphaned local libaries.""" dirname = os.path.join(archive, "filedb", "cdnjs", "lib") for lib in local_libs: - if not lib in cdnjs_current_libs: + if lib not in cdnjs_current_libs: os.remove(os.path.join(dirname, lib + ".json")) diff --git a/ExtensionCrawler/cdnjs_git.py b/ExtensionCrawler/cdnjs_git.py index bd66ad4..138555c 100644 --- a/ExtensionCrawler/cdnjs_git.py +++ b/ExtensionCrawler/cdnjs_git.py @@ -25,8 +25,7 @@ import logging import os import re import sys -from functools import partial, reduce -from multiprocessing import Pool +from functools import reduce import dateutil.parser import git @@ -70,8 +69,8 @@ def pull_list_changed_files(git_path): for diff in single_fetch_info.commit.diff( single_fetch_info.old_commit): logging.debug("Found diff: " + str(diff)) - if not diff.a_blob is None: - if not diff.a_blob.path in files: + if diff.a_blob is not None: + if diff.a_blob.path not in files: files.append(diff.a_blob.path) return files @@ -98,7 +97,7 @@ def hackish_pull_list_changed_files(git_path): for line in pull_lines: match = re.search(r'^ (.+) \| .*$', line) - if not match is None: + if match is not None: changed_files = match.group(1).split('=>') for changed_file in changed_files: files.add(changed_file.strip()) @@ -139,6 +138,7 @@ def get_file_libinfo(release_dic, git_path, libfile): file_info['library'] = lib file_info['version'] = version file_info['add_date'] = release_dic[(lib, version)] + # TODO: why is package not used? 
package = os.path.join( reduce(os.path.join, plist[:idx + 1]), "package.json") return file_info @@ -167,7 +167,7 @@ def get_all_lib_files(cdnjs_git_path, localpath=None): libvers = set() files = [] versionidx = len(path_to_list(cdnjs_git_path)) + 4 - if not localpath is None: + if localpath is not None: paths = os.path.join(cdnjs_git_path, localpath) else: paths = os.path.join(cdnjs_git_path, 'ajax/libs/**/*') @@ -196,7 +196,7 @@ def update_database_for_file(create_csv, release_dic, cdnjs_git_path, filename, if os.path.isfile(filename): logging.info("Updating database for file " + filename) file_info = get_file_libinfo(release_dic, cdnjs_git_path, filename) - if not file_info is None: + if file_info is not None: if create_csv: print(file_info['path']) print(cdnjs_git_path) @@ -268,7 +268,7 @@ def update_database_for_file_chunked(create_csv, release_dic, cdnjs_git_path, retries = 0 success = False max_retries = 4 - while (not success and (retries < max_retries)): + while not success and (retries < max_retries): try: update_database_for_file_chunked_timeout(create_csv, release_dic, cdnjs_git_path, filenames) @@ -305,7 +305,7 @@ def get_release_triple(git_path, libver): lib = plist[-2] date = get_add_date(git_path, libver) logging.info("Release information:" + lib + " " + ver + ": " + str(date)) - return (lib, ver, date) + return lib, ver, date def build_release_date_dic(git_path, libvers): @@ -332,7 +332,6 @@ def pull_and_update_db(cdnjs_git_path, create_csv): def update_db_from_listfile(cdnjs_git_path, listfile, create_csv): """Update database (without pull) for files in listfile)""" - paths = [] with open(listfile) as listfileobj: paths = listfileobj.read().splitlines() files = [] diff --git a/ExtensionCrawler/config.py b/ExtensionCrawler/config.py index 8011092..c6b7754 100644 --- a/ExtensionCrawler/config.py +++ b/ExtensionCrawler/config.py @@ -142,10 +142,12 @@ def const_verbose(): """Default verbosity.""" return True + def const_use_process_pool(): """Use ProcessPool (from module 'pebble') for concurrency.""" return False + def const_log_format(): return '%(process)6s %(asctime)s %(levelname)8s %(message)s' @@ -154,14 +156,17 @@ def const_discover(): """Default configuration of discovery mode""" return False + def const_download_ext_ids_with_forums(): """Download extensions with forums (sequential mode)""" return True + def const_download_ext_ids_without_forums(): """Download extensions without forums (parallel mode)""" return True + def const_ext_timeout(): """Timeout for downloading an individual extension (2 hours).""" return 2*60*60 diff --git a/ExtensionCrawler/db.py b/ExtensionCrawler/db.py index a54b6e0..9c1e278 100644 --- a/ExtensionCrawler/db.py +++ b/ExtensionCrawler/db.py @@ -15,13 +15,12 @@ # along with this program. If not, see . 
# -from ExtensionCrawler.config import * -from ExtensionCrawler.util import * -from ExtensionCrawler.crx import * -from ExtensionCrawler.archive import * -from ExtensionCrawler.js_decomposer import decompose_js_with_connection, DetectionType, FileClassification +from ExtensionCrawler.config import const_mysql_config_file +from ExtensionCrawler.crx import read_crx +from ExtensionCrawler.js_decomposer import decompose_js_with_connection +from ExtensionCrawler.util import log_warning, log_debug, log_exception, log_info -from ExtensionCrawler.dbbackend.mysql_backend import MysqlBackend +from ExtensionCrawler.dbbackend.mysql_backend import MysqlBackend, convert_date import re from bs4 import BeautifulSoup @@ -63,7 +62,7 @@ def get_etag(ext_id, datepath, con): link = f.read() linked_date = link[3:].split("/")[0] - result = con.get_etag(ext_id, con.convert_date(linked_date)) + result = con.get_etag(ext_id, convert_date(linked_date)) if result is not None: return result @@ -166,7 +165,7 @@ def parse_and_insert_overview(ext_id, date, datepath, con): con.insert( "extension", extid=ext_id, - date=con.convert_date(date), + date=convert_date(date), name=name, version=version, description=description, @@ -184,12 +183,12 @@ def parse_and_insert_overview(ext_id, date, datepath, con): con.insert( "category", extid=ext_id, - date=con.convert_date(date), + date=convert_date(date), category_md5=hashlib.md5(category.encode()).digest(), category=category) -def parse_and_insert_crx(ext_id, date, datepath, con): +def parse_and_insert_crx(ext_id, datepath, con): crx_path = next(iter(glob.glob(os.path.join(datepath, "*.crx"))), None) if not crx_path: return @@ -314,7 +313,7 @@ def parse_and_insert_review(ext_id, date, reviewpath, con): con.insert( "review", extid=ext_id, - date=con.convert_date(date), + date=convert_date(date), commentdate=datetime.datetime.utcfromtimestamp( get(review, "timestamp")).isoformat() if "timestamp" in review else None, @@ -345,7 +344,7 @@ def parse_and_insert_support(ext_id, date, supportpath, con): con.insert( "support", extid=ext_id, - date=con.convert_date(date), + date=convert_date(date), commentdate=datetime.datetime.utcfromtimestamp( get(review, "timestamp")).isoformat() if "timestamp" in review else None, @@ -365,7 +364,7 @@ def parse_and_insert_replies(ext_id, date, repliespath, con): log_debug("- parsing reply file", 3, ext_id) with open(repliespath) as f: d = json.load(f) - if not "searchResults" in d: + if "searchResults" not in d: log_warning("* WARNING: there are no search results in {}".format( repliespath), 3, ext_id) return @@ -379,7 +378,7 @@ def parse_and_insert_replies(ext_id, date, repliespath, con): con.insert( "reply", extid=ext_id, - date=con.convert_date(date), + date=convert_date(date), commentdate=datetime.datetime.utcfromtimestamp( get(annotation, "timestamp")).isoformat() if "timestamp" in annotation else None, @@ -413,7 +412,7 @@ def parse_and_insert_status(ext_id, date, datepath, con): con.insert( "status", extid=ext_id, - date=con.convert_date(date), + date=convert_date(date), crx_status=crx_status, overview_status=overview_status, overview_exception=overview_exception) @@ -439,8 +438,8 @@ def update_db_incremental_with_connection(tmptardir, ext_id, date, con): if etag: try: - parse_and_insert_crx(ext_id, date, datepath, con) - except Exception as e: + parse_and_insert_crx(ext_id, datepath, con) + except Exception: log_exception("Exception when parsing crx", 3, ext_id) else: crx_status = get_crx_status(datepath) @@ -449,40 +448,40 @@ def 
update_db_incremental_with_connection(tmptardir, ext_id, date, con): try: parse_and_insert_overview(ext_id, date, datepath, con) - except Exception as e: + except Exception: log_exception("Exception when parsing overview", 3, ext_id) try: parse_and_insert_status(ext_id, date, datepath, con) - except Exception as e: + except Exception: log_exception("Exception when parsing status", 3, ext_id) reviewpaths = glob.glob(os.path.join(datepath, "reviews*-*.text")) for reviewpath in reviewpaths: try: parse_and_insert_review(ext_id, date, reviewpath, con) - except json.decoder.JSONDecodeError as e: + except json.decoder.JSONDecodeError: log_warning("- WARNING: Review is not a proper json file!", 3, ext_id) - except Exception as e: + except Exception: log_exception("Exception when parsing review", 3, ext_id) supportpaths = glob.glob(os.path.join(datepath, "support*-*.text")) for supportpath in supportpaths: try: parse_and_insert_support(ext_id, date, supportpath, con) - except json.decoder.JSONDecodeError as e: + except json.decoder.JSONDecodeError: log_warning("- WARNING: Support is not a proper json file!", 3, ext_id) - except Exception as e: + except Exception: log_exception("Exception when parsing support", 3, ext_id) repliespaths = glob.glob(os.path.join(datepath, "*replies.text")) for repliespath in repliespaths: try: parse_and_insert_replies(ext_id, date, repliespath, con) - except json.decoder.JSONDecodeError as e: + except json.decoder.JSONDecodeError: log_warning("- WARNING: Reply is not a proper json file!", 3, ext_id) - except Exception as e: + except Exception: log_exception("Exception when parsing reply", 3, ext_id) diff --git a/ExtensionCrawler/dbbackend/mysql_backend.py b/ExtensionCrawler/dbbackend/mysql_backend.py index 5a55c4f..fb0b39a 100644 --- a/ExtensionCrawler/dbbackend/mysql_backend.py +++ b/ExtensionCrawler/dbbackend/mysql_backend.py @@ -18,18 +18,17 @@ import time import datetime from random import uniform -from itertools import starmap -import logging import MySQLdb import _mysql_exceptions import ExtensionCrawler.config as config -from ExtensionCrawler.util import log_info, log_error, log_exception, log_warning +from ExtensionCrawler.util import log_info, log_error, log_warning class MysqlBackend: - def __init__(self, ext_id, try_wait=config.const_mysql_try_wait(), maxtries=config.const_mysql_maxtries(), **kwargs): + def __init__(self, ext_id, try_wait=config.const_mysql_try_wait(), maxtries=config.const_mysql_maxtries(), + **kwargs): self.ext_id = ext_id self.dbargs = kwargs self.try_wait = try_wait @@ -147,5 +146,6 @@ class MysqlBackend: result = self.retry(lambda: self.cursor.fetchone()) return result - def convert_date(self, date): - return date[:-6] + +def convert_date(date): + return date[:-6] diff --git a/ExtensionCrawler/discover.py b/ExtensionCrawler/discover.py index af51ba6..3ef9e86 100644 --- a/ExtensionCrawler/discover.py +++ b/ExtensionCrawler/discover.py @@ -17,12 +17,10 @@ """Python mnodule providing methods for discovering extensions in the Chrome extension store.""" -import xml.etree.ElementTree as ET +from xml.etree.ElementTree import fromstring import re -from functools import reduce import requests from ExtensionCrawler import config -from ExtensionCrawler.util import log_info, log_exception def crawl_nearly_all_of_ext_ids(): @@ -30,7 +28,7 @@ def crawl_nearly_all_of_ext_ids(): def get_inner_elems(doc): """Get inner element.""" - return ET.fromstring(doc).iterfind(r".//{{{}}}loc".format( + return fromstring(doc).iterfind(r".//{{{}}}loc".format( 
config.const_sitemap_scheme())) def is_generic_url(url): diff --git a/ExtensionCrawler/file_identifiers.py b/ExtensionCrawler/file_identifiers.py index c12a5b8..31e0262 100644 --- a/ExtensionCrawler/file_identifiers.py +++ b/ExtensionCrawler/file_identifiers.py @@ -30,12 +30,14 @@ import magic from ExtensionCrawler.js_mincer import mince_js + def is_binary_resource(mimetype_magic): return (mimetype_magic.startswith("image/") or mimetype_magic.startswith("video/") or mimetype_magic.startswith("audio/") or mimetype_magic == "application/pdf") + def normalize_jsdata(str_data): """Compute normalized code blocks of a JavaScript file""" txt = "" @@ -59,9 +61,8 @@ def get_features(s): def get_simhash(encoding, data): """Compute simhash of text.""" - str_data = "" - if not encoding is None: - str_data = data.decode(encoding=encoding,errors="replace") + if encoding is not None: + str_data = data.decode(encoding=encoding, errors="replace") else: str_data = str(data) simhash = Simhash(get_features(str_data)).value @@ -82,31 +83,30 @@ def compute_difference(hx, hy): def get_data_identifiers(data): """Get basic data identifiers (size, hashes, normalized hashes, etc.).""" - data_identifier = {} - - data_identifier['encoding'] = None - data_identifier['description'] = None - data_identifier['size'] = None - data_identifier['loc'] = None - data_identifier['mimetype_magic'] = None - data_identifier['md5'] = None - data_identifier['sha1'] = None - data_identifier['sha256'] = None - data_identifier['simhash'] = None - data_identifier['size_stripped'] = None - data_identifier['normalized_encoding'] = None - data_identifier['normalized_description'] = None - data_identifier['normalized_size'] = None - data_identifier['normalized_loc'] = None - data_identifier['normalized_mimetype_magic'] = None - data_identifier['normalized_md5'] = None - data_identifier['normalized_sha1'] = None - data_identifier['normalized_sha256'] = None - data_identifier['normalized_simhash'] = None + data_identifier = { + 'encoding': None, + 'description': None, + 'size': None, + 'loc': None, + 'mimetype_magic': None, + 'md5': None, + 'sha1': None, + 'sha256': None, + 'simhash': None, + 'size_stripped': None, + 'normalized_encoding': None, + 'normalized_description': None, + 'normalized_size': None, + 'normalized_loc': None, + 'normalized_mimetype_magic': None, + 'normalized_md5': None, + 'normalized_sha1': None, + 'normalized_sha256': None, + 'normalized_simhash': None + } mimetype_magic = magic.from_buffer(data, mime=True) - magic_desc = "" try: magic_desc = magic.from_buffer(data) except magic.MagicException as exp: @@ -137,9 +137,10 @@ def get_data_identifiers(data): data_identifier['encoding'] = encoding try: normalized_data, normalized_loc = normalize_jsdata( - data.decode(encoding=data_identifier['encoding'],errors="replace")) + data.decode(encoding=data_identifier['encoding'], errors="replace")) except Exception: normalized_data = None + normalized_loc = 0 if normalized_data is not None: normalized_magic_desc = "" @@ -149,7 +150,7 @@ def get_data_identifiers(data): rgx = re.compile(r' name use count.*$') msg = str(exp.message) if re.search(rgx, msg): - magic_desc = re.sub(rgx, '', msg) + normalized_magic_desc = re.sub(rgx, '', msg) else: raise exp normalized_encoding = chardet.detect(normalized_data)['encoding'] diff --git a/ExtensionCrawler/js_decomposer.py b/ExtensionCrawler/js_decomposer.py index 7292874..62e5b2e 100644 --- a/ExtensionCrawler/js_decomposer.py +++ b/ExtensionCrawler/js_decomposer.py @@ -18,12 +18,10 @@ general 
and Chrome extensions in particular.""" import os -import io from io import StringIO import re import json import zlib -import logging from enum import Enum from ExtensionCrawler.js_mincer import mince_js from ExtensionCrawler.file_identifiers import get_file_identifiers, is_binary_resource @@ -107,15 +105,15 @@ def unknown_lib_identifiers(): re.compile( r'[\/|\/\/|\s]\*?\s?([a-zA-Z0-9\.]+)\sv?([0-9][\.|\-|\_][0-9.a-z_\\\\-]+)', re.IGNORECASE - ), #MatchType: name version, e.g. mylib v1.2.9b or mylib.anything 1.2.8 + ), # MatchType: name version, e.g. mylib v1.2.9b or mylib.anything 1.2.8 re.compile( r'[\/|\/\/|\s]\*?\s?([a-zA-Z0-9\.]+)\s(?: version)\:?\s?v?([0-9][0-9.a-z_\\\\-]+)', re.IGNORECASE - ), #MatchType: name version: ver, e.g. mylib version: v1.2.9, or mylib.js version 1.2.8 + ), # MatchType: name version: ver, e.g. mylib version: v1.2.9, or mylib.js version 1.2.8 re.compile( r'\@*(version)\s?[\:|-]?\s?v?([0-9][\.|\-|\_][0-9.a-z_\\\\-]+)', re.IGNORECASE - ), #MatchType: version x.x.x, e.g. @version: 1.2.5 or version - 1.2.5 etc. + ), # MatchType: version x.x.x, e.g. @version: 1.2.5 or version - 1.2.5 etc. re.compile( r'(version)[\:|\=]\s?.?([0-9]{1,2}[\.|\-|\_][0-9.a-z_\\\\-]+).?', re.IGNORECASE), @@ -188,13 +186,9 @@ def check_md5_decompressed(con, file_info): """Check for known md5 hash (decompressed file content).""" if con is None: return file_info - if file_info['dec_md5'] is None: - return file_info - else: + if file_info['dec_md5'] is not None: libver = con.get_cdnjs_info(file_info['dec_md5']) - if libver is None: - return file_info - else: + if libver is not None: file_info['lib'] = libver[0] file_info['version'] = libver[1] file_info['lib_filename'] = libver[2] @@ -203,7 +197,6 @@ def check_md5_decompressed(con, file_info): else: file_info['type'] = FileClassification.LIBRARY file_info['detectionMethod'] = DetectionType.MD5_DECOMPRESSED - return file_info return file_info @@ -361,7 +354,7 @@ def analyse_comment_known_libs(zipfile, js_file, js_info, comment): else: filename = js_file for lib, regex in load_lib_identifiers().items(): - if ('filecontent' in regex): + if 'filecontent' in regex: for unkregex in regex['filecontent']: unkown_lib_matched = unkregex.finditer(comment.content) for match in unkown_lib_matched: @@ -481,13 +474,14 @@ def decompose_js_with_connection(path_or_zipfileobj, con): try: str_data = data.decode(file_info['encoding']) except Exception: - log_info("Exception during data decoding for entry " + - file_info['filename'], 3) + log_info("Exception during data decoding for entry " + file_info['filename'], 3) str_data = '' else: str_data = '' info_data_blocks = check_data_blocks(file_info, str_data) + else: + info_data_blocks = None if info_data_blocks: inventory = inventory + merge_filename_and_data_info( diff --git a/ExtensionCrawler/js_mincer.py b/ExtensionCrawler/js_mincer.py index e93331e..12825cc 100644 --- a/ExtensionCrawler/js_mincer.py +++ b/ExtensionCrawler/js_mincer.py @@ -198,8 +198,8 @@ def mince_js_fileobj(fileobj): except StopIteration: pass - if ((is_comment(state) and is_code_or_string_literal(suc_state)) or - (is_code_or_string_literal(state) and is_comment(suc_state))): + if ((is_comment(state) and is_code_or_string_literal(suc_state)) or ( + is_code_or_string_literal(state) and is_comment(suc_state))): if content.strip(): yield (JsBlock(state, (block_start_line, block_start_cpos), (line, cpos), content, string_literals)) diff --git a/cdnjs-git-miner b/cdnjs-git-miner index ea139e2..47e5534 100755 --- a/cdnjs-git-miner +++ 
b/cdnjs-git-miner @@ -107,7 +107,7 @@ def main(argv): logging.info("Starting update of new db libs") pull_and_update_db(cdnjs_git_path, csv) logging.info("Finished update of new db libs") - if not listfile is None: + if listfile is not None: logging.info("Starting update from list file") update_db_from_listfile(cdnjs_git_path, listfile, csv) logging.info("Finished update from list file") diff --git a/crawler b/crawler index fc23c20..3a39faf 100755 --- a/crawler +++ b/crawler @@ -19,7 +19,6 @@ A crawler for extensions from the Chrome Web Store. """ -import os import sys import datetime import time @@ -141,7 +140,7 @@ def log_summary(res, runtime=0): log_info(" Total runtime: {}".format( str(datetime.timedelta(seconds=int(runtime))))) - if corrupt_tar_archives != []: + if corrupt_tar_archives: log_info("") log_info("List of extensions with corrupted files/archives:") list( @@ -229,7 +228,8 @@ def parse_args(argv): max_discover = int(arg) elif opt == '--pystuck': start_pystuck = True - return basedir, parallel, verbose, discover, max_discover, download_ext_ids_with_forums, download_ext_ids_without_forums, ext_timeout, start_pystuck + return [basedir, parallel, verbose, discover, max_discover, download_ext_ids_with_forums, + download_ext_ids_without_forums, ext_timeout, start_pystuck] def main(argv): @@ -242,8 +242,8 @@ def main(argv): multiprocessing.set_start_method("forkserver") today = datetime.datetime.now(datetime.timezone.utc).isoformat() - basedir, parallel, verbose, discover, max_discover, download_ext_ids_with_forums, download_ext_ids_without_forums, ext_timeout, start_pystuck = parse_args( - argv) + [basedir, parallel, verbose, discover, max_discover, download_ext_ids_with_forums, download_ext_ids_without_forums, + ext_timeout, start_pystuck] = parse_args(argv) setup_logger(verbose) @@ -304,7 +304,7 @@ def main(argv): # We re-try (once) the extensions with unknown exceptions, as # they are often temporary has_exception = list(filter(lambda x: x.has_exception(), res)) - if has_exception != []: + if has_exception: log_info( " {} extensions with unknown exceptions, start another try ...". 
format(str(len(has_exception)))) @@ -318,7 +318,7 @@ def main(argv): res = list(set(res) - set(has_exception)) + res_update end_time = time.time() - log_summary(res, end_time - start_time) + log_summary(res, int(end_time - start_time)) log_failures_to_file(log_dir, today, res) diff --git a/create-db b/create-db index 3d9e88e..df993dc 100755 --- a/create-db +++ b/create-db @@ -17,7 +17,6 @@ # import getopt -import os import sys import tarfile import time @@ -30,12 +29,12 @@ import datetime from ExtensionCrawler.archive import update_db_incremental from ExtensionCrawler.config import * -from ExtensionCrawler.util import log_info, log_warning, log_error, log_exception +from ExtensionCrawler.util import log_info, log_exception from ExtensionCrawler.dbbackend.mysql_backend import MysqlBackend -def help(): +def print_help(): print("""create-db [OPTION]""") print(""" -h print this help text""") print(""" -a archive directory""") @@ -122,11 +121,11 @@ def parse_args(argv): "maxtaskid=", "from-date=", "until-date=", "help" ]) except getopt.GetoptError: - help() + print_help() sys.exit(2) for opt, arg in opts: if opt in ("-h", "--help"): - help() + print_help() sys.exit() elif opt in ("-a", "--archive"): archive = arg @@ -140,12 +139,12 @@ def parse_args(argv): taskid = int(arg) elif opt in ("-N", "--maxtaskid"): maxtaskid = int(arg) - elif opt in ("--from-date"): + elif opt == "--from-date": from_date = arg - elif opt in ("--until-date"): + elif opt == "--until-date": until_date = arg - if paths == []: + if not paths: paths = list(find(archive, "*")) chunksize = int(len(paths) / maxtaskid) diff --git a/crx-extract b/crx-extract index 781fd99..3b1ea23 100755 --- a/crx-extract +++ b/crx-extract @@ -58,7 +58,6 @@ def main(argv): basedir = const_basedir() verbose = True date = None - extid = "" useetag = False output = "" winfs = False diff --git a/crx-jsinventory b/crx-jsinventory index 1cdce60..f7eab69 100755 --- a/crx-jsinventory +++ b/crx-jsinventory @@ -44,7 +44,6 @@ def main(argv): """Main function of the extension crawler.""" verbose = False silent = False - filename = None csvfile = None database = True try: diff --git a/crx-jsstrings b/crx-jsstrings index 45cc7b2..d6a4b49 100755 --- a/crx-jsstrings +++ b/crx-jsstrings @@ -39,7 +39,7 @@ import jsbeautifier from ExtensionCrawler.config import (const_log_format, const_basedir) from ExtensionCrawler.archive import last_crx, first_crx, all_crx -from ExtensionCrawler.config import (archive_file, get_local_archive_dir) +from ExtensionCrawler.config import get_local_archive_dir from ExtensionCrawler.js_decomposer import init_file_info from ExtensionCrawler.js_mincer import mince_js @@ -54,7 +54,7 @@ def is_file_with_c_style_comments(filename): def jsstrings_data(conf, path, data): """Analyze data in memory.""" - if not conf.file_pattern is None: + if conf.file_pattern is not None: if path is None: return False elif not fnmatch.fnmatch(path, conf.file_pattern): @@ -98,7 +98,7 @@ def jsstrings_data(conf, path, data): if analyze_block(conf, path, block, first): match = True first = False - if match and conf.output_decoration > 0 and conf.output_decoration < 2: + if match and 0 < conf.output_decoration < 2: print(path) return match @@ -112,6 +112,7 @@ def print_block(conf, if conf.output_decoration > 1: line_no = block.start[0] prefix = " " * (block.start[1] - 1) + # TODO: use classifier classifier = "X" sep = "=" * (len(path) + 17) if not first: @@ -129,10 +130,10 @@ def print_block(conf, path, loc, block.string_literals[0].rstrip()) print(line) else: - 
for (pos, str) in block.string_literals: + for (pos, string) in block.string_literals: loc = '({0[0]:d}/{0[1]:d})'.format(pos) loc = (' ' * (11 - len(loc))) + loc - line = '{0} {1} [L]: {2}'.format(path, loc, str.rstrip()) + line = '{0} {1} [L]: {2}'.format(path, loc, string.rstrip()) print(line) if code_match: print("-" * (len(path) + 17)) @@ -151,7 +152,7 @@ def analyze_block(conf, path, block, first=False): """Print code/comment blocks.""" match = False regexps = [] - if not conf.reg_exp is None: + if conf.reg_exp is not None: for regexp in conf.reg_exp: if conf.case_insensitive: regexps.append(re.compile(r'(' + regexp + ')', re.IGNORECASE)) @@ -159,7 +160,7 @@ def analyze_block(conf, path, block, first=False): regexps.append(re.compile(r'(' + regexp + ')')) if block.is_comment(): content = block.content - if not conf.reg_exp_comments is None: + if conf.reg_exp_comments is not None: for regexp in conf.reg_exp_comments: if conf.case_insensitive: regexps.append( @@ -179,14 +180,14 @@ def analyze_block(conf, path, block, first=False): content = block.content regexps_string = regexps.copy() regexps_code = regexps.copy() - if not conf.reg_exp_string_literals is None: + if conf.reg_exp_string_literals is not None: for regexp in conf.reg_exp_string_literals: if conf.case_insensitive: regexps.append( re.compile(r'(' + regexp + ')', re.IGNORECASE)) else: regexps.append(re.compile(r'(' + regexp + ')')) - if not conf.reg_exp_source is None: + if conf.reg_exp_source is not None: for regexp in conf.reg_exp_source: if conf.case_insensitive: regexps.append( @@ -222,9 +223,9 @@ def analyze_block(conf, path, block, first=False): match_idxs.add(idx) string_match = True block.string_literals = [] - for idx, str in enumerate(string_literals): + for idx, string in enumerate(string_literals): if idx in match_idxs: - block.string_literals.append(str) + block.string_literals.append(string) code_match = False for regexp in regexps_code: @@ -259,9 +260,6 @@ def analyze_crx(conf, crx, path=""): def analyze_tar(conf, tarfilename): last_crx_file = '' - # from_date - # latest_date - match = False extid = os.path.splitext(os.path.basename(tarfilename))[0] from_dateobj = None latest_dateobj = None @@ -303,7 +301,7 @@ def analyze_tar(conf, tarfilename): # both dates are given all_crx_files = all_crx( os.path.join(conf.archive_dir, "data"), extid) - if all_crx_files == []: + if not all_crx_files: logging.warning("No crx in " + extid) else: with tarfile.open(tarfilename, 'r') as archive: @@ -354,14 +352,13 @@ def analyze_task(conf, task): """Analyze one file/tar/crx/extid.""" logging.debug("Analyzing " + task) extid_re = re.compile('^[a-p]+$') - retval = False if task.endswith('.crx'): retval = analyze_crx(conf, task) elif task.endswith('.tar'): retval = analyze_tar(conf, task) elif extid_re.match(task): - tarfile = "data/" + get_local_archive_dir(task) + "/" + task + '.tar' - retval = analyze_tar(conf, conf.archive_dir + "/" + tarfile) + tarfilename = "data/" + get_local_archive_dir(task) + "/" + task + '.tar' + retval = analyze_tar(conf, conf.archive_dir + "/" + tarfilename) else: retval = analyze_file(conf, task) return retval diff --git a/extfind b/extfind index 9fb20c0..a76324d 100755 --- a/extfind +++ b/extfind @@ -21,10 +21,12 @@ import glob import os import sys import logging +import re from ExtensionCrawler import config -def help(): + +def print_help(): print("""extfind [OPTION]""") print(""" -h print this help text""") print(""" -a archive directory""") @@ -51,7 +53,7 @@ def 
iter_extension_paths_from_file(archive, n, N, extidlistfile): if re.fullmatch("[a-p]{32}", line) and os.path.exists(path): paths += [path] else: - logging.warn("WARNING: {} is not a valid extension path!".format(path)) + logging.warning("WARNING: {} is not a valid extension path!".format(path)) return split(paths, n, N) @@ -67,19 +69,17 @@ def main(argv): taskid = 1 maxtaskid = 1 - paths = [] - try: opts, args = getopt.getopt(argv, "ha:g:e:n:N:", [ "archive=", "glob=", "extidlistfile=", "taskid=", "maxtaskid=", "help" ]) except getopt.GetoptError: - help() + print_help() sys.exit(2) for opt, arg in opts: if opt in ("-h", "--help"): - help() + print_help() sys.exit() elif opt in ("-a", "--archive"): archive = arg @@ -99,11 +99,12 @@ def main(argv): elif extidglob is not None and extidlistfile is None: paths = iter_extension_paths(archive, taskid, maxtaskid, extidglob) else: - help() + print_help() sys.exit(2) for path in paths: print(path) + if __name__ == "__main__": main(sys.argv[1:]) diff --git a/requirements.txt b/requirements.txt index afb0a2d..06020a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,10 @@ +colorama==0.3.9 +pystuck==0.8.5 simhash==1.8.0 tabulate==0.7.7 setuptools==36.2.7 cchardet==2.1.1 -mysqlclient==1.3.10 +mysqlclient==1.3.12 requests==2.18.1 pycrypto==2.6.1 beautifulsoup4==4.6.0 diff --git a/setup.py b/setup.py index 3f4e224..2eb52fd 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,12 @@ from setuptools import setup +with open('requirements.txt') as f: + requirements = f.read().splitlines() + setup( name='Extension Crawler', description='A collection of utilities for downloading and analyzing browser extension from the Chrome Web store.', author='Achim D. Brucker, Michael Herzberg', license='GPL 3.0', - install_requires=['GitPython', 'pebble', 'simhash', 'colorama', 'python_magic', 'tabulate', 'requests', 'pycrypto', 'beautifulsoup4', 'python_dateutil', 'mysqlclient', 'cchardet', 'jsbeautifier', 'pystuck'] + install_requires=requirements )
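Reviewer note: the setup.py change keeps the pinned dependency list in one place by feeding requirements.txt straight into `install_requires`. The patch reads the file verbatim, which works because requirements.txt currently holds only plain `name==version` pins; the sketch below is a slightly more defensive variant of the same pattern that skips blank lines and comments (an assumption, not something the patch does):

```python
from setuptools import setup

with open('requirements.txt') as f:
    # Keep only plain requirement pins; skip blank lines and comments.
    requirements = [line.strip() for line in f
                    if line.strip() and not line.strip().startswith('#')]

setup(
    name='Extension Crawler',
    description='A collection of utilities for downloading and analyzing '
                'browser extensions from the Chrome Web Store.',
    author='Achim D. Brucker, Michael Herzberg',
    license='GPL 3.0',
    install_requires=requirements,
)
```

Note that pip-only syntax such as `-r` includes or editable installs would not be valid `install_requires` entries, so the single-file approach only holds as long as requirements.txt stays a flat list of pins.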