Browse Source

Refactoring: Moved default configuration to config module.

production
Achim D. Brucker 5 years ago
parent
commit
eb0054b47d
  1. 16
      ExtensionCrawler/config.py
  2. 9
      crawler

16
ExtensionCrawler/config.py

@ -130,7 +130,21 @@ def db_file(archivedir, ext_id):
return os.path.join(archivedir,
get_local_archive_dir(ext_id), ext_id + ".sqlite")
def jsloc_timeout() -> int:
    """Return the maximum number of seconds for counting jsloc per extension.

    Returns:
        The timeout in seconds (600, i.e. ten minutes).
    """
    return 600
def const_basedir() -> str:
    """Return the top-level directory for the extension crawler archive.

    Returns:
        The directory name ``"archive"``.
    """
    return "archive"
def const_parallel_downloads() -> int:
    """Return the number of parallel downloads.

    Returns:
        The number of downloads to run in parallel (36).
    """
    # NOTE(review): the crawler script's previous hard-coded default was 24;
    # confirm that raising it to 36 is intended and not an accidental change.
    return 36
def const_verbose() -> bool:
    """Return the default verbosity.

    Returns:
        ``True``: verbose output is enabled by default.
    """
    return True
def const_discover() -> bool:
    """Return the default configuration of discovery mode.

    Returns:
        ``False``: discovery mode is disabled by default.
    """
    return False

9
crawler

@ -29,6 +29,7 @@ from functools import reduce
from ExtensionCrawler.discover import get_new_ids
from ExtensionCrawler.archive import get_forum_ext_ids, get_existing_ids, update_extensions
from ExtensionCrawler.util import log
import ExtensionCrawler.config
# Script should run with Python 3.4 or 3.5; any other interpreter version
# fails here with an AssertionError as soon as this line executes.
# NOTE: assert statements are removed when Python runs with -O, so this
# guard is not enforced in optimized mode.
assert sys.version_info >= (3, 4) and sys.version_info < (3, 6)
@ -168,10 +169,10 @@ def print_config(verbose, basedir, archive_dir, conf_dir, discover, parallel):
def parse_args(argv):
"""Parse command line arguments. """
basedir = "archive"
parallel = 24
verbose = True
discover = False
basedir = ExtensionCrawler.config.const_basedir()
parallel = ExtensionCrawler.config.const_parallel_downloads()
verbose = ExtensionCrawler.config.const_verbose()
discover = ExtensionCrawler.config.const_discover()
try:
opts, _ = getopt.getopt(argv, "hsda:p:", ["archive=", 'parallel='])
except getopt.GetoptError:

Loading…
Cancel
Save