Reformatting.

This commit is contained in:
Achim D. Brucker 2017-09-13 00:02:20 +01:00
parent ea9339bc53
commit 88efe2b8a4
1 changed file with 9 additions and 6 deletions

View File

@@ -21,8 +21,8 @@
import hashlib import hashlib
import mimetypes import mimetypes
import os import os
from functools import reduce
import zlib import zlib
from functools import reduce
from io import StringIO from io import StringIO
import cchardet as chardet import cchardet as chardet
@@ -66,6 +66,7 @@ def normalize_jsdata(str_data):
txt += line.strip() txt += line.strip()
return txt.encode() return txt.encode()
def get_data_identifiers(data): def get_data_identifiers(data):
"""Get basic data identifiers (size, hashes, normalized hashes, etc.).""" """Get basic data identifiers (size, hashes, normalized hashes, etc.)."""
data_identifier = { data_identifier = {
@@ -77,7 +78,8 @@ def get_data_identifiers(data):
'encoding': chardet.detect(data)['encoding'], 'encoding': chardet.detect(data)['encoding'],
} }
try: try:
normalized_data = normalize_jsdata(data.decode(data_identifier['encoding'])) normalized_data = normalize_jsdata(
data.decode(data_identifier['encoding']))
except Exception: except Exception:
normalized_data = None normalized_data = None
@@ -94,16 +96,17 @@ def get_data_identifiers(data):
normalized_data).digest() normalized_data).digest()
return data_identifier return data_identifier
def get_file_identifiers(path): def get_file_identifiers(path):
"""Get basic file identifiers (path, filename, etc.) and data identifiers.""" """Get basic file identifiers (path, filename, etc.) and data identifiers."""
with open(path, 'rb') as fileobj: with open(path, 'rb') as fileobj:
data = fileobj.read() data = fileobj.read()
data_identifier = get_data_identifiers(data) data_identifier = get_data_identifiers(data)
if data_identifier['description'].startswith('gzip'): if data_identifier['description'].startswith('gzip'):
with zlib.decompressobj(zlib.MAX_WBITS | 16) as dec: with zlib.decompressobj(zlib.MAX_WBITS | 16) as dec:
dec_data = dec.decompress(data, 30*data_identifier['size']) dec_data = dec.decompress(data, 30 * data_identifier['size'])
dec_data_identifier = get_data_identifiers(dec_data) dec_data_identifier = get_data_identifiers(dec_data)
else: else:
dec_data_identifier = { dec_data_identifier = {
@@ -119,7 +122,7 @@ def get_file_identifiers(path):
} }
data = None data = None
dec_data = None dec_data = None
file_identifier = { file_identifier = {
'filename': os.path.basename(path), 'filename': os.path.basename(path),
'path': path, 'path': path,
@@ -150,7 +153,7 @@ def get_file_identifiers(path):
def path_to_list(path): def path_to_list(path):
"""Convert a path (string) to a list of folders/files.""" """Convert a path (string) to a list of folders/files."""
plist = [] plist = []
while (True): while True:
(head, tail) = os.path.split(path) (head, tail) = os.path.split(path)
if head == '': if head == '':
if tail == '': if tail == '':