Refactoring.
This commit is contained in:
parent
920bc74838
commit
486b967d2d
|
@ -14,7 +14,6 @@
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
"""Python module providing a decomposition analysis of JavaScript code in
|
"""Python module providing a decomposition analysis of JavaScript code in
|
||||||
general and Chrome extensions in particular."""
|
general and Chrome extensions in particular."""
|
||||||
|
|
||||||
|
@ -24,6 +23,7 @@ import json
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
def lib_identifiers():
|
def lib_identifiers():
|
||||||
"""Initialize identifiers for known libraries from JSON file."""
|
"""Initialize identifiers for known libraries from JSON file."""
|
||||||
regex_file = os.path.join(
|
regex_file = os.path.join(
|
||||||
|
@ -33,12 +33,14 @@ def lib_identifiers():
|
||||||
json_content = json_file.read()
|
json_content = json_file.read()
|
||||||
return json.loads(json_content)
|
return json.loads(json_content)
|
||||||
|
|
||||||
|
|
||||||
def unknown_filename_identifier():
    """Identifier for extracting version information from unknown/generic file names.

    Returns:
        A compiled, case-insensitive regular expression. On a successful
        ``search``:

        * group 1 is the text preceding the version (the presumed library
          name), terminated by a ``-`` or ``_``;
        * group 2 is the version string: one or two digits, a separator
          (``.``, ``-`` or ``_``), one or two alphanumerics, another
          separator, and any trailing run of alphanumerics, ``-`` or ``_``.

        Group 1 is greedy, so when several ``-``/``_`` characters could
        start the version, the right-most one that still allows a match is
        used.
    """
    # Inside a character class `|` is a literal pipe, not alternation, so
    # the separator class is spelled `[._-]` to avoid accepting names such
    # as "foo-1|2|3" as versioned files.
    return re.compile(
        r'(.+)[-_]([0-9]{1,2}[._-][0-9a-z]{1,2}[._-][0-9a-z_-]*)',
        re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
def lib_isin_list(lib, ver, lib_list):
|
def lib_isin_list(lib, ver, lib_list):
|
||||||
"""Check if a specific library/version has already been detected."""
|
"""Check if a specific library/version has already been detected."""
|
||||||
for item in lib_list:
|
for item in lib_list:
|
||||||
|
@ -47,6 +49,7 @@ def lib_isin_list(lib, ver, lib_list):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def unknown_lib_identifiers():
|
def unknown_lib_identifiers():
|
||||||
"""List of identifiers for generic library version headers."""
|
"""List of identifiers for generic library version headers."""
|
||||||
return ([
|
return ([
|
||||||
|
@ -69,13 +72,14 @@ def unknown_lib_identifiers():
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
def detectLibraries(zipfile):
|
def detectLibraries(zipfile):
|
||||||
"""JavaScript decomposition analysis for extensions."""
|
"""JavaScript decomposition analysis for extensions."""
|
||||||
detection_type = Enum("DetectionType",
|
detection_type = Enum("DetectionType",
|
||||||
'FILENAME FILECONTENT FILENAME_FILECONTENT URL HASH')
|
'FILENAME FILECONTENT FILENAME_FILECONTENT URL HASH')
|
||||||
known_libs = []
|
known_libs = []
|
||||||
unkown_libs = []
|
unkown_libs = []
|
||||||
identifiedApplicationsList = []
|
app_js = []
|
||||||
|
|
||||||
js_files = list(
|
js_files = list(
|
||||||
filter(lambda x: x.filename.endswith(".js"), zipfile.infolist()))
|
filter(lambda x: x.filename.endswith(".js"), zipfile.infolist()))
|
||||||
|
@ -86,15 +90,16 @@ def detectLibraries(zipfile):
|
||||||
with zipfile.open(js_file) as js_file_obj:
|
with zipfile.open(js_file) as js_file_obj:
|
||||||
data = js_file_obj.read()
|
data = js_file_obj.read()
|
||||||
|
|
||||||
js_info = {'lib': None,
|
js_info = {
|
||||||
'ver': None,
|
'lib': None,
|
||||||
'detectMethod': None,
|
'ver': None,
|
||||||
'type': None,
|
'detectMethod': None,
|
||||||
'jsFilename': os.path.basename(js_file.filename),
|
'type': None,
|
||||||
'md5': hashlib.md5(data).hexdigest(),
|
'jsFilename': os.path.basename(js_file.filename),
|
||||||
'size': int(js_file.file_size),
|
'md5': hashlib.md5(data).hexdigest(),
|
||||||
'path': js_file.filename
|
'size': int(js_file.file_size),
|
||||||
}
|
'path': js_file.filename
|
||||||
|
}
|
||||||
|
|
||||||
lib_identified = False
|
lib_identified = False
|
||||||
|
|
||||||
|
@ -104,13 +109,14 @@ def detectLibraries(zipfile):
|
||||||
#if it matches to one of the defined filename regex, store in the dict
|
#if it matches to one of the defined filename regex, store in the dict
|
||||||
#check if there is a filename regex exists for this lib
|
#check if there is a filename regex exists for this lib
|
||||||
if 'filename' in regex:
|
if 'filename' in regex:
|
||||||
filenameMatched = re.search(regex['filename'],
|
filename_matched = re.search(regex['filename'],
|
||||||
js_file.filename, re.IGNORECASE)
|
js_file.filename, re.IGNORECASE)
|
||||||
|
|
||||||
if filenameMatched:
|
if filename_matched:
|
||||||
#check whether this lib has already been identified in the dict, otherwise store the libname and version from the filename
|
# check whether this lib has already been identified in the dict,
|
||||||
|
# otherwise store the libname and version from the filename
|
||||||
js_info['lib'] = lib
|
js_info['lib'] = lib
|
||||||
js_info['ver'] = filenameMatched.group(2)
|
js_info['ver'] = filename_matched.group(2)
|
||||||
js_info['type'] = "library"
|
js_info['type'] = "library"
|
||||||
js_info['detectMethod'] = detection_type.FILENAME.name
|
js_info['detectMethod'] = detection_type.FILENAME.name
|
||||||
known_libs.append(js_info)
|
known_libs.append(js_info)
|
||||||
|
@ -121,32 +127,34 @@ def detectLibraries(zipfile):
|
||||||
#check if there is filecontent regex exists for this lib
|
#check if there is filecontent regex exists for this lib
|
||||||
if 'filecontent' in regex:
|
if 'filecontent' in regex:
|
||||||
#iterate over the filecontent regexes for this to see if it has a match
|
#iterate over the filecontent regexes for this to see if it has a match
|
||||||
for aFilecontent in regex['filecontent']:
|
for file_content in regex['filecontent']:
|
||||||
libraryMatched = re.search(aFilecontent.encode(), data,
|
lib_matched = re.search(file_content.encode(), data,
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
if libraryMatched:
|
if lib_matched:
|
||||||
ver = libraryMatched.group(2).decode()
|
ver = lib_matched.group(2).decode()
|
||||||
if (not lib_isin_list(
|
if not lib_isin_list(lib, ver, known_libs):
|
||||||
lib, ver, known_libs)):
|
|
||||||
js_info['lib'] = lib
|
js_info['lib'] = lib
|
||||||
js_info['ver'] = ver
|
js_info['ver'] = ver
|
||||||
js_info['type'] = "library"
|
js_info['type'] = "library"
|
||||||
js_info['detectMethod'] = detection_type.FILECONTENT.name
|
js_info[
|
||||||
known_libs.append(js_info)
|
'detectMethod'] = detection_type.FILECONTENT.name
|
||||||
|
known_libs.append(js_info)
|
||||||
|
|
||||||
lib_identified = True
|
lib_identified = True
|
||||||
is_app_js = False
|
is_app_js = False
|
||||||
break
|
break
|
||||||
#do not need to check the other regex for this library - since its already found
|
# do not need to check the other regex for this library,
|
||||||
|
# since its already found
|
||||||
|
|
||||||
#if none of the regexes in the repository match, check whether the unknown regexes match
|
#if none of the regexes in the repository match, check whether the unknown
|
||||||
|
# regexes match
|
||||||
if not lib_identified:
|
if not lib_identified:
|
||||||
#check the filename
|
#check the filename
|
||||||
unkFilenameMatch = unknown_filename_identifier().search(
|
unknown_filename_match = unknown_filename_identifier().search(
|
||||||
js_file.filename)
|
js_file.filename)
|
||||||
if unkFilenameMatch:
|
if unknown_filename_match:
|
||||||
js_info['lib'] = unkFilenameMatch.group(1)
|
js_info['lib'] = unknown_filename_match.group(1)
|
||||||
js_info['ver'] = unkFilenameMatch.group(2)
|
js_info['ver'] = unknown_filename_match.group(2)
|
||||||
js_info['type'] = "likely_library"
|
js_info['type'] = "likely_library"
|
||||||
js_info['detectMethod'] = detection_type.FILENAME.name
|
js_info['detectMethod'] = detection_type.FILENAME.name
|
||||||
unkown_libs.append(js_info)
|
unkown_libs.append(js_info)
|
||||||
|
@ -157,20 +165,22 @@ def detectLibraries(zipfile):
|
||||||
#otherwise check the filecontent
|
#otherwise check the filecontent
|
||||||
for unkregex in unknown_lib_identifiers():
|
for unkregex in unknown_lib_identifiers():
|
||||||
#print("Analysing for regex: {}".format(unkregex))
|
#print("Analysing for regex: {}".format(unkregex))
|
||||||
unknownLibraryMatched = unkregex.search(data)
|
unkown_lib_matched = unkregex.search(data)
|
||||||
if unknownLibraryMatched:
|
if unkown_lib_matched:
|
||||||
#check whether this library is actually unknown, by comparing it with identified dicts
|
#check whether this library is actually unknown, by comparing it with
|
||||||
#unkLib = unknownLibraryMatched.group(1).lower().decode()
|
# identified dicts
|
||||||
unkVer = unknownLibraryMatched.group(2).decode()
|
unkown_version = unkown_lib_matched.group(2).decode()
|
||||||
unkjsFile = ((js_file.filename).replace(
|
unkown_js_file = ((js_file.filename).replace(
|
||||||
'.js', '')).replace('.min', '')
|
'.js', '')).replace('.min', '')
|
||||||
|
|
||||||
if (not lib_isin_list(unkjsFile, unkVer,
|
if (not lib_isin_list(unkown_js_file, unkown_version,
|
||||||
known_libs)):
|
known_libs)):
|
||||||
#put this unknown library in the unknown dictionary. use the filename instead - safer
|
#put this unknown library in the unknown dictionary. use the filename
|
||||||
js_info['lib'] = unkjsFile
|
# instead - safer
|
||||||
js_info['ver'] = unkVer
|
js_info['lib'] = unkown_js_file
|
||||||
js_info['detectMethod'] = detection_type.FILENAME_FILECONTENT.name
|
js_info['ver'] = unkown_version
|
||||||
|
js_info[
|
||||||
|
'detectMethod'] = detection_type.FILENAME_FILECONTENT.name
|
||||||
js_info['type'] = "likely_library"
|
js_info['type'] = "likely_library"
|
||||||
unkown_libs.append(js_info)
|
unkown_libs.append(js_info)
|
||||||
is_app_js = False
|
is_app_js = False
|
||||||
|
@ -183,7 +193,6 @@ def detectLibraries(zipfile):
|
||||||
js_info['ver'] = None
|
js_info['ver'] = None
|
||||||
js_info['detectMethod'] = None
|
js_info['detectMethod'] = None
|
||||||
js_info['type'] = "application"
|
js_info['type'] = "application"
|
||||||
identifiedApplicationsList.append(js_info)
|
app_js.append(js_info)
|
||||||
|
|
||||||
return (known_libs + unkown_libs +
|
return known_libs + unkown_libs + app_js
|
||||||
identifiedApplicationsList)
|
|
||||||
|
|
Loading…
Reference in New Issue