Integrated js_mincer into the decomposition analysis so that, in the future, comments, code, and string literals can be checked explicitly.
This commit is contained in:
parent
9ef27f9ac9
commit
f10923af03
|
@ -18,10 +18,12 @@
|
|||
general and Chrome extensions in particular."""
|
||||
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
import json
|
||||
from enum import Enum
|
||||
import hashlib
|
||||
from ExtensionCrawler.js_mincer import mince_js
|
||||
|
||||
class DetectionType(Enum):
|
||||
"""Enumeration for detection types."""
|
||||
|
@ -67,21 +69,21 @@ def unknown_lib_identifiers():
|
|||
"""List of identifiers for generic library version headers."""
|
||||
return ([
|
||||
re.compile(
|
||||
rb'[\/|\/\/|\s]\*?\s?([a-zA-Z0-9\.]+)\sv?([0-9][\.|\-|\_][0-9.a-z_\\\\-]+)',
|
||||
r'[\/|\/\/|\s]\*?\s?([a-zA-Z0-9\.]+)\sv?([0-9][\.|\-|\_][0-9.a-z_\\\\-]+)',
|
||||
re.IGNORECASE
|
||||
), #MatchType: name version, e.g. mylib v1.2.9b or mylib.anything 1.2.8
|
||||
re.compile(
|
||||
rb'[\/|\/\/|\s]\*?\s?([a-zA-Z0-9\.]+)\s(?: version)\:?\s?v?([0-9][0-9.a-z_\\\\-]+)',
|
||||
r'[\/|\/\/|\s]\*?\s?([a-zA-Z0-9\.]+)\s(?: version)\:?\s?v?([0-9][0-9.a-z_\\\\-]+)',
|
||||
re.IGNORECASE
|
||||
), #MatchType: name version: ver, e.g. mylib version: v1.2.9, or mylib.js version 1.2.8
|
||||
re.compile(
|
||||
rb'\@*(version)\s?[\:|-]?\s?v?([0-9][\.|\-|\_][0-9.a-z_\\\\-]+)',
|
||||
r'\@*(version)\s?[\:|-]?\s?v?([0-9][\.|\-|\_][0-9.a-z_\\\\-]+)',
|
||||
re.IGNORECASE
|
||||
), #MatchType: version x.x.x, e.g. @version: 1.2.5 or version - 1.2.5 etc.
|
||||
re.compile(
|
||||
rb'(version)[\:|\=]\s?.?([0-9]{1,2}[\.|\-|\_][0-9.a-z_\\\\-]+).?',
|
||||
r'(version)[\:|\=]\s?.?([0-9]{1,2}[\.|\-|\_][0-9.a-z_\\\\-]+).?',
|
||||
re.IGNORECASE),
|
||||
re.compile(rb'(.+) v([0-9]{1,2}[\.|\-|\_][0-9]{1,2}[0-9.a-z_\\\\-]*)',
|
||||
re.compile(r'(.+) v([0-9]{1,2}[\.|\-|\_][0-9]{1,2}[0-9.a-z_\\\\-]*)',
|
||||
re.IGNORECASE)
|
||||
])
|
||||
|
||||
|
@ -123,28 +125,6 @@ def analyse_known_filename(zipfile, js_file):
|
|||
libs.append(js_info)
|
||||
return libs
|
||||
|
||||
def analyse_known_filecontent(zipfile, js_file):
    """Check the file content for headers of known libraries.

    The raw bytes of ``js_file`` are read from ``zipfile`` and every
    ``'filecontent'`` pattern listed by ``lib_identifiers()`` is searched
    in them, ignoring case. Each match produces one record created by
    ``init_jsinfo`` carrying the library name, the version taken from
    regex group 2, and the LIBRARY / FILECONTENT classification.

    Returns a list of such records (empty when nothing matched).
    """
    with zipfile.open(js_file) as handle:
        raw = handle.read()

    found = []
    for lib_name, patterns in lib_identifiers().items():
        # Libraries without content patterns cannot be detected here.
        if 'filecontent' not in patterns:
            continue
        for pattern in patterns['filecontent']:
            # The patterns are text; the archive member is bytes, so the
            # pattern is encoded before searching.
            for hit in re.finditer(pattern.encode(), raw, re.IGNORECASE):
                version = hit.group(2).decode()
                entry = init_jsinfo(zipfile, js_file)
                entry['lib'] = lib_name
                entry['ver'] = version
                entry['type'] = FileClassification.LIBRARY
                entry['detectMethod'] = DetectionType.FILECONTENT
                found.append(entry)
    return found
|
||||
|
||||
def analyse_generic_filename(zipfile, js_file):
|
||||
"""Check for generic file name patterns."""
|
||||
libs = list()
|
||||
|
@ -159,30 +139,57 @@ def analyse_generic_filename(zipfile, js_file):
|
|||
libs.append(js_info)
|
||||
return libs
|
||||
|
||||
def analyse_generic_filecontent(zipfile, js_file):
|
||||
"""Check for generic file content (license headers)."""
|
||||
def analyse_filename(zipfile, js_file):
    """Check for file name patterns of libraries.

    The known-library file name check runs first; the generic file name
    check is used only as a fallback when the first one finds nothing.
    """
    # `or` hands back the first result when it is truthy (non-empty),
    # otherwise it evaluates and returns the generic fallback.
    return (analyse_known_filename(zipfile, js_file)
            or analyse_generic_filename(zipfile, js_file))
|
||||
|
||||
|
||||
def analyse_comment_known_libs(zipfile, js_file, comment):
|
||||
"""Search for library specific identifiers in comment block."""
|
||||
libs = list()
|
||||
data = ""
|
||||
with zipfile.open(js_file) as js_file_obj:
|
||||
data = js_file_obj.read()
|
||||
for unkregex in unknown_lib_identifiers():
|
||||
unkown_lib_matched = unkregex.finditer(data)
|
||||
unkown_lib_matched = unkregex.finditer(comment.content)
|
||||
for match in unkown_lib_matched:
|
||||
js_info = init_jsinfo(zipfile, js_file)
|
||||
js_info['lib'] = ((js_file.filename).replace(
|
||||
'.js', '')).replace('.min', '')
|
||||
js_info['ver'] = match.group(2).decode()
|
||||
js_info['ver'] = match.group(2)
|
||||
js_info['detectMethod'] = DetectionType.FILENAME_FILECONTENT
|
||||
js_info['type'] = FileClassification.LIKELY_LIBRARY
|
||||
libs.append(js_info)
|
||||
return libs
|
||||
|
||||
def analyse_filename(zipfile, js_file):
    """Check for file name patterns of libraries.

    The known-library file name check runs first; when it finds nothing
    the result of ``analyse_generic_filecontent`` is returned instead.
    """
    # NOTE(review): the fallback is the generic file *content* analyser,
    # although this function is about file names - confirm this is intended.
    return (analyse_known_filename(zipfile, js_file)
            or analyse_generic_filecontent(zipfile, js_file))
|
||||
def analyse_comment_generic_libs(zipfile, js_file, comment):
    """Search a comment block for generic library version identifiers.

    Every pattern from ``unknown_lib_identifiers()`` is run over
    ``comment.content``. Each match yields one record from ``init_jsinfo``
    whose library name is derived from the file name (with '.js' and
    '.min' removed) and whose version is regex group 2.

    Returns a list of such records (empty when nothing matched).
    """
    results = []
    for pattern in unknown_lib_identifiers():
        for hit in pattern.finditer(comment.content):
            record = init_jsinfo(zipfile, js_file)
            # No library name is available from a generic header, so the
            # file name stands in for it.
            stem = js_file.filename.replace('.js', '')
            record['lib'] = stem.replace('.min', '')
            record['ver'] = hit.group(2)
            record['detectMethod'] = DetectionType.FILENAME_FILECONTENT
            record['type'] = FileClassification.LIKELY_LIBRARY
            results.append(record)
    return results
|
||||
|
||||
def analyse_comment_blocks(zipfile, js_file):
    """Search for library identifiers in the comment blocks of a file.

    ``js_file`` is opened from ``zipfile``, decoded as UTF-8 text and split
    into blocks by ``mince_js``. Every comment block is first checked by
    ``analyse_comment_known_libs``; only if that finds nothing is the block
    passed to ``analyse_comment_generic_libs`` as a fallback.

    Returns the concatenated list of records found in all comment blocks
    (empty when nothing was detected).
    """
    libs = []
    with zipfile.open(js_file) as js_file_obj:
        with io.TextIOWrapper(js_file_obj, 'utf-8') as js_text_file_obj:
            for block in mince_js(js_text_file_obj,
                                  single_line_comments_block=True):
                if not block.is_comment():
                    continue
                block_libs = analyse_comment_known_libs(
                    zipfile, js_file, block)
                # The analyser returns a (possibly empty) list, never None,
                # so the fallback must be triggered by emptiness; an
                # `is None` test would leave the generic check unreachable.
                if not block_libs:
                    block_libs = analyse_comment_generic_libs(
                        zipfile, js_file, block)
                libs.extend(block_libs)
    return libs
|
||||
|
||||
def decompose_js(zipfile):
|
||||
"""JavaScript decomposition analysis for extensions."""
|
||||
|
@ -196,11 +203,8 @@ def decompose_js(zipfile):
|
|||
|
||||
js_inventory = []
|
||||
for js_file in list(filter(lambda x: x.filename.endswith(".js"), zipfile.infolist())):
|
||||
|
||||
js_info_file = analyse_filename(zipfile, js_file)
|
||||
|
||||
js_info_file += analyse_generic_filecontent(zipfile, js_file)
|
||||
js_info_file += analyse_known_filecontent(zipfile, js_file)
|
||||
js_info_file += analyse_comment_blocks(zipfile, js_file)
|
||||
|
||||
if not js_info_file:
|
||||
# if no library could be detected, we report the JavaScript file as 'application'.
|
||||
|
|
Loading…
Reference in New Issue