Merge branch 'master' of logicalhacking.com:BrowserSecurity/ExtensionCrawler

This commit is contained in:
Michael Herzberg 2017-08-28 22:38:05 +01:00
commit f81aac7c61
4 changed files with 454 additions and 40 deletions

View File

@ -23,6 +23,7 @@ import re
import json
from enum import Enum
import hashlib
import cchardet as chardet
from ExtensionCrawler.js_mincer import mince_js
class DetectionType(Enum):
@ -39,7 +40,7 @@ class FileClassification(Enum):
LIKELY_LIBRARY = 2
APPLICATION = 3
def lib_identifiers():
def load_lib_identifiers():
"""Initialize identifiers for known libraries from JSON file."""
regex_file = os.path.join(
os.path.dirname(os.path.realpath(__file__)), '../resources/',
@ -55,16 +56,6 @@ def unknown_filename_identifier():
r'(.+)[\-\_]([0-9]{1,2}[\.|\-|\_][0-9a-z]{1,2}[\.|\-|\_][0-9a-z\-\_]*)',
re.IGNORECASE)
def lib_isin_list(lib, ver, lib_list):
"""Check if a specific library/version has already been detected."""
for item in lib_list:
if (item['lib'].lower() == lib.lower() and
item['ver'].lower() == ver.lower()):
return True
return False
def unknown_lib_identifiers():
"""List of identifiers for generic library version headers."""
return ([
@ -101,6 +92,7 @@ def init_jsinfo(zipfile, js_file):
'evidenceStartPos': None,
'evidenceEndPos': None,
'evidenceText': None,
'encoding': chardet.detect(data)['encoding'],
'jsFilename': os.path.basename(js_file.filename),
'md5': hashlib.md5(data).hexdigest(),
'size': int(js_file.file_size),
@ -108,16 +100,31 @@ def init_jsinfo(zipfile, js_file):
}
return js_info
def analyse_md5_checksum(zipfile, js_file, js_info):
    """Identify a library by matching the file's md5 against known hashes.

    Every library entry returned by ``load_lib_identifiers()`` that has an
    ``md5`` list is inspected, and each recorded ``hash`` is compared with
    ``js_info['md5']``.

    On the first match ``js_info`` is updated in place (``lib``, ``ver``,
    ``type`` and ``detectMethod``) and returned wrapped in a one-element
    list.  When no recorded hash matches, ``None`` is returned.

    ``zipfile`` and ``js_file`` are accepted but not used by this function.
    """
    for lib_name, lib_spec in load_lib_identifiers().items():
        # Libraries without recorded checksums cannot match by hash.
        if 'md5' not in lib_spec:
            continue
        for known in lib_spec['md5']:
            if known['hash'] != js_info['md5']:
                continue
            # First hit wins: annotate the shared js_info record and stop.
            js_info['lib'] = lib_name
            js_info['ver'] = known['version']
            js_info['type'] = FileClassification.LIBRARY
            js_info['detectMethod'] = DetectionType.HASH
            return [js_info]
    return None
def analyse_known_filename(zipfile, js_file):
def analyse_known_filename(zipfile, js_file, js_info):
"""Check for known file name patterns."""
libs = list()
for lib, regex in lib_identifiers().items():
for lib, regex in load_lib_identifiers().items():
if 'filename' in regex:
filename_matched = re.search(regex['filename'],
js_file.filename, re.IGNORECASE)
if filename_matched:
js_info = init_jsinfo(zipfile, js_file)
js_info['lib'] = lib
js_info['ver'] = filename_matched.group(2)
js_info['type'] = FileClassification.LIBRARY
@ -125,13 +132,12 @@ def analyse_known_filename(zipfile, js_file):
libs.append(js_info)
return libs
def analyse_generic_filename(zipfile, js_file):
def analyse_generic_filename(zipfile, js_file, js_info):
"""Check for generic file name patterns."""
libs = list()
unknown_filename_match = unknown_filename_identifier().search(
js_file.filename)
if unknown_filename_match:
js_info = init_jsinfo(zipfile, js_file)
js_info['lib'] = unknown_filename_match.group(1)
js_info['ver'] = unknown_filename_match.group(2)
js_info['type'] = FileClassification.LIKELY_LIBRARY
@ -139,21 +145,20 @@ def analyse_generic_filename(zipfile, js_file):
libs.append(js_info)
return libs
def analyse_filename(zipfile, js_file):
def analyse_filename(zipfile, js_file, js_info):
"""Check for file name patterns of libraries (known and generic as fall back)`"""
res = analyse_known_filename(zipfile, js_file)
res = analyse_known_filename(zipfile, js_file, js_info)
if not res:
res = analyse_generic_filename(zipfile, js_file)
res = analyse_generic_filename(zipfile, js_file, js_info)
return res
def analyse_comment_known_libs(zipfile, js_file, comment):
def analyse_comment_known_libs(zipfile, js_file, js_info, comment):
"""Search for library specific identifiers in comment block."""
libs = list()
for unkregex in unknown_lib_identifiers():
unkown_lib_matched = unkregex.finditer(comment.content)
for match in unkown_lib_matched:
js_info = init_jsinfo(zipfile, js_file)
js_info['lib'] = ((js_file.filename).replace(
'.js', '')).replace('.min', '')
js_info['ver'] = match.group(2)
@ -162,13 +167,12 @@ def analyse_comment_known_libs(zipfile, js_file, comment):
libs.append(js_info)
return libs
def analyse_comment_generic_libs(zipfile, js_file, comment):
def analyse_comment_generic_libs(zipfile, js_file, js_info, comment):
"""Search for generic identifiers in comment block."""
libs = list()
for unkregex in unknown_lib_identifiers():
unkown_lib_matched = unkregex.finditer(comment.content)
for match in unkown_lib_matched:
js_info = init_jsinfo(zipfile, js_file)
js_info['lib'] = ((js_file.filename).replace(
'.js', '')).replace('.min', '')
js_info['ver'] = match.group(2)
@ -177,18 +181,21 @@ def analyse_comment_generic_libs(zipfile, js_file, comment):
libs.append(js_info)
return libs
def analyse_comment_blocks(zipfile, js_file):
def analyse_comment_blocks(zipfile, js_file, js_info):
"""Search for library identifiers in comment."""
libs = list()
with zipfile.open(js_file) as js_file_obj:
with io.TextIOWrapper(js_file_obj, 'utf-8') as js_text_file_obj:
for block in mince_js(js_text_file_obj, single_line_comments_block=True):
block_libs = list()
if block.is_comment():
block_libs = analyse_comment_known_libs(zipfile, js_file, block)
if block_libs is None:
block_libs = analyse_comment_generic_libs(zipfile, js_file, block)
libs += block_libs
try:
with zipfile.open(js_file) as js_file_obj:
with io.TextIOWrapper(js_file_obj, js_info['encoding']) as js_text_file_obj:
for block in mince_js(js_text_file_obj, single_line_comments_block=True):
block_libs = list()
if block.is_comment():
block_libs = analyse_comment_known_libs(zipfile, js_file, js_info, block)
if block_libs is None:
block_libs = analyse_comment_generic_libs(zipfile, js_file, js_info, block)
libs += block_libs
except:
libs = list()
return libs
def decompose_js(zipfile):
@ -203,12 +210,15 @@ def decompose_js(zipfile):
js_inventory = []
for js_file in list(filter(lambda x: x.filename.endswith(".js"), zipfile.infolist())):
js_info_file = analyse_filename(zipfile, js_file)
js_info_file += analyse_comment_blocks(zipfile, js_file)
js_info = init_jsinfo(zipfile, js_file)
js_info_file = analyse_md5_checksum(zipfile, js_file, js_info)
if not js_info_file:
js_info_file = analyse_filename(zipfile, js_file, js_info)
js_info_file += analyse_comment_blocks(zipfile, js_file, js_info)
if not js_info_file:
# if no library could be detected, we report the JavaScript file as 'application'.
js_info = init_jsinfo(zipfile, js_file)
js_info['lib'] = None
js_info['ver'] = None
js_info['detectMethod'] = None

View File

@ -1,3 +1,4 @@
cchardet==2.1.1
requests==2.18.1
pycrypto==2.6.1
beautifulsoup4==4.6.0

View File

@ -7,6 +7,194 @@
"Id: (jquery)\\.js,\\s?v\\s?([0-9][0-9.a-z_\\\\-]+)",
"(jQuery).*[f|m]=.?v?([0-9][0-9.a-z_\\\\-]+).",
"[^a-z.](jQuery)\\:?[ ]?\"?v([0-9][0-9.a-z_\\\\-]+)"
],
"md5": [
{
"hash": "e071abda8fe61194711cfc2ab99fe104",
"version": "3.1.1",
"minified": "yes",
"comment":""
},
{
"hash": "e40ec2161fe7993196f23c8a07346306",
"version": "2.1.1",
"minified": "yes",
"comment":""
},
{
"hash": "f9c7afd05729f10f55b689f36bb20172",
"version": "2.1.4",
"minified": "yes",
"comment":""
},
{
"hash": "c9f5aeeca3ad37bf2aa006139b935f0a",
"version": "3.2.1",
"minified": "yes",
"comment":""
},
{
"hash": "8101d596b2b8fa35fe3a634ea342d7c3",
"version": "1.11.1",
"minified": "yes",
"comment":""
},
{
"hash": "ddb84c1587287b2df08966081ef063bf",
"version": "1.7.1",
"minified": "yes",
"comment":""
},
{
"hash": "32015dd42e9582a80a84736f5d9a44d7",
"version": "2.1.3",
"minified": "yes",
"comment":""
},
{
"hash": "397754ba49e9e0cf4e7c190da78dda05",
"version": "1.9.1",
"minified": "yes",
"comment":""
},
{
"hash": "895323ed2f7258af4fae2c738c8aea49",
"version": "1.11.3",
"minified": "yes",
"comment":""
},
{
"hash": "05e51b1db558320f1939f9789ccf5c8f",
"version": "3.1.0",
"minified": "yes",
"comment":""
},
{
"hash": "6fc159d00dc3cea4153c038739683f93",
"version": "2.2.0",
"minified": "yes",
"comment":""
},
{
"hash": "2edc942c0bd2476be8967a9f788d9e26",
"version": "2.0.0",
"minified": "yes",
"comment":""
},
{
"hash": "2f6b11a7e914718e0290410e85366fe9",
"version": "2.2.4",
"minified": "yes",
"comment":""
},
{
"hash": "b8d64d0bc142b3f670cc0611b0aebcae",
"version": "1.7.2",
"minified": "yes",
"comment":""
},
{
"hash": "107fbe9555bfc88ec5cab524c790fe34",
"version": "2.1.4",
"minified": "",
"comment":""
},
{
"hash": "46836bbc603c9565b5cc061100ccbac8",
"version": "3.1.1",
"minified": "",
"comment":""
},
{
"hash": "5790ead7ad3ba27397aedfa3d263b867",
"version": "1.11.2",
"minified": "yes",
"comment":""
},
{
"hash": "cfa9051cc0b05eb519f1e16b2a6645d7",
"version": "1.8.2",
"minified": "yes",
"comment":""
},
{
"hash": "628072e7212db1e8cdacb22b21752cda",
"version": "1.10.2",
"minified": "",
"comment":""
},
{
"hash": "e1288116312e4728f98923c79b034b67",
"version": "1.8.3",
"minified": "yes",
"comment":""
},
{
"hash": "e51be64870f23f7ba920206ed3efeab9",
"version": "2.0.0",
"minified": "min",
"comment":""
},
{
"hash": "4a356126b9573eb7bd1e9a7494737410",
"version": "2.1.4",
"minified": "yes",
"comment":""
},
{
"hash": "0a6e846b954e345951e710cd6ce3440e",
"version": "2.0.3",
"minified": "yes",
"comment":""
},
{
"hash": "91515770ce8c55de23b306444d8ea998",
"version": "1.10.2",
"minified": "",
"comment":""
},
{
"hash": "33cabfa15c1060aaa3d207c653afb1ee",
"version": "2.2.3",
"minified": "yes",
"comment":""
},
{
"hash": "5ca7582261c421482436dfdf3af9bffe",
"version": "2.1.0",
"minified": "yes",
"comment":""
},
{
"hash": "00f66eada2c54b64a3f632747ce1fe2d",
"version": "1.11.2",
"minified": "yes",
"comment":""
},
{
"hash": "b11ced65f32fedbe9bf81ef9db0f3c94",
"version": "1.7.2",
"minified": "yes",
"comment":""
},
{
"hash": "8fc25e27d42774aeae6edbc0a18b72aa",
"version": "1.11.0",
"minified": "yes",
"comment":""
},
{
"hash": "d0212568ce69457081dacf84e327fa5c",
"version": "3.0.0",
"minified": "yes",
"comment":""
}
]
},
"jquery-easing" : {
"filecontent" : [
"(jQuery Easing) v?([0-9][0-9.a-z_\\\\-]+)."
]
},
@ -106,7 +294,10 @@
},
"ember" : {
"filename" : "(ember)-([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)"
"filename" : "(ember)-([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)",
"filecontent" : [
"(Ember).VERSION\\s?\\=\\s?.([0-9][0-9.a-z_\\\\-]+)"
]
},
"dojo" : {
@ -192,16 +383,130 @@
]
},
"moment-timezone.js" : {
"filecontent" : [
"(moment-timezone.js)(?:[\n\r]+).* version\\s?:\\s?v?([0-9][0-9.a-z_\\\\-]+)",
"(moment-timezone.js)(?:.*[\n\r]+){1,60}.*version\\s?[:|=]\\s?v?.?([0-9][0-9.a-z_\\\\-]+).?"
]
},
"bootstrap" : {
"filename" : "(bootstrap)-([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)",
"filecontent" : [
"(bootstrap)(?:.js){,1} v?([0-9][0-9.a-z_\\\\-]+)"
],
"md5": [
{
"hash": "5869c96cc8f19086aee625d670d741f9",
"version": "3.3.7",
"minified": "yes",
"comment":""
},
{
"hash": "c5b5b2fa19bd66ff23211d9f844e0131",
"version": "3.3.6",
"minified": "yes",
"comment":""
},
{
"hash": "4becdc9104623e891fbb9d38bba01be4",
"version": "3.3.5",
"minified": "yes",
"comment":""
},
{
"hash": "8c237312864d2e4c4f03544cd4f9b195",
"version": "3.3.4",
"minified": "yes",
"comment":""
},
{
"hash": "046ba2b5f4cff7d2eaaa1af55caa9fd8",
"version": "3.3.2",
"minified": "yes",
"comment":""
},
{
"hash": "2616d3564578d8f845813483352802a9",
"version": "3.3.1",
"minified": "yes",
"comment":""
},
{
"hash": "281cd50dd9f58c5550620fc148a7bc39",
"version": "3.3.0",
"minified": "yes",
"comment":""
},
{
"hash": "abda843684d022f3bc22bc83927fe05f",
"version": "3.2.0",
"minified": "yes",
"comment":""
},
{
"hash": "ba847811448ef90d98d272aeccef2a95",
"version": "3.1.1",
"minified": "yes",
"comment":""
},
{
"hash": "e1d08589ec26bec3a81625ce274d76d9",
"version": "3.1.0",
"minified": "yes",
"comment":""
},
{
"hash": "353240ad37d1b084a53b1575f8ce57da",
"version": "3.0.3",
"minified": "yes",
"comment":""
},
{
"hash": "c2e5221c3336abe0dff8568e73cd0dae",
"version": "3.0.2",
"minified": "yes",
"comment":""
},
{
"hash": "d6834e94301cc3ab9cc013574d092b61",
"version": "3.0.1",
"minified": "yes",
"comment":""
},
{
"hash": "9e25e8e29ef0ea358e9778082ffd97d8",
"version": "3.0.0",
"minified": "yes",
"comment":""
}
]
},
"ui-bootstrap-tpls" : {
"filename" : "(ui[-_]bootstrap[-_]tpls)(?:.)*[-_]([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)",
"filecontent" : [
"(ui-bootstrap)(?:.*[\r\n]){1,6}.*Version\\:?\\s?v?([0-9][0-9.a-z_\\\\-]+)"
]
},
"package" : {
"filecontent" : [
"(Package)\\.describe(?:.*[\r\n]){1,50}.*version\\:\\s?.([0-9][0-9.a-z_\\\\-]+)."
]
},
"require.js" : {
"filecontent" : [
"@license (RequireJS) v?([0-9][0-9.a-z_\\\\-]+)"
"@license (RequireJS) v?([0-9][0-9.a-z_\\\\-]+)",
"(requirejs).*version\\=.([0-9][0-9.a-z_\\\\-]+)."
]
},
"require-json" : {
"filecontent" : [
"(RequireJS).*JSON(?:.*[\r\n]){1,7}.*Version\\:?\\s?([0-9][0-9.a-z_\\\\-]+)"
]
},
@ -210,5 +515,103 @@
"https\\:\/\/(d3js)\\.org\\s+Version\\:*\\s+v?([0-9][0-9.a-z_\\\\-]+)"
]
},
"UnderscoreJS" : {
"filecontent" : [
"(Underscore[\\.js]*) v?([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)",
"(Underscore[\\.js]*)(?:.*[\n\r]+){1,60}.*\\_\\.VERSION\\s?\\=\\s?.([0-9][0-9.a-z_\\\\-]+)."
],
"md5": [
{
"hash": "543feb1ecaf06ea516f8cec5f9f3f279",
"version": "1.8.3",
"minified": "yes",
"comment":""
},
{
"hash": "b0e9839a6bb6f12774494fa30c06bcdf",
"version": "1.8.2",
"minified": "yes",
"comment":""
},
{
"hash": "6959908db2ddae758885b6c2cb2f07a5",
"version": "1.8.1",
"minified": "yes",
"comment":""
},
{
"hash": "137af05d496f59d468d1ffbce32f375d",
"version": "1.7.0",
"minified": "yes",
"comment":""
},
{
"hash": "dd9663be9a71f3570bc35f0edba28712",
"version": "1.6.0",
"minified": "yes",
"comment":""
},
{
"hash": "ca26dc8cdf5d413cd8d3b62490e28210",
"version": "1.5.2",
"minified": "yes",
"comment":""
},
{
"hash": "cc07a4658799e1512b086467e7ef5ca5",
"version": "1.5.0",
"minified": "yes",
"comment":""
}
]
},
"string.js" : {
"filecontent" : [
"(string\\.js)(?:.*[\r\n]){1,50}.*VERSION\\s?\\=\\s?.([0-9][0-9.a-z_\\\\-]+)."
]
},
"mousewheel" : {
"filecontent" : [
"jQuery (Mousewheel) v?([0-9][0-9.a-z_\\\\-]+)"
]
},
"materialize" : {
"filecontent" : [
"(Materialize[\\.js]*) v?([0-9][0-9.a-z_\\\\-]+)"
]
},
"mootools" : {
"filename" : "(MooTools)(?:.)*[-_]([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)",
"filecontent" :[
"this\\.(MooTools)(?:.*[\r\n]).*version\\:\\s?.([0-9][0-9.a-z_\\\\-]+).",
"this\\.(MooTools)\\={version\\:.([0-9][0-9.a-z_\\\\-]+)."
]
},
"require-text" : {
"filecontent" : [
"(text)\\s?\\=\\s?{(?:.*[\r\n]){1,4}.*version\\:\\s?.([0-9][0-9.a-z_\\\\-]+).",
"(text)\\s?\\=\\s?{version\\:\\s?.([0-9][0-9.a-z_\\\\-]+)."
]
},
"CryptoJS" : {
"filecontent" : [
"(CryptoJS) v?([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)"
]
},
"share-button" : {
"filecontent" : [
"(ShareButton)(?:.*[\r\n]){1,25}.*version\\:\\s?.([0-9][0-9.a-z_\\\\-]+)."
]
}
}
}

View File

@ -5,5 +5,5 @@ setup(
description='A collection of utilities for downloading and analyzing browser extension from the Chrome Web store.',
author='Achim D. Brucker, Michael Herzberg',
license='GPL 3.0',
install_requires=['requests', 'pycrypto', 'beautifulsoup4', 'python_dateutil', 'mysqlclient']
install_requires=['requests', 'pycrypto', 'beautifulsoup4', 'python_dateutil', 'mysqlclient', 'cchardet']
)