From faaa458921300977a6630776bb468d3ee48bdaae Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Fri, 1 Sep 2017 22:26:30 +0100 Subject: [PATCH 1/6] Updated regexps. --- resources/js_identifier.json | 155 +++++++++++++++++++++++++++++++---- 1 file changed, 139 insertions(+), 16 deletions(-) diff --git a/resources/js_identifier.json b/resources/js_identifier.json index 20ca6d9..cbecf7a 100644 --- a/resources/js_identifier.json +++ b/resources/js_identifier.json @@ -3,7 +3,7 @@ "filename": "(jquery)-([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)", "filecontent": [ "(jquery) JavaScript Library v?([0-9][0-9.a-z_\\\\-]+)", - "(jquery) v?([0-9][0-9.a-z_\\\\-]+)", + "(jquery) v?([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)", "Id: (jquery)\\.js,\\s?v\\s?([0-9][0-9.a-z_\\\\-]+)", "(jQuery).*[f|m]=.?v?([0-9][0-9.a-z_\\\\-]+).", "[^a-z.](jQuery)\\:?[ ]?\"?v([0-9][0-9.a-z_\\\\-]+)" @@ -58,8 +58,8 @@ "comment": "" }, { - "sha1": "e3dbb65f2b541d842b50d37304b0102a2d5f2387", - "md5": "33cabfa15c1060aaa3d207c653afb1ee", + "sha1": "e3dbb65f2b541d842b50d37304b0102a2d5f2387", + "md5": "33cabfa15c1060aaa3d207c653afb1ee", "version": "2.2.3", "minified": "yes", "comment": "" @@ -228,30 +228,153 @@ ] }, "jquery-ui": { - "filename": "(jquery.ui).([0-9]+[.a-z_\\\\-][0-9a-z]+[.a-z_\\\\-][0-9a-z]+).([custom.min|min]+)?\\.js", + "filename": "(jquery.ui).([0-9]+[.a-z_\\\\-][0-9a-z]+[.a-z_\\\\-][0-9a-z]+).([custom.min|min]+)?", "filecontent": [ "(jQuery UI)(?: -)? v?([0-9][0-9.a-z_\\\\-]+)" ] }, + "jquery-ui-widget": { + "filecontent": [ + "(jQuery UI Widget)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)" + ] + }, "jquery-ui-dialog": { "filecontent": [ "(jQuery UI Dialog)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", - "\"?(ui\\.dialog)\"?,.version:.([0-9][0-9.a-z_\\\\-]+).", - ".widget.\\s?.(ui\\.dialog).(?:.*[\n\r]+){1,2}.*version:\\s?.([0-9.]+)." + ".widget.\\s?.(ui\\.dialog).(?:.*[\n\r]+){1,2}.*version:\\s?.([0-9.]+).", + ".(ui\\.dialog).,\\{version:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-position": { + "filecontent": [ + "(jQuery UI Position)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)" + ] + }, + "jquery-ui-effects": { + "filecontent": [ + "(jQuery UI Effects)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + "\\.(effects),\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-color": { + "filecontent": [ + "(jQuery Color Animations)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)" + ] + }, + "jquery-ui-accordion": { + "filecontent": [ + "(jQuery UI Accordion)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.accordion).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-menu": { + "filecontent": [ + "(jQuery UI Menu)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.menu).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." ] }, "jquery-ui-autocomplete": { "filecontent": [ "(jQuery UI Autocomplete)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", - "\"?(ui\\.autocomplete)\"?,.version:.([0-9][0-9.a-z_\\\\-]+).", - ".widget.\\s?.(ui\\.autocomplete).(?:.*[\n\r]+){1,2}.*version:\\s?.([0-9.]+)." + ".widget.\\s?.(ui\\.autocomplete).(?:.*[\n\r]+){1,2}.*version:\\s?.([0-9.]+).", + ".(ui\\.autocomplete).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-controlgroup": { + "filecontent": [ + "(jQuery UI Controlgroup)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.controlgroup).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-checkboxradio": { + "filecontent": [ + "(jQuery UI Checkboxradio)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.checkboxradio).,\\[.\\.ui.formResetMixin,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-button": { + "filecontent": [ + "(jQuery UI Button)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.button).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-datepicker": { + "filecontent": [ + "(jQuery UI Datepicker)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + "(datepicker)\\:{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-mouse": { + "filecontent": [ + "(jQuery UI Mouse)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.mouse).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-draggable": { + "filecontent": [ + "(jQuery UI Draggable)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.draggable).,.\\.ui\\.mouse,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-resizable": { + "filecontent": [ + "(jQuery UI Resizable)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.resizable).,.\\.ui\\.mouse,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-droppable": { + "filecontent": [ + "(jQuery UI Droppable)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.droppable).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-progressbar": { + "filecontent": [ + "(jQuery UI Progressbar)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.progressbar).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-selectable": { + "filecontent": [ + "(jQuery UI Selectable)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.selectable).,.\\.ui\\.mouse,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-selectmenu": { + "filecontent": [ + "(jQuery UI Selectmenu)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.selectmenu).,\\[.\\.ui.formResetMixin,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-slider": { + "filecontent": [ + "(jQuery UI Slider)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.slider).,.\\.ui\\.mouse,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-sortable": { + "filecontent": [ + "(jQuery UI Sortable)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.sortable).,.\\.ui\\.mouse,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-spinner": { + "filecontent": [ + "(jQuery UI Spinner)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.spinner).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." + ] + }, + "jquery-ui-tabs": { + "filecontent": [ + "(jQuery UI Tabs)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", + ".(ui\\.tabs).,\\{version\\:.([0-9][0-9.a-z_\\\\-]+)." ] }, "jquery-ui-tooltip": { "filecontent": [ "(jQuery UI Tooltip)\\s?-?\\s?v?([0-9][0-9.a-z_\\\\-]+)", - "\"?(ui\\.tooltip)\"?,.version:.([0-9][0-9.a-z_\\\\-]+).", - ".widget.\\s?.(ui\\.tooltip).(?:.*[\n\r]+){1,2}.*version:\\s?.([0-9.]+)." + ".widget.\\s?.(ui\\.tooltip).(?:.*[\n\r]+){1,2}.*version:\\s?.([0-9.]+).", + ".(ui\\.tooltip).,\\{version:.([0-9][0-9.a-z_\\\\-]+)." ] }, "jquery.prettyPhoto": { @@ -469,12 +592,12 @@ "ui-bootstrap-tpls": { "filename": "(ui[-_]bootstrap[-_]tpls)(?:.)*[-_]([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)", "filecontent": [ - "(ui-bootstrap)(?:.*[\r\n]){1,6}.*Version\\:?\\s?v?([0-9][0-9.a-z_\\\\-]+)" + "(ui-bootstrap)(?:.*[\n\r]+){1,6}.*Version\\:?\\s?v?([0-9][0-9.a-z_\\\\-]+)" ] }, "package": { "filecontent": [ - "(Package)\\.describe(?:.*[\r\n]){1,50}.*version\\:\\s?.([0-9][0-9.a-z_\\\\-]+)." + "(Package)\\.describe(?:.*[\n\r]+){1,50}.*version\\:\\s?.([0-9][0-9.a-z_\\\\-]+)." ] }, "require.js": { @@ -485,7 +608,7 @@ }, "require-json": { "filecontent": [ - "(RequireJS).*JSON(?:.*[\r\n]){1,7}.*Version\\:?\\s?([0-9][0-9.a-z_\\\\-]+)" + "(RequireJS).*JSON(?:.*[\n\r]+){1,7}.*Version\\:?\\s?([0-9][0-9.a-z_\\\\-]+)" ] }, "d3js": { @@ -493,10 +616,10 @@ "https\\://(d3js)\\.org\\s+Version\\:*\\s+v?([0-9][0-9.a-z_\\\\-]+)" ] }, - "UnderscoreJS": { + "underscore.js": { "filecontent": [ "(Underscore[\\.js]*) v?([0-9]{1,2}[\\.|\\-|\\_][0-9a-z]{1,2}[\\.|\\-|\\_][0-9a-z\\-\\_]*)", - "(Underscore[\\.js]*)(?:.*[\n\r]+){1,60}.*\\_\\.VERSION\\s?\\=\\s?.([0-9][0-9.a-z_\\\\-]+)." + "(\\_)\\.VERSION\\s?=\\s?.([0-9][0-9.a-z_\\\\-]+)." ], "md5": [ { @@ -545,7 +668,7 @@ }, "string.js": { "filecontent": [ - "(string\\.js)(?:.*[\r\n]){1,50}.*VERSION\\s?\\=\\s?.([0-9][0-9.a-z_\\\\-]+)." + "(string\\.js)(?:.*[\n\r]+){1,10}.*VERSION\\s?\\=\\s?.([0-9][0-9.a-z_\\\\-]+)." ] }, "mousewheel": { From 5c987833a4a3fc6ccf4e27b1f9785366c99d9c47 Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Fri, 1 Sep 2017 23:23:11 +0100 Subject: [PATCH 2/6] Bug fix: NoneType object is not iterable. --- ExtensionCrawler/js_decomposer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ExtensionCrawler/js_decomposer.py b/ExtensionCrawler/js_decomposer.py index 51c2c7b..53ba1a8 100755 --- a/ExtensionCrawler/js_decomposer.py +++ b/ExtensionCrawler/js_decomposer.py @@ -240,7 +240,6 @@ def analyse_comment_generic_libs(zipfile, js_file, js_info, comment): def analyse_comment_blocks(zipfile, js_file, js_info): """Search for library identifiers in comment.""" - def mince_js_fileobj(js_text_file_obj): """Mince JavaScript file using a file object.""" libs = list() @@ -250,9 +249,10 @@ def analyse_comment_blocks(zipfile, js_file, js_info): block_libs = analyse_comment_known_libs(zipfile, js_file, js_info, block) if block_libs is None: block_libs = analyse_comment_generic_libs(zipfile, js_file, js_info, block) - libs += block_libs - return libs - + if block_libs is not None: + libs += block_libs + return libs + libs = [] try: if zipfile is not None: with zipfile.open(js_file) as js_file_obj: @@ -293,7 +293,6 @@ def decompose_js(file): if not js_info_file: js_info_file = analyse_filename(zipfile, js_file, js_info) js_info_file += analyse_comment_blocks(zipfile, js_file, js_info) - if not js_info_file: # if no library could be detected, we report the JavaScript file as 'application'. js_info['lib'] = None From 28f6aa5f4572383baba89d82020a04c0970e32ee Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Fri, 1 Sep 2017 23:24:55 +0100 Subject: [PATCH 3/6] Bug fix: indentation --- ExtensionCrawler/js_decomposer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ExtensionCrawler/js_decomposer.py b/ExtensionCrawler/js_decomposer.py index 53ba1a8..d413083 100755 --- a/ExtensionCrawler/js_decomposer.py +++ b/ExtensionCrawler/js_decomposer.py @@ -202,7 +202,7 @@ def analyse_comment_known_libs(zipfile, js_file, js_info, comment): """Search for library specific identifiers in comment block.""" libs = list() if zipfile is not None: - filename = js_file.filename + filename = js_file.filename else: filename = js_file @@ -222,7 +222,7 @@ def analyse_comment_generic_libs(zipfile, js_file, js_info, comment): """Search for generic identifiers in comment block.""" libs = list() if zipfile is not None: - filename = js_file.filename + filename = js_file.filename else: filename = js_file @@ -257,10 +257,10 @@ def analyse_comment_blocks(zipfile, js_file, js_info): if zipfile is not None: with zipfile.open(js_file) as js_file_obj: with io.TextIOWrapper(js_file_obj, js_info['encoding']) as js_text_file_obj: - libs=mince_js_fileobj(js_text_file_obj) + libs = mince_js_fileobj(js_text_file_obj) else: with open(js_file) as js_text_file_obj: - libs=mince_js_fileobj(js_text_file_obj) + libs = mince_js_fileobj(js_text_file_obj) except: libs = list() return libs From a69c1730643b5fae06af7285f37a2620cb401be0 Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Fri, 1 Sep 2017 23:41:45 +0100 Subject: [PATCH 4/6] Activated preliminary check of regexps for specific libs. --- ExtensionCrawler/js_decomposer.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/ExtensionCrawler/js_decomposer.py b/ExtensionCrawler/js_decomposer.py index d413083..193140d 100755 --- a/ExtensionCrawler/js_decomposer.py +++ b/ExtensionCrawler/js_decomposer.py @@ -205,17 +205,18 @@ def analyse_comment_known_libs(zipfile, js_file, js_info, comment): filename = js_file.filename else: filename = js_file - - for unkregex in unknown_lib_identifiers(): - unkown_lib_matched = unkregex.finditer(comment.content) - for match in unkown_lib_matched: - js_info['lib'] = ((filename).replace( - '.js', '')).replace('.min', '') - js_info['version'] = match.group(2) - js_info['detectionMethod'] = DetectionType.COMMENTBLOCK - js_info['detectionMethodDetails'] = unkregex - js_info['type'] = FileClassification.LIKELY_LIBRARY - libs.append(js_info) + for lib, regex in load_lib_identifiers().items(): + if('filecontent' in regex): + for unkregex in regex['filecontent']: + unkown_lib_matched = unkregex.finditer(comment.content) + for match in unkown_lib_matched: + js_info['lib'] = ((filename).replace( + '.js', '')).replace('.min', '') + js_info['version'] = match.group(2) + js_info['detectionMethod'] = DetectionType.COMMENTBLOCK + js_info['detectionMethodDetails'] = unkregex + js_info['type'] = FileClassification.LIBRARY + libs.append(js_info) return libs def analyse_comment_generic_libs(zipfile, js_file, js_info, comment): @@ -229,7 +230,7 @@ def analyse_comment_generic_libs(zipfile, js_file, js_info, comment): for unkregex in unknown_lib_identifiers(): unkown_lib_matched = unkregex.finditer(comment.content) for match in unkown_lib_matched: - js_info['lib'] = ((filename).replace( + js_info['lib'] = ((os.path.basename(filename)).replace( '.js', '')).replace('.min', '') js_info['version'] = match.group(2) js_info['detectionMethod'] = DetectionType.COMMENTBLOCK From 9ed8f5f9260c67c1438912716dfd810cf4c73dea Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Sat, 2 Sep 2017 00:05:07 +0100 Subject: [PATCH 5/6] Improved reporting. --- ExtensionCrawler/js_decomposer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ExtensionCrawler/js_decomposer.py b/ExtensionCrawler/js_decomposer.py index 193140d..8718631 100755 --- a/ExtensionCrawler/js_decomposer.py +++ b/ExtensionCrawler/js_decomposer.py @@ -183,7 +183,8 @@ def analyse_generic_filename(zipfile, js_file, js_info): unknown_filename_match = unknown_filename_identifier().search( filename) if unknown_filename_match: - js_info['lib'] = unknown_filename_match.group(1) + js_info['lib'] = os.path.basename(unknown_filename_match.group(1)).replace( + '.js', '').replace('.min', '') js_info['version'] = unknown_filename_match.group(2) js_info['type'] = FileClassification.LIKELY_LIBRARY js_info['detectionMethod'] = DetectionType.FILENAME @@ -210,8 +211,7 @@ def analyse_comment_known_libs(zipfile, js_file, js_info, comment): for unkregex in regex['filecontent']: unkown_lib_matched = unkregex.finditer(comment.content) for match in unkown_lib_matched: - js_info['lib'] = ((filename).replace( - '.js', '')).replace('.min', '') + js_info['lib'] = lib js_info['version'] = match.group(2) js_info['detectionMethod'] = DetectionType.COMMENTBLOCK js_info['detectionMethodDetails'] = unkregex From 8af3c99d26088fd7f0d5a9d42456873b278fdd29 Mon Sep 17 00:00:00 2001 From: "Achim D. Brucker" Date: Sat, 2 Sep 2017 00:07:50 +0100 Subject: [PATCH 6/6] Changed pip to pip3. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6d79b26..27b4072 100644 --- a/README.md +++ b/README.md @@ -45,11 +45,11 @@ in the file `requirements.txt`. ## Installation -Clone and use pip to install as a package. +Clone and use pip3 to install as a package. ```shell git clone git@logicalhacking.com:BrowserSecurity/ExtensionCrawler.git -pip install --user -e ExtensionCrawler +pip3 install --user -e ExtensionCrawler ``` ## Team