#!/usr/bin/env python3
# Copyright (c) 2018-2019 Achim D. Brucker.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# SPDX-License-Identifier: BSD-2-Clause
"""Undo Outlook "safelink" URL rewriting and strip external-sender warnings.

The tool reads a mail body (plain text by default, HTML with ``--html``)
from a file or from standard input, replaces rewritten
``safelinks.protection.outlook.com`` URLs by their original targets,
removes the "CAUTION: This email originated from outside ..." banner and
either prints the result or writes it back to the input file.
"""

import sys
import os.path as path
import os
import stat
import argparse
import re
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs, urlunparse
import cchardet as chardet

# Substring that identifies a rewritten URL.
_SAFELINK_HOST = "safelinks.protection.outlook.com"

# Replacement text for safelinks whose original target cannot be recovered.
_CORRUPTED_SAFELINK = " Warning: Removed corrupted safelink. "

# Core of the external-sender banner, shared by the text and HTML variants.
_WARNING_BODY = (
    r'CAUTION: This email originated from outside of the organi[zs]ation. '
    r'Do not click links or open attachments unless you recogni[zs]e the sender '
    r'and know the content is safe.'
)

# In plain text the banner is only removed when it sits on a line of its own.
_WARNING_TXT_RE = re.compile(r'\n' + _WARNING_BODY + r'\n', re.MULTILINE)
_WARNING_HTML_RE = re.compile(_WARNING_BODY, re.MULTILINE)

# Raw string on purpose: the pattern contains "\/", "\+" and "\?", which are
# invalid escape sequences in a normal string literal (SyntaxWarning on
# Python 3.12+).  The resulting regular expression is unchanged.
_URL_RE = re.compile(
    r"(http[s]?://"
    r"(?:[a-zA-Z]|[0-9]|[$-'\/\+-;=\?-@.&+_]|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)",
    re.MULTILINE)


def sanitize_safelink(url):
    """Return the original target of a safelink URL.

    URLs that do not mention the safelink host are returned unchanged.
    For safelinks, the original target is taken from the ``url`` query
    parameter.  If that parameter is missing or the URL cannot be parsed,
    a warning text is returned in place of the link.
    """
    if _SAFELINK_HOST not in url:
        return url
    try:
        wrapped = parse_qs(urlparse(url).query)['url'][0]
        return urlparse(wrapped).geturl()
    except (KeyError, IndexError, ValueError):
        # KeyError: no "url" parameter; ValueError: malformed URL.
        # Deliberately not a bare "except", so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        return _CORRUPTED_SAFELINK


def remove_external_sender_warning_txt(content):
    """Remove the external-sender banner from plain text.

    The banner is only matched when it is preceded and followed by a
    newline.  Trailing whitespace of the result is stripped.
    """
    return _WARNING_TXT_RE.sub("", content).rstrip()


def unsanitize_txt(content):
    """Replace every safelink URL in plain text by its original target.

    Trailing whitespace of the result is stripped.
    """
    restored = _URL_RE.sub(
        lambda match: sanitize_safelink(match.group(1)), content)
    return restored.rstrip()


def unsanitize_html(content):
    """Restore the original ``href`` of rewritten anchors in HTML.

    For each ``<a>`` element carrying an ``originalsrc`` attribute, the
    current ``href`` (if any) is preserved in a ``safelink`` attribute,
    ``href`` is set to the value of ``originalsrc`` and ``originalsrc``
    is removed.  Returns the serialized document.
    """
    soup = BeautifulSoup(content, "html.parser")
    for anchor in soup.find_all('a'):
        if not anchor.has_attr('originalsrc'):
            continue
        # An anchor without href must not abort the whole conversion.
        if anchor.has_attr('href'):
            anchor['safelink'] = anchor['href']
        anchor['href'] = anchor['originalsrc']
        del anchor['originalsrc']
    return str(soup)


def _prune_empty_divs(tag):
    """Remove *tag* and its ancestors while they are empty ``div`` elements.

    A ``div`` counts as empty when it has no child elements and no
    non-whitespace text.
    """
    while tag is not None and tag.name == 'div':
        if tag.find(True) is not None or tag.get_text(strip=True):
            break
        parent = tag.parent
        tag.decompose()
        tag = parent


def remove_external_sender_warning_html(content):
    """Remove the external-sender banner from an HTML document.

    Only the innermost ``div`` elements whose text contains the banner
    are removed.  The text of an enclosing ``div`` contains the banner as
    well, so removing every matching ``div`` would also delete wrappers
    that hold the actual message.  Wrapper ``div`` elements that are left
    empty by the removal are dropped too.  Returns the serialized
    document.
    """
    soup = BeautifulSoup(content, "html.parser")
    matching = [div for div in soup.find_all('div')
                if _WARNING_HTML_RE.search(div.get_text())]
    # A matching div is "innermost" if none of its descendant divs matches.
    # Innermost divs are never nested in one another, so they can be
    # removed independently.
    innermost = [
        div for div in matching
        if not any(_WARNING_HTML_RE.search(inner.get_text())
                   for inner in div.find_all('div'))
    ]
    for div in innermost:
        parent = div.parent
        div.decompose()
        _prune_empty_divs(parent)
    return str(soup)


def _read_input(filename):
    """Return the raw bytes of *filename*, or of standard input if falsy."""
    if filename:
        with open(filename, mode="rb") as source:
            return source.read()
    return sys.stdin.buffer.read()


def _decode(data):
    """Decode *data* and return ``(text, encoding_used)``.

    The encoding is guessed by the character-set detector.  UTF-8 is used
    when nothing is detected or when Python has no codec for the detected
    name.  Undecodable bytes are replaced.
    """
    detected = chardet.detect(data)['encoding']
    if detected:
        try:
            return data.decode(encoding=detected, errors="replace"), detected
        except LookupError:
            # The detector reported a codec name that Python does not know.
            pass
    return data.decode(encoding='utf8', errors="replace"), 'utf8'


def _encode(content, encoding):
    """Encode *content* with *encoding*, falling back to UTF-8.

    The fallback is taken when the text contains characters that the
    requested encoding cannot represent.
    """
    try:
        return content.encode(encoding)
    except UnicodeEncodeError:
        return content.encode('utf8')


def main(argv=None):
    """Main function of the safelink tool.

    Parses the command line (*argv*, defaulting to ``sys.argv[1:]``),
    reads the input from the given file or from standard input, cleans it
    and either prints the result or, with ``--in-situ`` and a file
    argument, writes it back to that file.

    When writing in place, the text is written in the encoding it was
    read with (UTF-8 if that encoding cannot represent the result), so
    that the file's encoding and line endings are preserved.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--html", help="HTML",
                        action="store_true", default=False)
    parser.add_argument("-i", "--in-situ", help="modify file",
                        action="store_true", default=False)
    parser.add_argument("-v", "--verbose", help="increase verbosity",
                        action="store_true")
    parser.add_argument('file', nargs='?', default=None)
    args = parser.parse_args(argv)

    content, encoding = _decode(_read_input(args.file))

    if args.html:
        content = unsanitize_html(content)
        content = remove_external_sender_warning_html(content)
    else:
        content = unsanitize_txt(content)
        content = remove_external_sender_warning_txt(content)

    if args.file and args.in_situ:
        # Encode before touching the file so that an encoding problem can
        # never leave a truncated file behind.
        payload = _encode(content, encoding)
        st = os.stat(args.file)
        # Make sure the file is writable before rewriting it.
        os.chmod(args.file, st.st_mode | stat.S_IWRITE)
        with open(args.file, "wb") as target:
            target.write(payload)
    else:
        print(content)


if __name__ == "__main__":
    main()