unsanitize-safelinks/unsanitize-safelinks

145 lines
4.9 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright (c) 2018-2019 Achim D. Brucker.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# SPDX-License-Identifier: BSD-2-Clause
import sys
import os.path as path
import os
import stat
import argparse
import re
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs, urlunparse
import cchardet as chardet
def sanitize_safelink(url):
    """Return the original target of an Outlook "safelink" URL.

    URLs that do not contain ``safelinks.protection.outlook.com`` are
    returned unchanged.  For safelinks, the first value of the ``url``
    query parameter is extracted (``parse_qs`` percent-decodes it) and
    returned in the form produced by ``urlparse(...).geturl()``.

    If the target cannot be extracted, a fixed warning string is returned
    in place of the link.
    """
    if "safelinks.protection.outlook.com" not in url:
        return url
    try:
        wrapped = parse_qs(urlparse(url).query)['url'][0]
        return urlparse(wrapped).geturl()
    except (KeyError, IndexError, ValueError):
        # KeyError/IndexError: there is no usable ``url`` parameter;
        # ValueError: urlparse rejected a malformed URL.  Unrelated
        # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        return " Warning: Removed corrupted safelink. "
def remove_external_sender_warning_txt(content):
    """Strip the "external sender" caution banner from plain text.

    Every occurrence of the banner sentence pair -- in either the
    "organization/recognize" or the "organisation/recognise" spelling,
    together with the newline directly before and after it -- is removed.
    Trailing whitespace of the result is stripped as well.

    Returns the cleaned text.
    """
    # The full stops are escaped so that they only match a literal '.'
    # and not an arbitrary character.
    warning_re = re.compile(
        r'\nCAUTION: This email originated from outside of the organi[zs]ation\. '
        r'Do not click links or open attachments unless you recogni[zs]e the sender '
        r'and know the content is safe\.\n')
    return warning_re.sub("", content).rstrip()
def unsanitize_txt(content):
    """Replace safelink URLs in plain text by their original targets.

    Each ``http://`` or ``https://`` URL found in ``content`` is passed
    to ``sanitize_safelink`` and replaced by its return value.  Trailing
    whitespace of the result is stripped.

    Returns the rewritten text.
    """
    # Both fragments are raw strings: the pattern is the same as before,
    # but no longer relies on invalid string escapes such as "\/" or "\?"
    # being passed through to the regex engine.
    url_re = re.compile(
        r'(http[s]?://'
        r"(?:[a-zA-Z]|[0-9]|[$-'\/\+-;=\?-@.&+_]|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)")
    return url_re.sub(lambda match: sanitize_safelink(match.group(1)),
                      content).rstrip()
def unsanitize_html(content):
    """Restore the original link targets in an HTML document.

    For every ``<a>`` element that carries an ``originalsrc`` attribute,
    the current ``href`` (if any) is saved in a ``safelink`` attribute,
    ``href`` is set to the value of ``originalsrc``, and ``originalsrc``
    is removed.

    Returns the modified document serialised as a string.
    """
    soup = BeautifulSoup(content, "html.parser")
    # find_all() is the current Beautiful Soup name for the legacy
    # findAll() alias.
    for anchor in soup.find_all('a'):
        if not anchor.has_attr('originalsrc'):
            continue
        # An anchor may have ``originalsrc`` without ``href``; do not fail
        # with a KeyError in that case.
        if anchor.has_attr('href'):
            anchor['safelink'] = anchor['href']
        anchor['href'] = anchor['originalsrc']
        del anchor['originalsrc']
    return str(soup)
def remove_external_sender_warning_html(content):
    """Remove the "external sender" caution banner from an HTML document.

    A ``<div>`` is removed when its text contains the banner (in either
    the "organization/recognize" or the "organisation/recognise"
    spelling) and one of the following holds:

    * the banner is the only non-whitespace text of the ``<div>``, or
    * no nested ``<div>`` contains the banner, i.e. there is no more
      precise element to remove instead.

    A ``<div>`` that merely wraps the banner ``<div>`` together with other
    content is therefore kept; only the nested banner is removed.

    Returns the modified document serialised as a string.
    """
    # The full stops are escaped so that they only match a literal '.'.
    warning_re = re.compile(
        r'CAUTION: This email originated from outside of the organi[zs]ation\. '
        r'Do not click links or open attachments unless you recogni[zs]e the sender '
        r'and know the content is safe\.')
    soup = BeautifulSoup(content, "html.parser")
    for div in soup.find_all('div'):
        text = str(div.text)
        if not warning_re.search(text):
            continue
        # ``text`` includes the text of all nested elements, so every
        # ancestor of the banner matches as well.  Removing such an
        # ancestor would also delete the message content next to the
        # banner.
        only_banner = not warning_re.sub("", text).strip()
        if only_banner or not any(
                warning_re.search(str(inner.text))
                for inner in div.find_all('div')):
            div.decompose()
    return str(soup)
def main():
    """Main function of the safelink tool.

    Reads a message from the file given on the command line, or from
    standard input if no file is given, decodes it using the detected
    character encoding (UTF-8 if none is detected), restores the original
    link targets and removes the external-sender warning.  With ``--html``
    the input is processed as HTML, otherwise as plain text.

    With ``-i``/``--in-situ`` and a file argument, the file is rewritten
    in place; otherwise the result is printed to standard output.
    """
    # parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("--html",
                        help="HTML",
                        action="store_true",
                        default=False)
    parser.add_argument("-i",
                        "--in-situ",
                        help="modify file",
                        action="store_true",
                        default=False)
    # NOTE: ``--verbose`` is accepted but not read anywhere in this
    # function.
    parser.add_argument("-v",
                        "--verbose",
                        help="increase verbosity",
                        action="store_true")
    parser.add_argument('file', nargs='?', default=None)
    args = parser.parse_args()

    # Read raw bytes so that the encoding can be detected before decoding.
    if args.file:
        with open(args.file, mode="rb") as fhandle:
            data = fhandle.read()
    else:
        data = sys.stdin.buffer.read()

    # Fall back to UTF-8 when the detector reports no encoding.
    encoding = chardet.detect(data)['encoding'] or 'utf8'
    content = data.decode(encoding=encoding, errors="replace")

    if args.html:
        content = unsanitize_html(content)
        content = remove_external_sender_warning_html(content)
    else:
        content = unsanitize_txt(content)
        content = remove_external_sender_warning_txt(content)

    if args.file and args.in_situ:
        # Make sure the file is writable before rewriting it.
        st = os.stat(args.file)
        os.chmod(args.file, st.st_mode | stat.S_IWRITE)
        # Write the file back in the encoding it was read with instead of
        # the locale default, and keep its line endings untouched
        # (newline="").  Characters that cannot be encoded are replaced
        # rather than aborting the write half-way.
        with open(args.file, "w", encoding=encoding, errors="replace",
                  newline="") as fhandle:
            fhandle.write(content)
    else:
        print(content)
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()