diff --git a/unsanitize-safelinks b/unsanitize-safelinks index 02378ed..695b32f 100755 --- a/unsanitize-safelinks +++ b/unsanitize-safelinks @@ -33,6 +33,8 @@ import argparse import re from bs4 import BeautifulSoup from urllib.parse import urlparse, parse_qs, urlunparse +import cchardet as chardet + def sanitize_safelink(url): if "safelinks.protection.outlook.com" in url: @@ -76,17 +78,17 @@ def main(): # parse command line if args.file: - fhandle = open(args.file) + fhandle = open(args.file, mode="rb") else: - fhandle = sys.stdin + fhandle = sys.stdin.buffer - content = "" - for line in fhandle: - content += line + data = fhandle.read() - if fhandle is not sys.stdin: + if fhandle is not sys.stdin.buffer: fhandle.close() + content = data.decode(encoding=chardet.detect(data)['encoding'], errors="replace") + if args.html: content=unsanitize_html(content) else: @@ -103,4 +105,4 @@ def main(): print(content) if __name__ == "__main__": - main() \ No newline at end of file + main()