From 933c4d4d118cbf5b43e75a8798c27fb5d7818d1b Mon Sep 17 00:00:00 2001
From: "Achim D. Brucker" <adbrucker@0x5f.org>
Date: Tue, 12 Sep 2017 23:23:22 +0100
Subject: [PATCH] Determine file description from buffer instead of from file
 (avoid reading file twice).

---
 ExtensionCrawler/cdnjs-git.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ExtensionCrawler/cdnjs-git.py b/ExtensionCrawler/cdnjs-git.py
index 58bc6f9..4ea32fc 100644
--- a/ExtensionCrawler/cdnjs-git.py
+++ b/ExtensionCrawler/cdnjs-git.py
@@ -22,6 +22,7 @@ import hashlib
 import mimetypes
 import os
 from functools import reduce
+import zlib
 
 import cchardet as chardet
 import dateutil.parser
@@ -64,7 +65,6 @@ def normalize_file(path, encoding):
                     txt += line.strip()
     return txt.encode()
 
-
 def get_file_identifiers(path):
     """Get basic file identifiers (size, hashes, normalized hashes, etc.)."""
     with open(path, 'rb') as fileobj:
@@ -78,7 +78,7 @@ def get_file_identifiers(path):
         'sha256': hashlib.sha256(data).digest(),
         'size': len(data),
         'mimetype': mimetypes.guess_type(path),
-        'description': magic.from_file(path),
+        'description': magic.from_buffer(data),
         'encoding': chardet.detect(data)['encoding'],
     }