1) handle script files that begin with a byte order mark (BOM)
2) ignore script text that appears inside HTML comment blocks (<!-- ... -->) within script tags
This commit is contained in:
parent
a5a80e7848
commit
50373e122d
|
@ -25,6 +25,9 @@ import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.io.ByteOrderMark;
|
||||||
|
import org.apache.commons.io.input.BOMInputStream;
|
||||||
|
|
||||||
import com.ibm.wala.cast.ir.translator.TranslatorToCAst.Error;
|
import com.ibm.wala.cast.ir.translator.TranslatorToCAst.Error;
|
||||||
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
|
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
|
||||||
import com.ibm.wala.cast.tree.CAstSourcePositionMap.Position;
|
import com.ibm.wala.cast.tree.CAstSourcePositionMap.Position;
|
||||||
|
@ -55,6 +58,7 @@ public class DomLessSourceExtractor extends JSSourceExtractor {
|
||||||
protected final SourceRegion entrypointRegion;
|
protected final SourceRegion entrypointRegion;
|
||||||
|
|
||||||
private ITag currentScriptTag;
|
private ITag currentScriptTag;
|
||||||
|
private ITag currentCommentTag;
|
||||||
|
|
||||||
private int nodeCounter = 0;
|
private int nodeCounter = 0;
|
||||||
private int scriptNodeCounter = 0;
|
private int scriptNodeCounter = 0;
|
||||||
|
@ -86,12 +90,15 @@ public class DomLessSourceExtractor extends JSSourceExtractor {
|
||||||
if (tag.getName().equalsIgnoreCase("script")) {
|
if (tag.getName().equalsIgnoreCase("script")) {
|
||||||
assert currentScriptTag != null;
|
assert currentScriptTag != null;
|
||||||
currentScriptTag = null;
|
currentScriptTag = null;
|
||||||
|
} else if (currentScriptTag != null && tag.getName().equals("!--")) {
|
||||||
|
assert currentCommentTag != null;
|
||||||
|
currentCommentTag = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleText(Position p, String text) {
|
public void handleText(Position p, String text) {
|
||||||
if (currentScriptTag != null) {
|
if (currentScriptTag != null && currentCommentTag == null) {
|
||||||
if (text.startsWith("<![CDATA[")) {
|
if (text.startsWith("<![CDATA[")) {
|
||||||
assert text.endsWith("]]>");
|
assert text.endsWith("]]>");
|
||||||
text = text.substring(9, text.length()-11);
|
text = text.substring(9, text.length()-11);
|
||||||
|
@ -116,6 +123,8 @@ public class DomLessSourceExtractor extends JSSourceExtractor {
|
||||||
assert currentScriptTag == null;
|
assert currentScriptTag == null;
|
||||||
currentScriptTag = tag;
|
currentScriptTag = tag;
|
||||||
scriptNodeCounter++;
|
scriptNodeCounter++;
|
||||||
|
} else if (currentScriptTag != null && tag.getName().equals("!--")){
|
||||||
|
currentCommentTag = tag;
|
||||||
}
|
}
|
||||||
handleDOM(tag);
|
handleDOM(tag);
|
||||||
}
|
}
|
||||||
|
@ -226,16 +235,17 @@ public class DomLessSourceExtractor extends JSSourceExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void getScriptFromUrl(String urlAsString, ITag scriptTag) throws IOException, MalformedURLException {
|
private void getScriptFromUrl(String urlAsString, ITag scriptTag) throws IOException, MalformedURLException {
|
||||||
// URL absoluteUrl = UrlManipulator.relativeToAbsoluteUrl(urlAsString, this.entrypointUrl);
|
|
||||||
// URL scriptSrc = urlResolver.resolve(absoluteUrl);
|
|
||||||
URL scriptSrc = new URL(entrypointUrl, urlAsString);
|
URL scriptSrc = new URL(entrypointUrl, urlAsString);
|
||||||
if (scriptSrc == null) { //Error resolving URL
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Reader scriptInputStream;
|
Reader scriptInputStream;
|
||||||
try {
|
try {
|
||||||
scriptInputStream = new InputStreamReader(scriptSrc.openConnection().getInputStream());
|
BOMInputStream bs = new BOMInputStream(scriptSrc.openConnection().getInputStream(), false,
|
||||||
|
ByteOrderMark.UTF_8,
|
||||||
|
ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
|
||||||
|
ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
|
||||||
|
if (bs.hasBOM()) {
|
||||||
|
System.err.println("removing BOM " + bs.getBOM());
|
||||||
|
}
|
||||||
|
scriptInputStream = new InputStreamReader(bs);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
//it looks like this happens when we can't resolve the url?
|
//it looks like this happens when we can't resolve the url?
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
|
|
Loading…
Reference in New Issue