Adding Generators (Extracting js source from html) that support line numbering. Currently used only from within the JsViewerDriver.
git-svn-id: https://wala.svn.sourceforge.net/svnroot/wala/trunk@3957 f5eafffb-2e1d-0410-98e4-8ec43c5233c4
This commit is contained in:
parent
515c77c53c
commit
02842ff765
|
@ -2,10 +2,20 @@ package com.ibm.wala.cast.js.vis;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.wala.cast.js.html.DefaultSourceExtractor;
|
||||
import com.ibm.wala.cast.js.html.DomLessSourceExtractor;
|
||||
import com.ibm.wala.cast.js.html.FileMapping;
|
||||
import com.ibm.wala.cast.js.html.IdentityUrlResover;
|
||||
import com.ibm.wala.cast.js.html.JSSourceExtractor;
|
||||
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
|
||||
import com.ibm.wala.cast.js.ipa.callgraph.JSCFABuilder;
|
||||
import com.ibm.wala.cast.js.loader.JavaScriptLoader;
|
||||
import com.ibm.wala.cast.js.test.Util;
|
||||
import com.ibm.wala.cast.js.translator.CAstRhinoTranslatorFactory;
|
||||
import com.ibm.wala.cast.js.util.Generator;
|
||||
import com.ibm.wala.classLoader.SourceFileModule;
|
||||
import com.ibm.wala.ipa.callgraph.CallGraph;
|
||||
import com.ibm.wala.ipa.callgraph.propagation.PointerAnalysis;
|
||||
import com.ibm.wala.ipa.cha.ClassHierarchyException;
|
||||
|
@ -18,17 +28,41 @@ public class JsViewerDriver {
|
|||
System.out.println("Usage: <URL of html page to analyze>");
|
||||
System.exit(1);
|
||||
}
|
||||
boolean domless = false;
|
||||
|
||||
URL url = new URL(args[0]);
|
||||
URL url = new URL(args[0]);
|
||||
|
||||
// computing CG + PA
|
||||
Util.setTranslatorFactory(new CAstRhinoTranslatorFactory());
|
||||
JSCFABuilder builder = Util.makeHTMLCGBuilder(url);
|
||||
CallGraph cg = builder.makeCallGraph(builder.getOptions());
|
||||
JavaScriptLoader.addBootstrapFile(Generator.preamble);
|
||||
|
||||
SourceFileModule[] sources = getSources(domless, url);
|
||||
|
||||
JSCFABuilder builder = Util.makeCGBuilder(sources, false);
|
||||
builder.setBaseURL(url);
|
||||
|
||||
CallGraph cg = builder.makeCallGraph(builder.getOptions());
|
||||
PointerAnalysis pa = builder.getPointerAnalysis();
|
||||
|
||||
new JsViewer(cg, pa);
|
||||
}
|
||||
|
||||
private static SourceFileModule[] getSources(boolean domless, URL url)
|
||||
throws IOException {
|
||||
JSSourceExtractor sourceExtractor;
|
||||
if (domless ){
|
||||
sourceExtractor = new DomLessSourceExtractor();
|
||||
} else {
|
||||
sourceExtractor = new DefaultSourceExtractor();
|
||||
}
|
||||
|
||||
Map<SourceFileModule, FileMapping> sourcesMap = sourceExtractor.extractSources(url, new JerichoHtmlParser(), new IdentityUrlResover());
|
||||
SourceFileModule[] sources = new SourceFileModule[sourcesMap.size()];
|
||||
int i = 0;
|
||||
for (SourceFileModule m : sourcesMap.keySet()){
|
||||
sources[i++] = m;
|
||||
}
|
||||
return sources;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,6 +14,8 @@ Export-Package: com.ibm.wala.cast.js,
|
|||
com.ibm.wala.cast.js.cfg,
|
||||
com.ibm.wala.cast.js.client,
|
||||
com.ibm.wala.cast.js.client.impl,
|
||||
com.ibm.wala.cast.js.html,
|
||||
com.ibm.wala.cast.js.html.jericho,
|
||||
com.ibm.wala.cast.js.ipa.callgraph,
|
||||
com.ibm.wala.cast.js.ipa.summaries,
|
||||
com.ibm.wala.cast.js.loader,
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Stack;
|
||||
|
||||
import com.ibm.wala.util.collections.HashMapFactory;
|
||||
|
||||
public class DefaultSourceExtractor extends DomLessSourceExtractor{
|
||||
|
||||
private static class HtmlCallBack extends DomLessSourceExtractor.HtmlCallback{
|
||||
|
||||
private final HashMap<String, String> constructors = HashMapFactory.make();
|
||||
protected final Stack<String> stack;
|
||||
|
||||
public HtmlCallBack(URL entrypointUrl, IUrlResolver urlResolver) {
|
||||
super(entrypointUrl, urlResolver);
|
||||
|
||||
stack = new Stack<String>();
|
||||
constructors.put("FORM", "DOMHTMLFormElement");
|
||||
constructors.put("TABLE", "DOMHTMLTableElement");
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleEndTag(ITag tag) {
|
||||
super.handleEndTag(tag);
|
||||
endElement(stack.pop());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleDOM(ITag tag, String funcName) {
|
||||
|
||||
String cons = constructors.get(tag.getName().toUpperCase());
|
||||
if(cons == null) cons = "DOMHTMLElement";
|
||||
writeElement(tag, cons, funcName);
|
||||
newLine();
|
||||
}
|
||||
|
||||
private void printlnIndented(String line, ITag relatedTag){
|
||||
StringBuilder indentedLine = new StringBuilder();
|
||||
for (int i = 0 ; i < stack.size() ; i++){
|
||||
indentedLine.append(" ");
|
||||
}
|
||||
indentedLine.append(line);
|
||||
|
||||
if (relatedTag == null){
|
||||
domRegion.println(indentedLine.toString());
|
||||
} else {
|
||||
domRegion.println(indentedLine.toString(), fileName, relatedTag.getStartingLineNum());
|
||||
}
|
||||
}
|
||||
|
||||
private void newLine(){
|
||||
domRegion.println("");
|
||||
}
|
||||
|
||||
protected void writeElement(ITag tag, String cons, String varName){
|
||||
|
||||
printlnIndented("function make_" + varName + "(parent) {", tag);
|
||||
stack.push(varName);
|
||||
|
||||
printlnIndented("this.temp = " + cons + ";", tag);
|
||||
printlnIndented("this.temp(" + tag.getName() + ");", tag);
|
||||
for (Map.Entry<String, String> e : tag.getAllAttributes().entrySet()){
|
||||
String attr = e.getKey();
|
||||
String value = e.getValue();
|
||||
writeAttribute(tag, attr, value, "this", varName);
|
||||
}
|
||||
|
||||
printlnIndented("" + varName + " = this;", tag);
|
||||
printlnIndented("dom_nodes." + varName + " = this;", tag);
|
||||
printlnIndented("parent.appendChild(this);", tag);
|
||||
}
|
||||
|
||||
protected void writeAttribute(ITag tag, String attr, String value, String varName, String varName2) {
|
||||
writePortletAttribute(tag, attr, value, varName);
|
||||
writeEventAttribute(tag, attr, value, varName, varName2);
|
||||
}
|
||||
|
||||
protected void writeEventAttribute(ITag tag, String attr, String value, String varName, String varName2){
|
||||
if(attr.substring(0,2).equals("on")) {
|
||||
printlnIndented("function " + attr + "_" + varName2 + "(event) {" + value + "};", tag);
|
||||
printlnIndented(varName + "." + attr + " = " + attr + "_" + varName2 + ";", tag);
|
||||
newLine(); newLine();
|
||||
printlnIndented(varName2 + "." + attr + "(null);\n", tag);
|
||||
} else if (value.startsWith("javascript:") || value.startsWith("javaScript:")) {
|
||||
printlnIndented("var " + varName + attr + " = " + value.substring(11), tag);
|
||||
printlnIndented(varName + ".setAttribute('" + attr + "', " + varName + attr + ");", tag);
|
||||
} else {
|
||||
if (value.indexOf('\'') > 0) {
|
||||
value = value.replaceAll("\\'", "\\\\'");
|
||||
}
|
||||
if (value.indexOf('\n') > 0) {
|
||||
value = value.replaceAll("\\n", "\\\\n");
|
||||
}
|
||||
printlnIndented(varName + ".setAttribute('" + attr + "', '" + value + "');", tag);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writePortletAttribute(ITag tag, String attr, String value, String varName){
|
||||
if(attr.equals("portletid")) {
|
||||
if(value.substring(value.length()-4).equals("vice")) {
|
||||
newLine(); newLine();
|
||||
printlnIndented("function cVice() { var contextVice = " + varName + "; }\ncVice();\n", tag);
|
||||
} else if(value.substring(value.length()-4).equals("root")) {
|
||||
newLine(); newLine();
|
||||
printlnIndented("function cRoot() { var contextRoot = " + varName + "; }\ncRoot();\n", tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void endElement(String name) {
|
||||
printlnIndented("};", null);
|
||||
if (stack.isEmpty()) {
|
||||
printlnIndented("new make_" + name + "(document);\n\n", null);
|
||||
} else {
|
||||
printlnIndented("new make_" + name + "(this);\n", null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected IGeneratorCallback createHtmlCallback(URL entrypointUrl, IUrlResolver urlResolver) {
|
||||
return new HtmlCallBack(entrypointUrl, urlResolver);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,242 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PrintStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
|
||||
import com.ibm.wala.classLoader.SourceFileModule;
|
||||
import com.ibm.wala.util.collections.Pair;
|
||||
|
||||
|
||||
public class DomLessSourceExtractor implements JSSourceExtractor {
|
||||
private static final Pattern LEGAL_JS_IDENTIFIER_REGEXP = Pattern.compile("[a-zA-Z$_][a-zA-Z\\d$_]*");
|
||||
private boolean DELETE_UPON_EXIT = true;
|
||||
|
||||
interface IGeneratorCallback extends IHtmlCallback {
|
||||
void writeToFinalRegion(SourceRegion finalRegion);
|
||||
}
|
||||
|
||||
protected static class HtmlCallback implements IGeneratorCallback{
|
||||
protected final URL entrypointUrl;
|
||||
protected final IUrlResolver urlResolver;
|
||||
|
||||
protected final SourceRegion scriptRegion;
|
||||
protected final SourceRegion domRegion;
|
||||
protected final SourceRegion entrypointRegion;
|
||||
|
||||
protected final String fileName;
|
||||
|
||||
private int counter = 0;
|
||||
|
||||
public HtmlCallback(URL entrypointUrl, IUrlResolver urlResolver) {
|
||||
this.entrypointUrl = entrypointUrl;
|
||||
this.urlResolver = urlResolver;
|
||||
this.scriptRegion = new SourceRegion();
|
||||
this.domRegion = new SourceRegion();
|
||||
this.entrypointRegion = new SourceRegion();
|
||||
|
||||
this.fileName = entrypointUrl.getFile();
|
||||
}
|
||||
|
||||
//Do nothing
|
||||
public void handleEndTag(ITag tag) {}
|
||||
|
||||
public void handleStartTag(ITag tag) {
|
||||
if (tag.getName().equalsIgnoreCase("script")) {
|
||||
handleScript(tag);
|
||||
}
|
||||
handleDOM(tag);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Model the HTML DOM
|
||||
*
|
||||
* @param tag
|
||||
* - the HTML tag to module
|
||||
*/
|
||||
protected void handleDOM(ITag tag) {
|
||||
// Get the name of the modeling function either from the id attribute or a
|
||||
// running counter
|
||||
String idAttribute = tag.getAttributeByName("id");
|
||||
String funcName;
|
||||
if (idAttribute != null && LEGAL_JS_IDENTIFIER_REGEXP.matcher(idAttribute).matches()) {
|
||||
funcName = idAttribute;
|
||||
} else {
|
||||
funcName = "node" + (counter++);
|
||||
}
|
||||
handleDOM(tag, funcName);
|
||||
}
|
||||
|
||||
protected void handleDOM(ITag tag, String funcName) {
|
||||
Map<String, String> attributeSet = tag.getAllAttributes();
|
||||
for (Entry<String, String> a : attributeSet.entrySet()) {
|
||||
handleAttribute(a, funcName, tag.getStartingLineNum());
|
||||
}
|
||||
}
|
||||
|
||||
private void handleAttribute(Entry<String, String> a, String funcName, Integer lineNum) {
|
||||
String attName = a.getKey();
|
||||
String attValue = a.getValue();
|
||||
if (attName.toLowerCase().startsWith("on") || (attValue != null && attValue.toLowerCase().startsWith("javascript:"))) {
|
||||
String fName = attName + "_" + funcName;
|
||||
String signatureLine = "function " + fName + "(event) {";
|
||||
domRegion.println(signatureLine, fileName, lineNum);// Defines the function
|
||||
int offset = 0;
|
||||
for (String eventContentLine : extructJS(attValue)){
|
||||
domRegion.println("\t" + eventContentLine, fileName, lineNum + (offset++));
|
||||
}
|
||||
domRegion.println("}", fileName, lineNum);// Defines the function
|
||||
|
||||
entrypointRegion.println("\t" + fName + "(null);", fileName, lineNum);// Run it
|
||||
}
|
||||
}
|
||||
|
||||
private String[] extructJS(String attValue) {
|
||||
if (attValue == null){
|
||||
return new String[] {};
|
||||
}
|
||||
String content;
|
||||
if (attValue.toLowerCase().equals("javascript:")) {
|
||||
content = attValue.substring("javascript:".length());
|
||||
} else {
|
||||
content = attValue;
|
||||
}
|
||||
|
||||
return content.split("\\n");
|
||||
}
|
||||
|
||||
protected void handleScript(ITag tag) {
|
||||
|
||||
String value = tag.getAttributeByName("src");
|
||||
|
||||
try {
|
||||
if (value != null) {
|
||||
// script is out-of-line
|
||||
getScriptFromUrl(value);
|
||||
} else{
|
||||
getInlineScript(tag);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
System.err.println("Error reading script file: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void getScriptFromUrl(String urlAsString) throws IOException, MalformedURLException {
|
||||
URL absoluteUrl = UrlManipulator.relativeToAbsoluteUrl(urlAsString, this.entrypointUrl);
|
||||
URL scriptSrc = urlResolver.resolve(absoluteUrl);
|
||||
if (scriptSrc == null) { //Error resolving URL
|
||||
return;
|
||||
}
|
||||
|
||||
InputStream scriptInputStream = scriptSrc.openConnection().getInputStream();
|
||||
try{
|
||||
int lineNum = 1;
|
||||
String line;
|
||||
BufferedReader scriptReader = new BufferedReader(new UnicodeReader(scriptInputStream, "UTF8"));
|
||||
|
||||
while ((line = scriptReader.readLine()) != null) {
|
||||
scriptRegion.println(line, scriptSrc.getFile(), lineNum++);
|
||||
}
|
||||
} finally {
|
||||
scriptInputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void getInlineScript(ITag tag) throws IOException {
|
||||
Pair<Integer, String> bodyWithLineNumber = tag.getBodyText();
|
||||
scriptRegion.println(bodyWithLineNumber.snd, fileName, bodyWithLineNumber.fst);
|
||||
}
|
||||
|
||||
protected String getScriptName(URL url) throws MalformedURLException {
|
||||
String file = url.getFile();
|
||||
int lastIdxOfSlash = file.lastIndexOf('/');
|
||||
file = (lastIdxOfSlash == (-1)) ? file : file.substring(lastIdxOfSlash + 1);
|
||||
return file;
|
||||
}
|
||||
|
||||
public void writeToFinalRegion(SourceRegion finalRegion) {
|
||||
finalRegion.println("document.URL = new String(\"" + entrypointUrl + "\");");
|
||||
|
||||
// wrapping the embedded scripts with a fake method of the window. Required for making this == window.
|
||||
finalRegion.println("window.__MAIN__ = function(){");
|
||||
finalRegion.write(scriptRegion);
|
||||
finalRegion.println("} // end of window.__MAIN__");
|
||||
finalRegion.println("window.__MAIN__();");
|
||||
|
||||
finalRegion.write(domRegion);
|
||||
|
||||
finalRegion.println("while (true){ ");
|
||||
finalRegion.write(entrypointRegion);
|
||||
finalRegion.println("} // while (true)");
|
||||
}
|
||||
}
|
||||
|
||||
public Map<SourceFileModule, FileMapping> extractSources(URL entrypointUrl, IHtmlParser htmlParser, IUrlResolver urlResolver)
|
||||
throws IOException {
|
||||
|
||||
InputStreamReader inputStreamReader = getStream(entrypointUrl);
|
||||
IGeneratorCallback htmlCallback = createHtmlCallback(entrypointUrl, urlResolver);
|
||||
htmlParser.parse(inputStreamReader, htmlCallback, entrypointUrl.getFile());
|
||||
|
||||
SourceRegion finalRegion = new SourceRegion();
|
||||
htmlCallback.writeToFinalRegion(finalRegion);
|
||||
|
||||
// writing the final region into one SourceFileModule.
|
||||
File outputFile = createOutputFile(entrypointUrl, DELETE_UPON_EXIT);
|
||||
FileMapping fileMapping = finalRegion.writeToFile(new PrintStream(outputFile));
|
||||
SourceFileModule singleFileModule = new SourceFileModule(outputFile, outputFile.getName());
|
||||
return Collections.singletonMap(singleFileModule, fileMapping);
|
||||
}
|
||||
|
||||
protected IGeneratorCallback createHtmlCallback(URL entrypointUrl, IUrlResolver urlResolver) {
|
||||
return new HtmlCallback(entrypointUrl, urlResolver);
|
||||
}
|
||||
|
||||
private File createOutputFile(URL url, boolean delete) throws IOException {
|
||||
File outputFile = File.createTempFile(new File(url.getFile()).getName(), ".js");
|
||||
if (outputFile.exists()){
|
||||
outputFile.delete();
|
||||
}
|
||||
if(delete){
|
||||
outputFile.deleteOnExit();
|
||||
}
|
||||
return outputFile;
|
||||
}
|
||||
|
||||
|
||||
private InputStreamReader getStream(URL url) throws IOException {
|
||||
URLConnection conn = url.openConnection();
|
||||
conn.setDefaultUseCaches(false);
|
||||
conn.setUseCaches(false);
|
||||
|
||||
return new InputStreamReader(conn.getInputStream());
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
// DomLessSourceExtractor domLessScopeGenerator = new DomLessSourceExtractor();
|
||||
DomLessSourceExtractor domLessScopeGenerator = new DefaultSourceExtractor();
|
||||
domLessScopeGenerator.DELETE_UPON_EXIT = false;
|
||||
URL entrypointUrl = new URL(args[0]);
|
||||
IHtmlParser htmlParser = new JerichoHtmlParser();
|
||||
IUrlResolver urlResolver = new IdentityUrlResover();
|
||||
Map<SourceFileModule, FileMapping> res = domLessScopeGenerator.extractSources(entrypointUrl , htmlParser , urlResolver);
|
||||
Entry<SourceFileModule, FileMapping> entry = res.entrySet().iterator().next();
|
||||
System.out.println(entry.getKey());
|
||||
entry.getValue().dump(System.out);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.wala.util.collections.HashMapFactory;
|
||||
import com.ibm.wala.util.collections.Pair;
|
||||
|
||||
/**
|
||||
* Maps line numbers to lines of other files (fileName + line).
|
||||
*/
|
||||
public class FileMapping{
|
||||
protected Map<Integer, Pair<String, Integer>> lineNumberToFileAndLine = HashMapFactory.make();
|
||||
|
||||
/**
|
||||
* @param line
|
||||
* @return Null if no mapping for the given line.
|
||||
*/
|
||||
public Pair<String,Integer> getAssociatedFileAndLine(int line){
|
||||
return lineNumberToFileAndLine.get(line);
|
||||
}
|
||||
|
||||
public void dump(PrintStream ps){
|
||||
Set<Integer> lines = new TreeSet<Integer>(lineNumberToFileAndLine.keySet());
|
||||
for (Integer line : lines){
|
||||
Pair<String, Integer> fnAndln = lineNumberToFileAndLine.get(line);
|
||||
ps.println(line + ": " + fnAndln.snd + "@" + fnAndln.fst);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -34,11 +34,4 @@ public interface ITag {
|
|||
* @return null if no known
|
||||
*/
|
||||
public int getStartingLineNum();
|
||||
|
||||
/**
|
||||
* @return path to the file containing the tag.
|
||||
*/
|
||||
public String getFilePath();
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* Used for handling resources that were copied from the web to local files (and still contain references to the web)
|
||||
* @author yinnonh
|
||||
* @author danielk
|
||||
*
|
||||
*/
|
||||
public interface IUrlResolver {
|
||||
/**
 * From Internet to local: maps the URL of a web resource to the URL it should be read from.
 *
 * @param input the URL as referenced by the page
 * @return the URL to read from; DomLessSourceExtractor treats a null result as a resolution
 *         failure and skips the resource
 */
|
||||
public URL resolve(URL input);
|
||||
|
||||
/**
 * From local to Internet: the reverse direction of {@link #resolve(URL)}.
 *
 * @param input a local URL
 * @return the corresponding Internet URL (presumably the inverse of {@code resolve}; the
 *         null-handling contract is not visible here - TODO confirm)
 */
|
||||
public URL deResolve(URL input);
|
||||
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.net.URL;
|
||||
|
||||
public class IdentityUrlResover implements IUrlResolver{
|
||||
|
||||
public URL resolve(URL input) {
|
||||
return input;
|
||||
}
|
||||
|
||||
public URL deResolve(URL input) {
|
||||
return input;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.wala.classLoader.SourceFileModule;
|
||||
|
||||
/**
|
||||
* Extracts scripts from a given URL of an HTML. Retrieves also attached js files.
|
||||
* Provides file and line mapping for each extracted SourceFileModule back to the original file and line number.
|
||||
*
|
||||
* @author yinnonh
|
||||
* @author danielk
|
||||
*/
|
||||
public interface JSSourceExtractor {
|
||||
|
||||
/**
 * Extracts the scripts of the HTML page at {@code entrypointUrl}.
 *
 * @param entrypointUrl URL of the HTML page
 * @param htmlParser parser used to go over the page
 * @param urlResolver resolver applied to the URLs of attached script files
 * @return each extracted module together with the mapping of its lines back to the original
 *         file and line number
 * @throws IOException if reading the page or writing the extracted sources fails
 */
public Map<SourceFileModule, FileMapping> extractSources(URL entrypointUrl, IHtmlParser htmlParser, IUrlResolver urlResolver) throws IOException;
|
||||
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import com.ibm.wala.util.collections.Pair;
|
||||
|
||||
public class MutableFileMapping extends FileMapping {
|
||||
|
||||
void map(int line, String originalFile, int originalLine){
|
||||
lineNumberToFileAndLine.put(line, Pair.<String, Integer> make(originalFile, originalLine));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.io.StringReader;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import com.ibm.wala.util.collections.Pair;
|
||||
|
||||
public class SourceRegion {
|
||||
|
||||
private final StringBuilder source = new StringBuilder();
|
||||
private final MutableFileMapping fileMapping = new MutableFileMapping();
|
||||
private int currentLine = 1;
|
||||
|
||||
public SourceRegion() {
|
||||
}
|
||||
|
||||
public void print(String text, String originalFile, int originalLine){
|
||||
source.append(text);
|
||||
int numberOfLineDrops = getNumberOfLineDrops(text);
|
||||
if (originalFile != null){
|
||||
for (int i = 0; i < numberOfLineDrops; i++){
|
||||
fileMapping.map(currentLine++, originalFile, originalLine++);
|
||||
}
|
||||
if (! text.endsWith("\n")){ // avoid mapping one line too much
|
||||
fileMapping.map(currentLine, originalFile, originalLine); // required for handling text with no CRs.
|
||||
}
|
||||
} else {
|
||||
currentLine += numberOfLineDrops;
|
||||
}
|
||||
}
|
||||
|
||||
public void println(String text, String originalFile, int originalLine){
|
||||
print(text + "\n", originalFile, originalLine);
|
||||
}
|
||||
|
||||
public void print(String text){
|
||||
print(text, null, -1);
|
||||
}
|
||||
|
||||
public void println(String text){
|
||||
print(text + "\n");
|
||||
}
|
||||
|
||||
public FileMapping writeToFile(PrintStream ps){
|
||||
ps.print(source.toString());
|
||||
return fileMapping;
|
||||
}
|
||||
|
||||
public void write(SourceRegion otherRegion){
|
||||
BufferedReader br = new BufferedReader(new StringReader(otherRegion.source.toString()));
|
||||
int lineNum = 0;
|
||||
String line;
|
||||
try {
|
||||
while ((line = br.readLine()) != null){
|
||||
lineNum++;
|
||||
|
||||
Pair<String, Integer> fileAndLine = otherRegion.fileMapping.getAssociatedFileAndLine(lineNum);
|
||||
if (fileAndLine!= null){
|
||||
this.println(line, fileAndLine.fst, fileAndLine.snd);
|
||||
} else {
|
||||
this.println(line);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
assert false;
|
||||
}
|
||||
}
|
||||
|
||||
public void dump(PrintStream ps){
|
||||
StringTokenizer st = new StringTokenizer(source.toString(),"\n");
|
||||
int lineNum = 0;
|
||||
while (st.hasMoreElements()){
|
||||
String line = (String) st.nextElement();
|
||||
lineNum++;
|
||||
|
||||
Pair<String, Integer> fileAndLine = fileMapping.getAssociatedFileAndLine(lineNum);
|
||||
if (fileAndLine!= null){
|
||||
ps.print(fileAndLine.snd + "@" + fileAndLine.fst + "\t:");
|
||||
} else {
|
||||
ps.print("N/A \t\t:");
|
||||
}
|
||||
|
||||
ps.println(line);
|
||||
}
|
||||
}
|
||||
|
||||
private static int getNumberOfLineDrops(String text) {
|
||||
int ret = 0;
|
||||
int i = text.indexOf('\n');
|
||||
while (i != -1){
|
||||
ret++;
|
||||
if (i < text.length()-1){
|
||||
i = text.indexOf('\n', i + 1);
|
||||
} else {
|
||||
break; // CR was the the last character.
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
/**
|
||||
http://www.unicode.org/unicode/faq/utf_bom.html
|
||||
BOMs:
|
||||
00 00 FE FF = UTF-32, big-endian
|
||||
FF FE 00 00 = UTF-32, little-endian
|
||||
FE FF = UTF-16, big-endian
|
||||
FF FE = UTF-16, little-endian
|
||||
EF BB BF = UTF-8
|
||||
|
||||
Win2k Notepad:
|
||||
Unicode format = UTF-16LE
|
||||
***/
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Generic unicode text reader, which uses the BOM mark at the start of the stream to identify
 * the encoding to be used. The BOM itself is skipped; if none is found the default encoding
 * passed to the constructor is used.
 */
public class UnicodeReader extends Reader {
  PushbackInputStream internalIn;

  InputStreamReader internalIn2 = null;

  String defaultEnc;

  /** Number of bytes read ahead; large enough for the longest mark recognized (a doubled UTF-8 BOM). */
  private static final int BOM_SIZE = 6;

  /*
   * Default encoding is used only if BOM is not found. If defaultEncoding is NULL then systemdefault is used.
   */
  public UnicodeReader(InputStream in, String defaultEnc) {
    internalIn = new PushbackInputStream(in, BOM_SIZE);
    this.defaultEnc = defaultEnc;
  }

  public String getDefaultEncoding() {
    return defaultEnc;
  }

  /**
   * @return the encoding in use as reported by the underlying reader, or null if nothing has
   *         been read yet
   */
  public String getEncoding() {
    if (internalIn2 == null)
      return null;
    return internalIn2.getEncoding();
  }

  /**
   * Reads ahead up to {@code BOM_SIZE} bytes and checks for BOM marks. Bytes that are not part
   * of a BOM are pushed back to the stream; only BOM bytes are skipped. Does nothing once the
   * decoding reader exists.
   */
  protected void init() throws IOException {
    if (internalIn2 != null)
      return;

    byte[] bom = new byte[BOM_SIZE];
    // A single read() may deliver fewer bytes than requested although more follow, so keep
    // reading until the buffer is full or the stream ends.
    int n = 0;
    while (n < bom.length) {
      int count = internalIn.read(bom, n, bom.length - n);
      if (count < 0) {
        break;
      }
      n += count;
    }

    String encoding;
    int bomLength;
    if (startsWith(bom, n, 0xEF, 0xBB, 0xBF, 0xEF, 0xBB, 0xBF)) {
      encoding = "UTF-8";
      bomLength = 6;
    } else if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
      encoding = "UTF-8";
      bomLength = 3;
    } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
      encoding = "UTF-32BE";
      bomLength = 4;
    } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
      // Must be tested before UTF-16LE: the UTF-16LE mark (FF FE) is a prefix of this one,
      // so the opposite order can never detect UTF-32LE.
      encoding = "UTF-32LE";
      bomLength = 4;
    } else if (startsWith(bom, n, 0xFE, 0xFF)) {
      encoding = "UTF-16BE";
      bomLength = 2;
    } else if (startsWith(bom, n, 0xFF, 0xFE)) {
      encoding = "UTF-16LE";
      bomLength = 2;
    } else {
      // Unicode BOM mark not found, unread all bytes
      encoding = defaultEnc;
      bomLength = 0;
    }

    int unread = n - bomLength;
    if (unread > 0)
      internalIn.unread(bom, bomLength, unread);

    // Use given encoding
    if (encoding == null) {
      internalIn2 = new InputStreamReader(internalIn);
    } else {
      internalIn2 = new InputStreamReader(internalIn, encoding);
    }
  }

  /**
   * Tells whether the first {@code len} valid bytes of {@code buf} begin with {@code prefix}.
   * Bytes beyond {@code len} are never consulted, so zero padding cannot complete a mark.
   */
  private static boolean startsWith(byte[] buf, int len, int... prefix) {
    if (len < prefix.length) {
      return false;
    }
    for (int i = 0; i < prefix.length; i++) {
      if ((buf[i] & 0xFF) != prefix[i]) {
        return false;
      }
    }
    return true;
  }

  @Override
  public void close() throws IOException {
    init();
    internalIn2.close();
  }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    init();
    return internalIn2.read(cbuf, off, len);
  }

}
|
|
@ -0,0 +1,68 @@
|
|||
package com.ibm.wala.cast.js.html;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
/**
 * Helper for turning links found in a page into absolute URLs.
 */
public class UrlManipulator {

  /**
   * Resolves a link against the URL of the page it appeared in. Backslashes in the link are
   * treated as slashes and the link is lower-cased before resolution.
   *
   * @param urlFound the link as it appears; absolute ({@code http://} or {@code https://}),
   *          protocol-relative ({@code //host/path}), host-relative ({@code /path}) or relative
   *          to the directory of {@code context}, optionally starting with {@code ../} segments
   * @param context the URL in which the link appeared
   * @return the absolute URL; protocol, host and port are taken from {@code context} whenever the
   *         link does not specify them
   * @throws MalformedURLException if the resulting URL cannot be constructed
   */
  public static URL relativeToAbsoluteUrl(String urlFound, URL context) throws MalformedURLException {
    // NOTE(review): lower-casing also changes the path, which is case-sensitive on many
    // servers; kept because callers may rely on it - confirm whether it is intended.
    urlFound = urlFound.replace("\\", "/").toLowerCase();

    if (isAbsoluteUrl(urlFound)) {
      return new URL(urlFound);
    }

    if (urlFound.startsWith("//")) {
      // Only the protocol comes from the context. Let URL parse the authority itself, so that
      // a "host:port" part is split properly instead of being taken for a host name.
      return new URL(context.getProtocol() + ":" + urlFound);
    }

    if (urlFound.startsWith("/")) {
      // protocol, host and port come from the context
      return new URL(context.getProtocol(), context.getHost(), context.getPort(), urlFound);
    }

    //"concat" URL to context
    int backDir = 0; // removing directories due to "../"
    while(urlFound.startsWith("../")){
      urlFound = urlFound.substring(3);
      backDir++;
    }
    StringBuilder contextPath = new StringBuilder();
    String path = context.getPath().replace("\\", "/");
    boolean isContextDirectory = path.endsWith("/");
    String[] split = path.split("/");
    // the last element is removed as well unless the context is a directory
    int rightTrimFromPath = (isContextDirectory ? 0 : 1) + backDir;

    for (int i = 0; i < split.length - rightTrimFromPath; i++) {
      contextPath.append(split[i]);
      contextPath.append("/");
    }
    if (contextPath.length() == 0) {
      // More "../" segments than directories, or a context without a path: resolve against the
      // root. Without the slash the file name would be glued onto the host name.
      contextPath.append("/");
    }
    return new URL(context.getProtocol(), context.getHost(), context.getPort(), contextPath.toString() + urlFound);
  }

  /**
   * @return whether {@code orig} (already lower-cased) carries its own http(s) scheme; a
   *         relative file name that merely begins with "http" does not count
   */
  private static boolean isAbsoluteUrl(String orig) {
    return orig.startsWith("http://") || orig.startsWith("https://");
  }

}
|
|
@ -18,7 +18,7 @@ import com.ibm.wala.cast.js.html.IHtmlParser;
|
|||
* @author danielk
|
||||
* Uses the Jericho parser to go over the HTML
|
||||
*/
|
||||
public class HTMLJerichoParser implements IHtmlParser{
|
||||
public class JerichoHtmlParser implements IHtmlParser{
|
||||
static{
|
||||
Config.LoggerProvider = LoggerProvider.STDERR;
|
||||
}
|
|
@ -21,7 +21,7 @@ import java.net.URLConnection;
|
|||
|
||||
import com.ibm.wala.cast.js.html.IHtmlParser;
|
||||
import com.ibm.wala.cast.js.html.IHtmlCallback;
|
||||
import com.ibm.wala.cast.js.html.jericho.HTMLJerichoParser;
|
||||
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
|
||||
|
||||
public class Generator {
|
||||
public static final String preamble = "preamble.js", temp1 = "temp1.js", temp2 = "temp2.js", temp3 = "temp3.js";
|
||||
|
@ -91,7 +91,7 @@ public class Generator {
|
|||
FileWriter out2 = new FileWriter(temp2);
|
||||
FileWriter out3 = new FileWriter(temp3);
|
||||
|
||||
IHtmlParser parser = new HTMLJerichoParser();
|
||||
IHtmlParser parser = new JerichoHtmlParser();
|
||||
IHtmlCallback parseHandler = callbackFactory.createCallback(input, out1, out2, out3);
|
||||
parser.parse(fr, parseHandler, input.getFile());
|
||||
out1.flush();
|
||||
|
|
Loading…
Reference in New Issue