Adding Generators (Extracting js source from html) that support line numbering. Currently used only from within the JsViewerDriver.

git-svn-id: https://wala.svn.sourceforge.net/svnroot/wala/trunk@3957 f5eafffb-2e1d-0410-98e4-8ec43c5233c4
This commit is contained in:
yinnon_haviv 2010-09-29 15:29:55 +00:00
parent 515c77c53c
commit 02842ff765
15 changed files with 796 additions and 13 deletions

View File

@ -2,10 +2,20 @@ package com.ibm.wala.cast.js.vis;
import java.io.IOException;
import java.net.URL;
import java.util.Map;
import com.ibm.wala.cast.js.html.DefaultSourceExtractor;
import com.ibm.wala.cast.js.html.DomLessSourceExtractor;
import com.ibm.wala.cast.js.html.FileMapping;
import com.ibm.wala.cast.js.html.IdentityUrlResover;
import com.ibm.wala.cast.js.html.JSSourceExtractor;
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
import com.ibm.wala.cast.js.ipa.callgraph.JSCFABuilder;
import com.ibm.wala.cast.js.loader.JavaScriptLoader;
import com.ibm.wala.cast.js.test.Util;
import com.ibm.wala.cast.js.translator.CAstRhinoTranslatorFactory;
import com.ibm.wala.cast.js.util.Generator;
import com.ibm.wala.classLoader.SourceFileModule;
import com.ibm.wala.ipa.callgraph.CallGraph;
import com.ibm.wala.ipa.callgraph.propagation.PointerAnalysis;
import com.ibm.wala.ipa.cha.ClassHierarchyException;
@ -18,17 +28,41 @@ public class JsViewerDriver {
System.out.println("Usage: <URL of html page to analyze>");
System.exit(1);
}
boolean domless = false;
URL url = new URL(args[0]);
URL url = new URL(args[0]);
// computing CG + PA
Util.setTranslatorFactory(new CAstRhinoTranslatorFactory());
JSCFABuilder builder = Util.makeHTMLCGBuilder(url);
CallGraph cg = builder.makeCallGraph(builder.getOptions());
JavaScriptLoader.addBootstrapFile(Generator.preamble);
SourceFileModule[] sources = getSources(domless, url);
JSCFABuilder builder = Util.makeCGBuilder(sources, false);
builder.setBaseURL(url);
CallGraph cg = builder.makeCallGraph(builder.getOptions());
PointerAnalysis pa = builder.getPointerAnalysis();
new JsViewer(cg, pa);
}
/**
 * Extracts the JavaScript sources reachable from the given HTML page.
 *
 * @param domless
 *          when true a {@link DomLessSourceExtractor} is used, otherwise a {@link DefaultSourceExtractor}
 * @param url
 *          location of the HTML page to extract from
 * @return the extracted source modules, in the iteration order of the extractor's result map
 * @throws IOException
 *           propagated from the extractor
 */
private static SourceFileModule[] getSources(boolean domless, URL url)
    throws IOException {
  JSSourceExtractor extractor = domless ? new DomLessSourceExtractor() : new DefaultSourceExtractor();
  Map<SourceFileModule, FileMapping> extracted =
      extractor.extractSources(url, new JerichoHtmlParser(), new IdentityUrlResover());
  // Only the modules are needed here; the line mappings are ignored.
  return extracted.keySet().toArray(new SourceFileModule[extracted.size()]);
}
}

View File

@ -14,6 +14,8 @@ Export-Package: com.ibm.wala.cast.js,
com.ibm.wala.cast.js.cfg,
com.ibm.wala.cast.js.client,
com.ibm.wala.cast.js.client.impl,
com.ibm.wala.cast.js.html,
com.ibm.wala.cast.js.html.jericho,
com.ibm.wala.cast.js.ipa.callgraph,
com.ibm.wala.cast.js.ipa.summaries,
com.ibm.wala.cast.js.loader,

View File

@ -0,0 +1,128 @@
package com.ibm.wala.cast.js.html;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import com.ibm.wala.util.collections.HashMapFactory;
/**
 * A {@link DomLessSourceExtractor} whose HTML callback additionally writes, for every start tag, a
 * JavaScript constructor function (<code>make_&lt;name&gt;</code>) into the DOM region, so that the
 * generated source contains a model of the page's element tree.
 */
public class DefaultSourceExtractor extends DomLessSourceExtractor{

  private static class HtmlCallBack extends DomLessSourceExtractor.HtmlCallback{

    /** Prefix (matched case-insensitively) that marks an attribute value as JavaScript code. */
    private static final String JS_PREFIX = "javascript:";

    /** Upper-cased tag name -> name of the JavaScript constructor used to model that tag. */
    private final HashMap<String, String> constructors = HashMapFactory.make();

    /** Names of the elements whose make_ function is currently open; its size drives the indentation. */
    protected final Stack<String> stack;

    public HtmlCallBack(URL entrypointUrl, IUrlResolver urlResolver) {
      super(entrypointUrl, urlResolver);
      stack = new Stack<String>();
      constructors.put("FORM", "DOMHTMLFormElement");
      constructors.put("TABLE", "DOMHTMLTableElement");
    }

    /**
     * Closes the make_ function of the innermost open element and emits the statement instantiating it.
     * An end tag that arrives while no element is open is ignored instead of failing with an
     * EmptyStackException.
     */
    @Override
    public void handleEndTag(ITag tag) {
      super.handleEndTag(tag);
      if (stack.isEmpty()) {
        return; // stray end tag: nothing to close
      }
      endElement(stack.pop());
    }

    /**
     * Writes the modeling function for the tag, using a tag-specific constructor when one is registered
     * and "DOMHTMLElement" otherwise.
     */
    @Override
    protected void handleDOM(ITag tag, String funcName) {
      String cons = constructors.get(tag.getName().toUpperCase());
      if(cons == null) cons = "DOMHTMLElement";
      writeElement(tag, cons, funcName);
      newLine();
    }

    /**
     * Prints a line into the DOM region, indented by one space per currently open element.
     *
     * @param relatedTag
     *          when not null, the printed line is mapped to this tag's starting line in the entry-point file
     */
    private void printlnIndented(String line, ITag relatedTag){
      StringBuilder indentedLine = new StringBuilder();
      for (int i = 0 ; i < stack.size() ; i++){
        indentedLine.append(" ");
      }
      indentedLine.append(line);

      if (relatedTag == null){
        domRegion.println(indentedLine.toString());
      } else {
        domRegion.println(indentedLine.toString(), fileName, relatedTag.getStartingLineNum());
      }
    }

    private void newLine(){
      domRegion.println("");
    }

    /**
     * Opens the make_ function for the tag and writes its body; the function is closed later by
     * {@link #handleEndTag(ITag)}.
     */
    protected void writeElement(ITag tag, String cons, String varName){
      printlnIndented("function make_" + varName + "(parent) {", tag);
      stack.push(varName);

      printlnIndented("this.temp = " + cons + ";", tag);
      // NOTE(review): the tag name is emitted unquoted, i.e. as a JS identifier - confirm this is intended.
      printlnIndented("this.temp(" + tag.getName() + ");", tag);
      for (Map.Entry<String, String> e : tag.getAllAttributes().entrySet()){
        String attr = e.getKey();
        String value = e.getValue();
        writeAttribute(tag, attr, value, "this", varName);
      }

      printlnIndented("" + varName + " = this;", tag);
      printlnIndented("dom_nodes." + varName + " = this;", tag);
      printlnIndented("parent.appendChild(this);", tag);
    }

    protected void writeAttribute(ITag tag, String attr, String value, String varName, String varName2) {
      writePortletAttribute(tag, attr, value, varName);
      writeEventAttribute(tag, attr, value, varName, varName2);
    }

    /**
     * Writes the JavaScript modeling one attribute: an event-handler function for "on..." attributes, an
     * evaluated expression for "javascript:" values, and a plain setAttribute call otherwise.
     *
     * @param value
     *          the attribute value; null is treated as the empty string
     */
    protected void writeEventAttribute(ITag tag, String attr, String value, String varName, String varName2){
      if (value == null) {
        value = ""; // attribute without a value
      }
      // startsWith instead of substring(0,2): one-character attribute names must not throw.
      if(attr.startsWith("on")) {
        printlnIndented("function " + attr + "_" + varName2 + "(event) {" + value + "};", tag);
        printlnIndented(varName + "." + attr + " = " + attr + "_" + varName2 + ";", tag);
        newLine(); newLine();
        printlnIndented(varName2 + "." + attr + "(null);\n", tag);
      } else if (value.regionMatches(true, 0, JS_PREFIX, 0, JS_PREFIX.length())) {
        printlnIndented("var " + varName + attr + " = " + value.substring(JS_PREFIX.length()), tag);
        printlnIndented(varName + ".setAttribute('" + attr + "', " + varName + attr + ");", tag);
      } else {
        // Escape unconditionally, so a quote or newline at index 0 is escaped too.
        value = value.replace("'", "\\'").replace("\n", "\\n");
        printlnIndented(varName + ".setAttribute('" + attr + "', '" + value + "');", tag);
      }
    }

    /**
     * For a "portletid" attribute whose value ends with "vice" or "root", writes a small function capturing
     * the element in a local variable. Null values and values shorter than four characters are ignored.
     */
    protected void writePortletAttribute(ITag tag, String attr, String value, String varName){
      if(attr.equals("portletid") && value != null) {
        if(value.endsWith("vice")) {
          newLine(); newLine();
          printlnIndented("function cVice() { var contextVice = " + varName + "; }\ncVice();\n", tag);
        } else if(value.endsWith("root")) {
          newLine(); newLine();
          printlnIndented("function cRoot() { var contextRoot = " + varName + "; }\ncRoot();\n", tag);
        }
      }
    }

    /**
     * Closes the make_ function of the named element and instantiates it, attaching it to the document when
     * no other element is open and to the enclosing element ("this") otherwise.
     */
    private void endElement(String name) {
      printlnIndented("};", null);
      if (stack.isEmpty()) {
        printlnIndented("new make_" + name + "(document);\n\n", null);
      } else {
        printlnIndented("new make_" + name + "(this);\n", null);
      }
    }
  }

  @Override
  protected IGeneratorCallback createHtmlCallback(URL entrypointUrl, IUrlResolver urlResolver) {
    return new HtmlCallBack(entrypointUrl, urlResolver);
  }
}

View File

@ -0,0 +1,242 @@
package com.ibm.wala.cast.js.html;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
import com.ibm.wala.classLoader.SourceFileModule;
import com.ibm.wala.util.collections.Pair;
/**
 * Extracts the JavaScript of an HTML page (inline scripts, referenced script files and event-handler
 * attributes) into a single generated .js file, together with a mapping from the generated lines back to
 * the original files and lines. No model of the DOM is generated.
 */
public class DomLessSourceExtractor implements JSSourceExtractor {
  private static final Pattern LEGAL_JS_IDENTIFIER_REGEXP = Pattern.compile("[a-zA-Z$_][a-zA-Z\\d$_]*");

  /** Prefix (matched case-insensitively) that marks an attribute value as JavaScript code. */
  private static final String JS_PREFIX = "javascript:";

  /** File.createTempFile rejects prefixes shorter than this. */
  private static final int MIN_TEMP_PREFIX_LENGTH = 3;

  /** Whether the generated file is registered for deletion when the JVM exits. */
  private boolean DELETE_UPON_EXIT = true;

  interface IGeneratorCallback extends IHtmlCallback {
    void writeToFinalRegion(SourceRegion finalRegion);
  }

  /**
   * Collects the page's JavaScript while the HTML is parsed: script contents go to the script region,
   * functions wrapping event-handler attributes to the DOM region, and calls to those functions to the
   * entry-point region.
   */
  protected static class HtmlCallback implements IGeneratorCallback{
    protected final URL entrypointUrl;
    protected final IUrlResolver urlResolver;

    protected final SourceRegion scriptRegion;
    protected final SourceRegion domRegion;
    protected final SourceRegion entrypointRegion;

    /** File part of the entry-point URL; used as the original file name in line mappings. */
    protected final String fileName;

    /** Running counter naming elements that have no usable id attribute. */
    private int counter = 0;

    public HtmlCallback(URL entrypointUrl, IUrlResolver urlResolver) {
      this.entrypointUrl = entrypointUrl;
      this.urlResolver = urlResolver;
      this.scriptRegion = new SourceRegion();
      this.domRegion = new SourceRegion();
      this.entrypointRegion = new SourceRegion();
      this.fileName = entrypointUrl.getFile();
    }

    //Do nothing
    public void handleEndTag(ITag tag) {}

    public void handleStartTag(ITag tag) {
      if (tag.getName().equalsIgnoreCase("script")) {
        handleScript(tag);
      }
      handleDOM(tag);
    }

    /**
     * Model the HTML DOM
     *
     * @param tag
     *          - the HTML tag to model
     */
    protected void handleDOM(ITag tag) {
      // Get the name of the modeling function either from the id attribute or a
      // running counter
      String idAttribute = tag.getAttributeByName("id");
      String funcName;
      if (idAttribute != null && LEGAL_JS_IDENTIFIER_REGEXP.matcher(idAttribute).matches()) {
        funcName = idAttribute;
      } else {
        funcName = "node" + (counter++);
      }
      handleDOM(tag, funcName);
    }

    protected void handleDOM(ITag tag, String funcName) {
      Map<String, String> attributeSet = tag.getAllAttributes();
      for (Entry<String, String> a : attributeSet.entrySet()) {
        handleAttribute(a, funcName, tag.getStartingLineNum());
      }
    }

    /**
     * For an "on..." attribute, or an attribute whose value starts with "javascript:", wraps the attribute's
     * code in a function named <code>&lt;attribute&gt;_&lt;funcName&gt;</code> and schedules a call to it
     * in the entry-point region. Other attributes are ignored.
     */
    private void handleAttribute(Entry<String, String> a, String funcName, Integer lineNum) {
      String attName = a.getKey();
      String attValue = a.getValue();
      if (attName.toLowerCase().startsWith("on") || (attValue != null && attValue.toLowerCase().startsWith("javascript:"))) {
        String fName = attName + "_" + funcName;
        String signatureLine = "function " + fName + "(event) {";
        domRegion.println(signatureLine, fileName, lineNum);// Defines the function
        int offset = 0;
        for (String eventContentLine : extructJS(attValue)){
          domRegion.println("\t" + eventContentLine, fileName, lineNum + (offset++));
        }
        domRegion.println("}", fileName, lineNum);// Defines the function

        entrypointRegion.println("\t" + fName + "(null);", fileName, lineNum);// Run it
      }
    }

    /**
     * Splits an attribute value into its lines of JavaScript, dropping a leading "javascript:" prefix
     * (in any letter case) when present.
     *
     * @return the lines of code; empty when the value is null
     */
    private String[] extructJS(String attValue) {
      if (attValue == null){
        return new String[] {};
      }
      String content;
      // The value only has to START with the prefix; comparing for equality never stripped it.
      if (attValue.regionMatches(true, 0, JS_PREFIX, 0, JS_PREFIX.length())) {
        content = attValue.substring(JS_PREFIX.length());
      } else {
        content = attValue;
      }

      return content.split("\\n");
    }

    /**
     * Adds the code of a script tag to the script region, reading it from the "src" URL when that attribute
     * is present and from the tag body otherwise. Read failures are reported on stderr and the script is
     * skipped (best effort).
     */
    protected void handleScript(ITag tag) {
      String value = tag.getAttributeByName("src");

      try {
        if (value != null) {
          // script is out-of-line
          getScriptFromUrl(value);
        } else{
          getInlineScript(tag);
        }
      } catch (IOException e) {
        System.err.println("Error reading script file: " + e.getMessage());
      }
    }

    private void getScriptFromUrl(String urlAsString) throws IOException, MalformedURLException {
      URL absoluteUrl = UrlManipulator.relativeToAbsoluteUrl(urlAsString, this.entrypointUrl);
      URL scriptSrc = urlResolver.resolve(absoluteUrl);
      if (scriptSrc == null) { //Error resolving URL
        return;
      }

      InputStream scriptInputStream = scriptSrc.openConnection().getInputStream();
      try{
        int lineNum = 1;
        String line;
        BufferedReader scriptReader = new BufferedReader(new UnicodeReader(scriptInputStream, "UTF8"));
        while ((line = scriptReader.readLine()) != null) {
          scriptRegion.println(line, scriptSrc.getFile(), lineNum++);
        }
      } finally {
        scriptInputStream.close();
      }
    }

    private void getInlineScript(ITag tag) throws IOException {
      Pair<Integer, String> bodyWithLineNumber = tag.getBodyText();
      scriptRegion.println(bodyWithLineNumber.snd, fileName, bodyWithLineNumber.fst);
    }

    /**
     * @return the part of the URL's file component that follows the last '/', or the whole file component
     *         when it contains no '/'
     */
    protected String getScriptName(URL url) throws MalformedURLException {
      String file = url.getFile();
      int lastIdxOfSlash = file.lastIndexOf('/');
      file = (lastIdxOfSlash == (-1)) ? file : file.substring(lastIdxOfSlash + 1);
      return file;
    }

    /**
     * Assembles the generated program: the scripts wrapped in window.__MAIN__, then the DOM region, then an
     * endless loop invoking the collected event handlers.
     */
    public void writeToFinalRegion(SourceRegion finalRegion) {
      finalRegion.println("document.URL = new String(\"" + entrypointUrl + "\");");

      // wrapping the embedded scripts with a fake method of the window. Required for making this == window.
      finalRegion.println("window.__MAIN__ = function(){");

      finalRegion.write(scriptRegion);

      finalRegion.println("} // end of window.__MAIN__");
      finalRegion.println("window.__MAIN__();");

      finalRegion.write(domRegion);

      finalRegion.println("while (true){ ");
      finalRegion.write(entrypointRegion);
      finalRegion.println("} // while (true)");
    }
  }

  /**
   * Parses the page at the entry-point URL and writes all JavaScript found into one temporary .js file.
   *
   * @return a single-entry map from the generated module to its line mapping
   * @throws IOException
   *           if the page cannot be read or the generated file cannot be written
   */
  public Map<SourceFileModule, FileMapping> extractSources(URL entrypointUrl, IHtmlParser htmlParser, IUrlResolver urlResolver)
      throws IOException {

    IGeneratorCallback htmlCallback;
    InputStreamReader inputStreamReader = getStream(entrypointUrl);
    try {
      htmlCallback = createHtmlCallback(entrypointUrl, urlResolver);
      htmlParser.parse(inputStreamReader, htmlCallback, entrypointUrl.getFile());
    } finally {
      inputStreamReader.close(); // release the connection even when parsing fails
    }

    SourceRegion finalRegion = new SourceRegion();
    htmlCallback.writeToFinalRegion(finalRegion);

    // writing the final region into one SourceFileModule.
    File outputFile = createOutputFile(entrypointUrl, DELETE_UPON_EXIT);
    FileMapping fileMapping;
    PrintStream out = new PrintStream(outputFile);
    try {
      fileMapping = finalRegion.writeToFile(out);
      // PrintStream swallows I/O errors; surface them instead of returning a truncated file.
      if (out.checkError()) {
        throw new IOException("Error writing generated source to " + outputFile);
      }
    } finally {
      out.close();
    }
    SourceFileModule singleFileModule = new SourceFileModule(outputFile, outputFile.getName());
    return Collections.singletonMap(singleFileModule, fileMapping);
  }

  protected IGeneratorCallback createHtmlCallback(URL entrypointUrl, IUrlResolver urlResolver) {
    return new HtmlCallback(entrypointUrl, urlResolver);
  }

  /**
   * Creates the temporary .js file receiving the generated source, named after the last segment of the
   * URL's file component.
   *
   * @param delete
   *          whether to register the file for deletion on JVM exit
   */
  private File createOutputFile(URL url, boolean delete) throws IOException {
    // createTempFile throws IllegalArgumentException for prefixes shorter than three characters,
    // which happens e.g. for "http://host/" where the name is empty - pad short names.
    StringBuilder prefix = new StringBuilder(new File(url.getFile()).getName());
    while (prefix.length() < MIN_TEMP_PREFIX_LENGTH) {
      prefix.append('_');
    }
    // The freshly created (empty) file is kept; it is simply overwritten by the caller.
    File outputFile = File.createTempFile(prefix.toString(), ".js");
    if(delete){
      outputFile.deleteOnExit();
    }
    return outputFile;
  }

  /** Opens a reader over the URL's content with caching disabled. */
  private InputStreamReader getStream(URL url) throws IOException {
    URLConnection conn = url.openConnection();
    // NOTE(review): setDefaultUseCaches changes the default for subsequently created connections too,
    // not only this one - confirm this is intended.
    conn.setDefaultUseCaches(false);
    conn.setUseCaches(false);

    return new InputStreamReader(conn.getInputStream());
  }

  /**
   * Debugging entry point: extracts the sources of the page given as first argument (keeping the generated
   * file) and prints the generated module and its line mapping.
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      System.out.println("Usage: <URL of html page to analyze>");
      System.exit(1);
    }
//    DomLessSourceExtractor domLessScopeGenerator = new DomLessSourceExtractor();
    DomLessSourceExtractor domLessScopeGenerator = new DefaultSourceExtractor();
    domLessScopeGenerator.DELETE_UPON_EXIT = false;
    URL entrypointUrl = new URL(args[0]);
    IHtmlParser htmlParser = new JerichoHtmlParser();
    IUrlResolver urlResolver = new IdentityUrlResover();
    Map<SourceFileModule, FileMapping> res = domLessScopeGenerator.extractSources(entrypointUrl , htmlParser , urlResolver);
    Entry<SourceFileModule, FileMapping> entry = res.entrySet().iterator().next();
    System.out.println(entry.getKey());
    entry.getValue().dump(System.out);
  }
}

View File

@ -0,0 +1,32 @@
package com.ibm.wala.cast.js.html;
import java.io.PrintStream;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import com.ibm.wala.util.collections.HashMapFactory;
import com.ibm.wala.util.collections.Pair;
/**
 * Read-only view of a mapping from the line numbers of one (generated) file to the
 * file name and line number each of those lines originates from.
 */
public class FileMapping{

  /** Line number -> (original file name, original line number). */
  protected Map<Integer, Pair<String, Integer>> lineNumberToFileAndLine = HashMapFactory.make();

  /**
   * Looks up the origin of a line.
   *
   * @param line
   *          the line number to look up
   * @return the (file name, line number) pair recorded for the line, or null if no mapping for the given line.
   */
  public Pair<String,Integer> getAssociatedFileAndLine(int line){
    Integer key = Integer.valueOf(line);
    return lineNumberToFileAndLine.get(key);
  }

  /**
   * Prints every mapping, ordered by line number, as "line: originalLine@originalFile".
   */
  public void dump(PrintStream ps){
    Set<Integer> sortedLines = new TreeSet<Integer>(lineNumberToFileAndLine.keySet());
    for (Integer mappedLine : sortedLines){
      Pair<String, Integer> origin = lineNumberToFileAndLine.get(mappedLine);
      String originalFile = origin.fst;
      Integer originalLine = origin.snd;
      ps.println(mappedLine + ": " + originalLine + "@" + originalFile);
    }
  }
}

View File

@ -34,11 +34,4 @@ public interface ITag {
* @return null if no known
*/
public int getStartingLineNum();
/**
* @return path to the file containing the tag.
*/
public String getFilePath();
}

View File

@ -0,0 +1,26 @@
package com.ibm.wala.cast.js.html;
import java.net.URL;
/**
 * Translates between the Internet location of a resource and the location of its local copy.
 * Used for handling resources that were copied from the web to local files (and still contain
 * references to the web).
 *
 * @author yinnonh
 * @author danielk
 */
public interface IUrlResolver {

  /**
   * Maps an Internet URL to the corresponding local one.
   *
   * @param input
   *          the Internet URL
   * @return the local URL
   */
  URL resolve(URL input);

  /**
   * Maps a local URL back to the corresponding Internet one.
   *
   * @param input
   *          the local URL
   * @return the Internet URL
   */
  URL deResolve(URL input);
}

View File

@ -0,0 +1,15 @@
package com.ibm.wala.cast.js.html;
import java.net.URL;
/**
 * An {@link IUrlResolver} that performs no translation: both directions hand back the URL they were given.
 */
public class IdentityUrlResover implements IUrlResolver{

  /** @return the given URL itself */
  public URL resolve(final URL input) {
    final URL unchanged = input;
    return unchanged;
  }

  /** @return the given URL itself */
  public URL deResolve(final URL input) {
    final URL unchanged = input;
    return unchanged;
  }
}

View File

@ -0,0 +1,20 @@
package com.ibm.wala.cast.js.html;
import java.io.IOException;
import java.net.URL;
import java.util.Map;
import com.ibm.wala.classLoader.SourceFileModule;
/**
 * Extracts the scripts of the HTML page found at a given URL, including attached js files.
 * For each extracted SourceFileModule a file and line mapping back to the original file and
 * line number is provided.
 *
 * @author yinnonh
 * @author danielk
 */
public interface JSSourceExtractor {

  /**
   * @param entrypointUrl
   *          URL of the HTML page to extract from
   * @param htmlParser
   *          parser used to go over the HTML
   * @param urlResolver
   *          resolver applied to the URLs of referenced resources
   * @return the extracted modules, each associated with its mapping to the original files and lines
   * @throws IOException
   *           declared for failures while reading or writing sources
   */
  Map<SourceFileModule, FileMapping> extractSources(
      URL entrypointUrl,
      IHtmlParser htmlParser,
      IUrlResolver urlResolver) throws IOException;
}

View File

@ -0,0 +1,11 @@
package com.ibm.wala.cast.js.html;
import com.ibm.wala.util.collections.Pair;
/**
 * A {@link FileMapping} to which mappings can be added by classes of this package.
 */
public class MutableFileMapping extends FileMapping {

  /**
   * Records that the given line originates from the given line of the given file, replacing any
   * mapping previously recorded for that line.
   */
  void map(int line, String originalFile, int originalLine){
    Pair<String, Integer> origin = Pair.<String, Integer> make(originalFile, originalLine);
    lineNumberToFileAndLine.put(line, origin);
  }
}

View File

@ -0,0 +1,104 @@
package com.ibm.wala.cast.js.html;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.StringReader;
import java.util.StringTokenizer;
import com.ibm.wala.util.collections.Pair;
/**
 * An in-memory buffer of generated source text that remembers, per generated line, which line of which
 * original file it was produced from.
 */
public class SourceRegion {

  private final StringBuilder source = new StringBuilder();
  private final MutableFileMapping fileMapping = new MutableFileMapping();

  /** Number (1-based) of the line that the next printed character lands on. */
  private int currentLine = 1;

  public SourceRegion() {
  }

  /**
   * Appends text and, when an original file is given, maps every line the text touches to consecutive
   * lines of that file starting at originalLine.
   *
   * @param originalFile
   *          name of the file the text originates from, or null to append without recording a mapping
   */
  public void print(String text, String originalFile, int originalLine){
    source.append(text);
    int numberOfLineDrops = getNumberOfLineDrops(text);
    if (originalFile != null){
      for (int i = 0; i < numberOfLineDrops; i++){
        fileMapping.map(currentLine++, originalFile, originalLine++);
      }
      if (! text.endsWith("\n")){ // avoid mapping one line too much
        fileMapping.map(currentLine, originalFile, originalLine); // required for handling text with no CRs.
      }
    } else {
      currentLine += numberOfLineDrops;
    }
  }

  public void println(String text, String originalFile, int originalLine){
    print(text + "\n", originalFile, originalLine);
  }

  public void print(String text){
    print(text, null, -1);
  }

  public void println(String text){
    print(text + "\n");
  }

  /**
   * Prints the accumulated source to the given stream.
   *
   * @return the mapping of the printed lines to their original files and lines
   */
  public FileMapping writeToFile(PrintStream ps){
    ps.print(source.toString());
    return fileMapping;
  }

  /**
   * Appends the content of another region line by line, carrying over the mapping of each line that has one.
   */
  public void write(SourceRegion otherRegion){
    BufferedReader br = new BufferedReader(new StringReader(otherRegion.source.toString()));
    int lineNum = 0;
    String line;
    try {
      while ((line = br.readLine()) != null){
        lineNum++;

        Pair<String, Integer> fileAndLine = otherRegion.fileMapping.getAssociatedFileAndLine(lineNum);
        if (fileAndLine!= null){
          this.println(line, fileAndLine.fst, fileAndLine.snd);
        } else {
          this.println(line);
        }
      }
    } catch (IOException e) {
      // Reading from an in-memory string is not expected to fail.
      e.printStackTrace();
      assert false;
    }
  }

  /**
   * Prints every line of the region prefixed with "originalLine@originalFile", or "N/A" for unmapped lines.
   * Blank lines are printed as well: skipping them (as a tokenizer on "\n" does) would make the line
   * counter drift away from the line numbers used as keys of the mapping.
   */
  public void dump(PrintStream ps){
    String text = source.toString();
    int lineNum = 0;
    int start = 0;
    while (start < text.length()){
      int end = text.indexOf('\n', start);
      if (end == -1){
        end = text.length(); // last line has no terminating newline
      }
      String line = text.substring(start, end);
      lineNum++;

      Pair<String, Integer> fileAndLine = fileMapping.getAssociatedFileAndLine(lineNum);
      if (fileAndLine!= null){
        ps.print(fileAndLine.snd + "@" + fileAndLine.fst + "\t:");
      } else {
        ps.print("N/A \t\t:");
      }
      ps.println(line);
      start = end + 1;
    }
  }

  /** @return the number of '\n' characters in the text */
  private static int getNumberOfLineDrops(String text) {
    int ret = 0;
    for (int i = text.indexOf('\n'); i != -1; i = text.indexOf('\n', i + 1)){
      ret++;
    }
    return ret;
  }
}

View File

@ -0,0 +1,108 @@
package com.ibm.wala.cast.js.html;
/**
http://www.unicode.org/unicode/faq/utf_bom.html
BOMs:
00 00 FE FF = UTF-32, big-endian
FF FE 00 00 = UTF-32, little-endian
FE FF = UTF-16, big-endian
FF FE = UTF-16, little-endian
EF BB BF = UTF-8
Win2k Notepad:
Unicode format = UTF-16LE
***/
import java.io.*;
/**
 * Generic unicode textreader, which will use BOM mark to identify the encoding to be used.
 * The BOM itself is skipped; when no BOM is present the default encoding given at construction is used.
 */
public class UnicodeReader extends Reader {
  PushbackInputStream internalIn;

  /** Decoding reader; created lazily by {@link #init()} once the encoding is known. */
  InputStreamReader internalIn2 = null;

  String defaultEnc;

  /** Bytes read ahead for BOM detection: long enough for the longest pattern checked (a doubled UTF-8 BOM). */
  private static final int BOM_SIZE = 6;

  /*
   * Default encoding is used only if BOM is not found. If defaultEncoding is NULL then systemdefault is used.
   */
  public UnicodeReader(InputStream in, String defaultEnc) {
    internalIn = new PushbackInputStream(in, BOM_SIZE);
    this.defaultEnc = defaultEnc;
  }

  public String getDefaultEncoding() {
    return defaultEnc;
  }

  /**
   * @return the name of the encoding in use as reported by the underlying InputStreamReader, or null when
   *         nothing has been read yet
   */
  public String getEncoding() {
    if (internalIn2 == null)
      return null;
    return internalIn2.getEncoding();
  }

  /**
   * Read-ahead up to BOM_SIZE bytes and check for BOM marks. Extra bytes are unread back to the stream,
   * only BOM bytes are skipped.
   */
  protected void init() throws IOException {
    if (internalIn2 != null)
      return;

    byte[] bom = new byte[BOM_SIZE];
    int n = readAhead(bom);

    String encoding;
    int bomLength;
    // The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 ones: the UTF-32LE mark
    // (FF FE 00 00) begins with the UTF-16LE mark (FF FE) and would otherwise never be recognized.
    if (startsWith(bom, n, 0xEF, 0xBB, 0xBF, 0xEF, 0xBB, 0xBF)) {
      encoding = "UTF-8";
      bomLength = 6;
    } else if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
      encoding = "UTF-8";
      bomLength = 3;
    } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
      encoding = "UTF-32BE";
      bomLength = 4;
    } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
      encoding = "UTF-32LE";
      bomLength = 4;
    } else if (startsWith(bom, n, 0xFE, 0xFF)) {
      encoding = "UTF-16BE";
      bomLength = 2;
    } else if (startsWith(bom, n, 0xFF, 0xFE)) {
      encoding = "UTF-16LE";
      bomLength = 2;
    } else {
      // Unicode BOM mark not found, unread all bytes
      encoding = defaultEnc;
      bomLength = 0;
    }

    // Push back everything that was read beyond the BOM.
    if (n > bomLength)
      internalIn.unread(bom, bomLength, n - bomLength);

    // Use given encoding
    if (encoding == null) {
      internalIn2 = new InputStreamReader(internalIn);
    } else {
      internalIn2 = new InputStreamReader(internalIn, encoding);
    }
  }

  /**
   * Fills the buffer from the stream, reading repeatedly because a single read may deliver fewer bytes
   * than are available.
   *
   * @return the number of bytes read; less than the buffer length only when the stream ended
   */
  private int readAhead(byte[] buf) throws IOException {
    int total = 0;
    while (total < buf.length) {
      int r = internalIn.read(buf, total, buf.length - total);
      if (r < 0)
        break;
      total += r;
    }
    return total;
  }

  /** @return whether the first len bytes of buf are at least as long as, and begin with, the given prefix */
  private static boolean startsWith(byte[] buf, int len, int... prefix) {
    if (len < prefix.length)
      return false;
    for (int i = 0; i < prefix.length; i++) {
      if (buf[i] != (byte) prefix[i])
        return false;
    }
    return true;
  }

  /** Closes the underlying stream, without forcing BOM detection when nothing has been read yet. */
  public void close() throws IOException {
    if (internalIn2 != null) {
      internalIn2.close();
    } else {
      internalIn.close();
    }
  }

  public int read(char[] cbuf, int off, int len) throws IOException {
    init();
    return internalIn2.read(cbuf, off, len);
  }
}

View File

@ -0,0 +1,68 @@
package com.ibm.wala.cast.js.html;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Helper for turning the links found inside a page into absolute URLs.
 */
public class UrlManipulator {

  /**
   * Resolves a link against the URL of the page it was found in. Backslashes in the link are treated as
   * slashes and the link is lower-cased before it is resolved.
   *
   * @param urlFound the link as appear
   * @param context the URL in which the link appeared
   * @return the absolute URL: the link itself when it is already absolute, otherwise the link combined
   *         with the protocol, host, port and (for path-relative links) directory of the context
   * @throws MalformedURLException if the resulting URL cannot be constructed
   */
  public static URL relativeToAbsoluteUrl(String urlFound, URL context) throws MalformedURLException {
    // NOTE(review): lower-casing affects the path too, which servers may treat as case-sensitive - confirm.
    urlFound = urlFound.replace("\\", "/").toLowerCase();

    if (isAbsoluteUrl(urlFound)) {
      return new URL(urlFound);
    }

    if (urlFound.startsWith("//")) {
      //create URL taking only the protocol from the context
      String origHostAndPath = urlFound.substring(2);// removing "//"
      String host;
      String path;
      int indexOf = origHostAndPath.indexOf("/");
      if (indexOf > 0) {
        host = origHostAndPath.substring(0, indexOf);
        path = origHostAndPath.substring(indexOf);
      } else {
        host = origHostAndPath;
        path = "";
      }
      return new URL(context.getProtocol(), host, path);
    }

    if (urlFound.startsWith("/")) {
      //create URL taking the protocol, the host and the port from the context
      return new URL(context.getProtocol(), context.getHost(), context.getPort(), urlFound);
    }

    //"concat" URL to context
    int backDir = 0; // removing directories due to "../"
    while(urlFound.startsWith("../")){
      urlFound = urlFound.substring(3);
      backDir++;
    }
    String path = context.getPath().replace("\\", "/");
    boolean isContextDirectory = path.endsWith("/");
    String[] split = path.split("/");
    // we are also removing last element in case of a directory
    int rightTrimFromPath = (isContextDirectory ? 0 : 1) + backDir;
    StringBuilder contextPath = new StringBuilder();
    for (int i = 0; i < split.length - rightTrimFromPath; i++) {
      contextPath.append(split[i]);
      contextPath.append("/");
    }
    // A context path of "" or "/" (or too many "../") leaves nothing; fall back to the root so that the
    // file part still starts with '/' and is not glued directly onto the host name.
    if (contextPath.length() == 0) {
      contextPath.append("/");
    }
    // A port of -1 (none given in the context) selects the protocol's default port.
    return new URL(context.getProtocol(), context.getHost(), context.getPort(), contextPath.toString() + urlFound);
  }

  /**
   * @return whether the (lower-cased) link starts with an http or https scheme; a relative link that
   *         merely begins with the letters "http" (e.g. "httpd/x.js") is not absolute
   */
  private static boolean isAbsoluteUrl(String orig) {
    return orig.startsWith("http://") || orig.startsWith("https://");
  }
}

View File

@ -18,7 +18,7 @@ import com.ibm.wala.cast.js.html.IHtmlParser;
* @author danielk
* Uses the Jericho parser to go over the HTML
*/
public class HTMLJerichoParser implements IHtmlParser{
public class JerichoHtmlParser implements IHtmlParser{
static{
Config.LoggerProvider = LoggerProvider.STDERR;
}

View File

@ -21,7 +21,7 @@ import java.net.URLConnection;
import com.ibm.wala.cast.js.html.IHtmlParser;
import com.ibm.wala.cast.js.html.IHtmlCallback;
import com.ibm.wala.cast.js.html.jericho.HTMLJerichoParser;
import com.ibm.wala.cast.js.html.jericho.JerichoHtmlParser;
public class Generator {
public static final String preamble = "preamble.js", temp1 = "temp1.js", temp2 = "temp2.js", temp3 = "temp3.js";
@ -91,7 +91,7 @@ public class Generator {
FileWriter out2 = new FileWriter(temp2);
FileWriter out3 = new FileWriter(temp3);
IHtmlParser parser = new HTMLJerichoParser();
IHtmlParser parser = new JerichoHtmlParser();
IHtmlCallback parseHandler = callbackFactory.createCallback(input, out1, out2, out3);
parser.parse(fr, parseHandler, input.getFile());
out1.flush();