forked from Qortal/qortal
Moved HTML parsing to new class.
This commit is contained in:
parent
f2feb12708
commit
e64a3978e6
115
src/main/java/org/qortal/api/HTMLParser.java
Normal file
115
src/main/java/org/qortal/api/HTMLParser.java
Normal file
@ -0,0 +1,115 @@
|
||||
package org.qortal.api;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
|
||||
|
||||
public class HTMLParser {
|
||||
|
||||
private String linkPrefix;
|
||||
|
||||
public HTMLParser(String resourceId, boolean usePrefix) {
|
||||
this.linkPrefix = usePrefix ? "/site/" + resourceId : "";
|
||||
}
|
||||
|
||||
/**
|
||||
* Find relative links and prefix them with the resource ID, using Jsoup
|
||||
* @param path
|
||||
* @param data
|
||||
* @return The data with links replaced
|
||||
*/
|
||||
public byte[] replaceRelativeLinks(String path, byte[] data) {
|
||||
if (HTMLParser.isHtmlFile(path)) {
|
||||
String fileContents = new String(data);
|
||||
Document document = Jsoup.parse(fileContents);
|
||||
|
||||
Elements href = document.select("[href]");
|
||||
for (Element element : href) {
|
||||
String elementHtml = element.attr("href");
|
||||
if (this.shouldReplaceLink(elementHtml)) {
|
||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||
element.attr("href", this.linkPrefix + slash + element.attr("href"));
|
||||
}
|
||||
}
|
||||
Elements src = document.select("[src]");
|
||||
for (Element element : src) {
|
||||
String elementHtml = element.attr("src");
|
||||
if (this.shouldReplaceLink(elementHtml)) {
|
||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||
element.attr("src", this.linkPrefix + slash + element.attr("src"));
|
||||
}
|
||||
}
|
||||
Elements srcset = document.select("[srcset]");
|
||||
for (Element element : srcset) {
|
||||
String elementHtml = element.attr("srcset").trim();
|
||||
if (this.shouldReplaceLink(elementHtml)) {
|
||||
String[] parts = element.attr("srcset").split(",");
|
||||
ArrayList<String> newParts = new ArrayList<>();
|
||||
for (String part : parts) {
|
||||
part = part.trim();
|
||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||
String newPart = this.linkPrefix + slash + part;
|
||||
newParts.add(newPart);
|
||||
}
|
||||
String newString = String.join(",", newParts);
|
||||
element.attr("srcset", newString);
|
||||
}
|
||||
}
|
||||
Elements style = document.select("[style]");
|
||||
for (Element element : style) {
|
||||
String elementHtml = element.attr("style");
|
||||
if (elementHtml.contains("url(")) {
|
||||
String[] parts = elementHtml.split("url\\(");
|
||||
String[] parts2 = parts[1].split("\\)");
|
||||
String link = parts2[0];
|
||||
if (link != null) {
|
||||
link = this.removeQuotes(link);
|
||||
if (this.shouldReplaceLink(link)) {
|
||||
String slash = (link.startsWith("/") ? "" : "/");
|
||||
String modifiedLink = "url('" + this.linkPrefix + slash + link + "')";
|
||||
element.attr("style", parts[0] + modifiedLink + parts2[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return document.html().getBytes();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
private boolean shouldReplaceLink(String elementHtml) {
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
prefixes.add("http"); // Don't modify absolute links
|
||||
prefixes.add("//"); // Don't modify absolute links
|
||||
prefixes.add("javascript:"); // Don't modify javascript
|
||||
prefixes.add("../"); // Don't modify valid relative links
|
||||
for (String prefix : prefixes) {
|
||||
if (elementHtml.startsWith(prefix)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private String removeQuotes(String elementHtml) {
|
||||
if (elementHtml.startsWith("\"") || elementHtml.startsWith("\'")) {
|
||||
elementHtml = elementHtml.substring(1);
|
||||
}
|
||||
if (elementHtml.endsWith("\"") || elementHtml.endsWith("\'")) {
|
||||
elementHtml = elementHtml.substring(0, elementHtml.length() - 1);
|
||||
}
|
||||
return elementHtml;
|
||||
}
|
||||
|
||||
public static boolean isHtmlFile(String path) {
|
||||
if (path.endsWith(".html") || path.endsWith(".htm")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
@ -27,6 +27,7 @@ import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.qortal.api.ApiError;
|
||||
import org.qortal.api.ApiExceptionFactory;
|
||||
import org.qortal.api.HTMLParser;
|
||||
import org.qortal.api.Security;
|
||||
import org.qortal.block.BlockChain;
|
||||
import org.qortal.crypto.Crypto;
|
||||
@ -245,13 +246,13 @@ public class WebsiteResource {
|
||||
@GET
|
||||
@Path("{resource}")
|
||||
public HttpServletResponse getResourceIndex(@PathParam("resource") String resourceId) {
|
||||
return this.get(resourceId, "/");
|
||||
return this.get(resourceId, "/", true);
|
||||
}
|
||||
|
||||
@GET
|
||||
@Path("{resource}/{path:.*}")
|
||||
public HttpServletResponse getResourcePath(@PathParam("resource") String resourceId, @PathParam("path") String inPath) {
|
||||
return this.get(resourceId, inPath);
|
||||
return this.get(resourceId, inPath, true);
|
||||
}
|
||||
|
||||
private HttpServletResponse get(String resourceId, String inPath) {
|
||||
@ -314,10 +315,11 @@ public class WebsiteResource {
|
||||
String filename = this.getFilename(unzippedPath, inPath);
|
||||
String filePath = unzippedPath + File.separator + filename;
|
||||
|
||||
if (this.isHtmlFile(filename)) {
|
||||
if (HTMLParser.isHtmlFile(filename)) {
|
||||
// HTML file - needs to be parsed
|
||||
byte[] data = Files.readAllBytes(Paths.get(filePath)); // TODO: limit file size that can be read into memory
|
||||
data = this.replaceRelativeLinks(filename, data, resourceId);
|
||||
HTMLParser htmlParser = new HTMLParser(resourceId, usePrefix);
|
||||
data = htmlParser.replaceRelativeLinks(filename, data);
|
||||
response.setContentType(context.getMimeType(filename));
|
||||
response.setContentLength(data.length);
|
||||
response.getOutputStream().write(data);
|
||||
@ -384,96 +386,6 @@ public class WebsiteResource {
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find relative links and prefix them with the resource ID, using Jsoup
|
||||
* @param path
|
||||
* @param data
|
||||
* @param resourceId
|
||||
* @return The data with links replaced
|
||||
*/
|
||||
private byte[] replaceRelativeLinks(String path, byte[] data, String resourceId) {
|
||||
if (this.isHtmlFile(path)) {
|
||||
String fileContents = new String(data);
|
||||
Document document = Jsoup.parse(fileContents);
|
||||
|
||||
Elements href = document.select("[href]");
|
||||
for (Element element : href) {
|
||||
String elementHtml = element.attr("href");
|
||||
if (this.shouldReplaceLink(elementHtml)) {
|
||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||
element.attr("href", "/site/" +resourceId + slash + element.attr("href"));
|
||||
}
|
||||
}
|
||||
Elements src = document.select("[src]");
|
||||
for (Element element : src) {
|
||||
String elementHtml = element.attr("src");
|
||||
if (this.shouldReplaceLink(elementHtml)) {
|
||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||
element.attr("src", "/site/" +resourceId + slash + element.attr("src"));
|
||||
}
|
||||
}
|
||||
Elements srcset = document.select("[srcset]");
|
||||
for (Element element : srcset) {
|
||||
String elementHtml = element.attr("srcset").trim();
|
||||
if (this.shouldReplaceLink(elementHtml)) {
|
||||
String[] parts = element.attr("srcset").split(",");
|
||||
ArrayList<String> newParts = new ArrayList<>();
|
||||
for (String part : parts) {
|
||||
part = part.trim();
|
||||
String slash = (elementHtml.startsWith("/") ? "" : File.separator);
|
||||
String newPart = "/site/" +resourceId + slash + part;
|
||||
newParts.add(newPart);
|
||||
}
|
||||
String newString = String.join(",", newParts);
|
||||
element.attr("srcset", newString);
|
||||
}
|
||||
}
|
||||
Elements style = document.select("[style]");
|
||||
for (Element element : style) {
|
||||
String elementHtml = element.attr("style");
|
||||
if (elementHtml.contains("url(")) {
|
||||
String[] parts = elementHtml.split("url\\(");
|
||||
String[] parts2 = parts[1].split("\\)");
|
||||
String link = parts2[0];
|
||||
if (link != null) {
|
||||
link = this.removeQuotes(link);
|
||||
if (this.shouldReplaceLink(link)) {
|
||||
String slash = (link.startsWith("/") ? "" : "/");
|
||||
String modifiedLink = "url('" + "/site/" + resourceId + slash + link + "')";
|
||||
element.attr("style", parts[0] + modifiedLink + parts2[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return document.html().getBytes();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
private boolean shouldReplaceLink(String elementHtml) {
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
prefixes.add("http"); // Don't modify absolute links
|
||||
prefixes.add("//"); // Don't modify absolute links
|
||||
prefixes.add("javascript:"); // Don't modify javascript
|
||||
prefixes.add("../"); // Don't modify valid relative links
|
||||
for (String prefix : prefixes) {
|
||||
if (elementHtml.startsWith(prefix)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private String removeQuotes(String elementHtml) {
|
||||
if (elementHtml.startsWith("\"") || elementHtml.startsWith("\'")) {
|
||||
elementHtml = elementHtml.substring(1);
|
||||
}
|
||||
if (elementHtml.endsWith("\"") || elementHtml.endsWith("\'")) {
|
||||
elementHtml = elementHtml.substring(0, elementHtml.length() - 1);
|
||||
}
|
||||
return elementHtml;
|
||||
}
|
||||
|
||||
private List<String> indexFiles() {
|
||||
List<String> indexFiles = new ArrayList<>();
|
||||
indexFiles.add("index.html");
|
||||
@ -485,11 +397,4 @@ public class WebsiteResource {
|
||||
return indexFiles;
|
||||
}
|
||||
|
||||
private boolean isHtmlFile(String path) {
|
||||
if (path.endsWith(".html") || path.endsWith(".htm")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user