Java Code Examples for com.gargoylesoftware.htmlunit.WebResponse#getContentAsStream()

The following examples show how to use com.gargoylesoftware.htmlunit.WebResponse#getContentAsStream(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HtmlEmbed.java    From htmlunit with Apache License 2.0 6 votes vote down vote up
/**
 * Downloads the resource referenced by this element's source attribute and
 * writes its bytes to the given file.
 * <p>The request reuses the enclosing page's charset and sends the page URL
 * as the referer.</p>
 *
 * @param file the destination file
 * @throws IOException if loading the resource or writing the file fails
 */
public void saveAs(final File file) throws IOException {
    final HtmlPage enclosingPage = (HtmlPage) getPage();
    final WebClient client = enclosingPage.getWebClient();

    // resolve the (possibly relative) source attribute against the page
    final URL sourceUrl = enclosingPage.getFullyQualifiedUrl(getAttributeDirect(SRC_ATTRIBUTE));

    final WebRequest sourceRequest = new WebRequest(sourceUrl);
    sourceRequest.setCharset(enclosingPage.getCharset());
    sourceRequest.setAdditionalHeader(HttpHeader.REFERER, enclosingPage.getUrl().toExternalForm());

    final WebResponse sourceResponse = client.loadWebResponse(sourceRequest);

    // both streams are closed automatically, in reverse order of declaration
    try (OutputStream target = Files.newOutputStream(file.toPath());
            InputStream source = sourceResponse.getContentAsStream()) {
        IOUtils.copy(source, target);
    }
}
 
Example 2
Source File: XmlSerializer.java    From HtmlUnit-Android with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the attributes to use for the given image, with a cloned
 * {@code src} attribute.
 * <p>When the cloned {@code src} has a non-blank value, the image content is
 * written to a file obtained from {@code createFile} and the cloned attribute
 * is pointed at that file (output directory name, separator, file name).</p>
 *
 * @param image the image whose attributes are requested
 * @return the attribute map containing the cloned {@code src} attribute
 * @throws IOException if the image content cannot be read or written
 */
protected Map<String, DomAttr> getAttributesFor(final HtmlImage image) throws IOException {
    final Map<String, DomAttr> attributes = createAttributesCopyWithClonedAttribute(image, "src");
    final DomAttr clonedSrc = attributes.get("src");

    final boolean hasSource = clonedSrc != null && StringUtils.isNotBlank(clonedSrc.getValue());
    if (!hasSource) {
        return attributes;
    }

    final WebResponse imageResponse = image.getWebResponse(true);
    final File target = createFile(clonedSrc.getValue(), "." + getSuffix(imageResponse));
    try (InputStream imageContent = imageResponse.getContentAsStream()) {
        FileUtils.copyInputStreamToFile(imageContent, target);
    }

    // only the cloned attribute node is changed, never the original one of the page
    clonedSrc.setValue(outputDir_.getName() + FILE_SEPARATOR + target.getName());
    return attributes;
}
 
Example 3
Source File: XmlUtils.java    From htmlunit with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a document from the content of the web response.
 * <p>A {@code null} response yields a new empty document. Otherwise the
 * content is parsed namespace-aware with the response's content charset;
 * parser diagnostics are passed to {@code DISCARD_MESSAGES_HANDLER} and every
 * entity is resolved to empty content. If parsing fails and the tracker
 * reports the content as blank, a new empty document is returned instead of
 * propagating the error.</p>
 *
 * @param webResponse the response from the server, may be {@code null}
 * @return the parse result
 * @throws IOException if the content cannot be read
 * @throws SAXException if the parsing fails on non-blank content
 * @throws ParserConfigurationException if a DocumentBuilder cannot be created
 */
public static Document buildDocument(final WebResponse webResponse)
    throws IOException, SAXException, ParserConfigurationException {

    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    if (webResponse == null) {
        return factory.newDocumentBuilder().newDocument();
    }

    factory.setNamespaceAware(true);

    // The reader is managed here instead of relying on the parser to close it:
    // creating the builder can fail before parse() is ever reached, which would
    // otherwise leave the response stream open.
    try (InputStreamReader reader = new InputStreamReader(
            new BOMInputStream(webResponse.getContentAsStream()),
            webResponse.getContentCharset())) {

        // we have to do the blank input check and the parsing in one step
        final TrackBlankContentReader tracker = new TrackBlankContentReader(reader);

        final InputSource source = new InputSource(tracker);
        final DocumentBuilder builder = factory.newDocumentBuilder();
        builder.setErrorHandler(DISCARD_MESSAGES_HANDLER);
        builder.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(final String publicId, final String systemId)
                throws SAXException, IOException {
                // every entity is replaced by empty content
                return new InputSource(new StringReader(""));
            }
        });
        try {
            return builder.parse(source);
        }
        catch (final SAXException e) {
            // blank content is not treated as an error
            if (tracker.wasBlank()) {
                return factory.newDocumentBuilder().newDocument();
            }
            throw e;
        }
    }
}
 
Example 4
Source File: DebuggingWebConnection.java    From htmlunit with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the response body to a newly created file and appends a matching
 * entry describing the request/response pair to the JS summary file.
 * @param response the response whose content is saved
 * @param request the request that produced the response
 * @throws IOException if a problem occurs writing the file
 */
protected void saveResponse(final WebResponse response, final WebRequest request)
    throws IOException {
    counter_++;
    final File target = createFile(request.getUrl(), chooseExtension(response.getContentType()));

    // stays 0 when the copy is cut short by an EOFException
    int bytesCopied = 0;
    try (InputStream body = response.getContentAsStream()) {
        try (OutputStream out = Files.newOutputStream(target.toPath())) {
            bytesCopied = IOUtils.copy(body, out);
        }
        catch (final EOFException e) {
            // ignore
        }
    }

    final URL responseUrl = response.getWebRequest().getUrl();
    if (LOG.isInfoEnabled()) {
        LOG.info("Created file " + target.getAbsolutePath() + " for response " + counter_ + ": " + responseUrl);
    }

    final HttpMethod method = request.getHttpMethod();
    final boolean urlEncodedPost =
        method == HttpMethod.POST && request.getEncodingType() == FormEncodingType.URL_ENCODED;

    final StringBuilder entry = new StringBuilder();
    entry.append("tab[tab.length] = {code: ").append(response.getStatusCode()).append(", ");
    entry.append("fileName: '").append(target.getName()).append("', ");
    entry.append("contentType: '").append(response.getContentType()).append("', ");
    entry.append("method: '").append(method.name()).append("', ");
    if (urlEncodedPost) {
        // request parameters are only recorded for URL encoded POSTs
        entry.append("postParameters: " + nameValueListToJsMap(request.getRequestParameters()) + ", ");
    }
    entry.append("url: '" + escapeJSString(responseUrl.toString()) + "', ");
    entry.append("loadTime: ").append(response.getLoadTime()).append(", ");
    entry.append("responseSize: ").append(bytesCopied).append(", ");
    entry.append("responseHeaders: " + nameValueListToJsMap(response.getResponseHeaders()));
    entry.append("};\n");

    appendToJSFile(entry.toString());
}
 
Example 5
Source File: AppletClassLoader.java    From htmlunit with Apache License 2.0 5 votes vote down vote up
/**
 * Defines a class from the bytes of the given response and records the
 * response URL in the loader's info text.
 * @param className the name of the class to define
 * @param webResponse the response whose content holds the class bytes
 * @throws IOException if the response content cannot be read
 */
public void addClassToClassPath(final String className, final WebResponse webResponse) throws IOException {
    try (InputStream classStream = webResponse.getContentAsStream()) {
        final byte[] classBytes = IOUtils.toByteArray(classStream);
        defineClass(className, classBytes, 0, classBytes.length);
    }

    // reached only when reading and defining the class succeeded
    final String infoLine = "    Class: " + webResponse.getWebRequest().getUrl() + "\n";
    info_.append(infoLine);
}
 
Example 6
Source File: XmlUtil.java    From HtmlUnit-Android with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a document from the content of the web response.
 * <p>A {@code null} response yields a new empty document. Otherwise the
 * content is parsed namespace-aware with the response's content charset;
 * parser diagnostics are passed to {@code DISCARD_MESSAGES_HANDLER} and every
 * entity is resolved to empty content. If parsing fails and the tracker
 * reports the content as blank, a new empty document is returned instead of
 * propagating the error.</p>
 *
 * @param webResponse the response from the server, may be {@code null}
 * @return the parse result
 * @throws IOException if the content cannot be read
 * @throws SAXException if the parsing fails on non-blank content
 * @throws ParserConfigurationException if a DocumentBuilder cannot be created
 */
public static Document buildDocument(final WebResponse webResponse)
    throws IOException, SAXException, ParserConfigurationException {

    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    if (webResponse == null) {
        return factory.newDocumentBuilder().newDocument();
    }

    factory.setNamespaceAware(true);

    // The reader is managed here instead of relying on the parser to close it:
    // creating the builder can fail before parse() is ever reached, which would
    // otherwise leave the response stream open.
    try (InputStreamReader reader = new InputStreamReader(
            new BOMInputStream(webResponse.getContentAsStream()),
            webResponse.getContentCharset())) {

        // we have to do the blank input check and the parsing in one step
        final TrackBlankContentReader tracker = new TrackBlankContentReader(reader);

        final InputSource source = new InputSource(tracker);
        final DocumentBuilder builder = factory.newDocumentBuilder();
        builder.setErrorHandler(DISCARD_MESSAGES_HANDLER);
        builder.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(final String publicId, final String systemId)
                throws SAXException, IOException {
                // every entity is replaced by empty content
                return new InputSource(new StringReader(""));
            }
        });
        try {
            return builder.parse(source);
        }
        catch (final SAXException e) {
            // blank content is not treated as an error
            if (tracker.wasBlank()) {
                return factory.newDocumentBuilder().newDocument();
            }
            throw e;
        }
    }
}
 
Example 7
Source File: HtmlEmbed.java    From HtmlUnit-Android with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the resource referenced by this element's {@code src} attribute
 * and writes its bytes to the given file.
 * <p>The page URL is sent as the referer of the request.</p>
 *
 * @param file the destination file
 * @throws IOException if loading the resource or writing the file fails
 */
public void saveAs(final File file) throws IOException {
    final HtmlPage enclosingPage = (HtmlPage) getPage();
    final WebClient client = enclosingPage.getWebClient();

    // resolve the (possibly relative) src attribute against the page
    final URL sourceUrl = enclosingPage.getFullyQualifiedUrl(getAttributeDirect("src"));

    final WebRequest sourceRequest = new WebRequest(sourceUrl);
    sourceRequest.setAdditionalHeader(HttpHeader.REFERER, enclosingPage.getUrl().toExternalForm());

    final WebResponse sourceResponse = client.loadWebResponse(sourceRequest);

    // both streams are closed automatically, in reverse order of declaration
    try (FileOutputStream target = new FileOutputStream(file);
            InputStream source = sourceResponse.getContentAsStream()) {
        IOUtils.copy(source, target);
    }
}
 
Example 8
Source File: AttachmentTest.java    From htmlunit with Apache License 2.0 4 votes vote down vote up
/**
 * Tests attachment callbacks and the contents of attachments: a response
 * sent with a {@code Content-Disposition: attachment} header is collected by
 * the attachment handler, opened in a new window, and exposes the expected
 * content, content type, status code and URL.
 * @throws Exception if an error occurs
 */
@Test
public void basic() throws Exception {
    final String content1 = "<html><body>\n"
        + "<form method='POST' name='form' action='" + URL_SECOND + "'>\n"
        + "<input type='submit' value='ok'>\n"
        + "</form>\n"
        + "<a href='#' onclick='document.form.submit()'>click me</a>\n"
        + "</body></html>";
    final String content2 = "download file contents";

    final WebClient client = getWebClient();
    final List<Attachment> attachments = new ArrayList<>();
    client.setAttachmentHandler(new CollectingAttachmentHandler(attachments));

    final List<NameValuePair> headers = new ArrayList<>();
    headers.add(new NameValuePair("Content-Disposition", "attachment"));

    final MockWebConnection conn = new MockWebConnection();
    conn.setResponse(URL_FIRST, content1);
    conn.setResponse(URL_SECOND, content2, 200, "OK", MimeType.TEXT_HTML, headers);
    client.setWebConnection(conn);
    assertTrue(attachments.isEmpty());

    final HtmlPage result = client.getPage(URL_FIRST);
    final HtmlAnchor anchor = result.getAnchors().get(0);
    final Page clickResult = anchor.click();
    assertEquals(result, clickResult);
    assertEquals(1, attachments.size());
    assertTrue(attachments.get(0).getPage() instanceof HtmlPage);
    // the attachment is opened inside a new window
    assertEquals(2, client.getWebWindows().size());

    final Attachment attachment = attachments.get(0);
    final Page attachedPage = attachment.getPage();
    final WebResponse attachmentResponse = attachedPage.getWebResponse();
    // close the content stream even when the comparison fails
    try (InputStream attachmentStream = attachmentResponse.getContentAsStream()) {
        HttpWebConnectionTest.assertEquals(new ByteArrayInputStream(content2.getBytes()), attachmentStream);
    }
    assertEquals(MimeType.TEXT_HTML, attachmentResponse.getContentType());
    assertEquals(200, attachmentResponse.getStatusCode());
    assertEquals(URL_SECOND, attachmentResponse.getWebRequest().getUrl());
}
 
Example 9
Source File: AttachmentTest.java    From htmlunit with Apache License 2.0 4 votes vote down vote up
/**
 * Tests an attachment handler that takes over the raw response: when
 * {@code handleAttachment(WebResponse)} returns {@code true}, the response is
 * collected, {@code handleAttachment(Page)} must not be called, and no
 * additional window is opened.
 * @throws Exception if an error occurs
 */
@Test
public void handleResponseFromHanlder() throws Exception {
    final String content1 = "<html><body>\n"
        + "<form method='POST' name='form' action='" + URL_SECOND + "'>\n"
        + "<input type='submit' value='ok'>\n"
        + "</form>\n"
        + "<a href='#' onclick='document.form.submit()'>click me</a>\n"
        + "</body></html>";
    final String content2 = "download file contents";

    final WebClient client = getWebClient();
    final List<WebResponse> attachments = new ArrayList<>();

    client.setAttachmentHandler(new AttachmentHandler() {
        @Override
        public boolean handleAttachment(final WebResponse response) {
            attachments.add(response);
            return true;
        }

        @Override
        public void handleAttachment(final Page page) {
            // must never be reached because the response variant returned true
            throw new IllegalAccessError("handleAttachment(Page) called");
        }
    });

    final List<NameValuePair> headers = new ArrayList<>();
    headers.add(new NameValuePair("Content-Disposition", "attachment"));

    final MockWebConnection conn = new MockWebConnection();
    conn.setResponse(URL_FIRST, content1);
    conn.setResponse(URL_SECOND, content2, 200, "OK", MimeType.TEXT_HTML, headers);
    client.setWebConnection(conn);
    assertTrue(attachments.isEmpty());

    final HtmlPage result = client.getPage(URL_FIRST);
    final HtmlAnchor anchor = result.getAnchors().get(0);
    final Page clickResult = anchor.click();
    assertEquals(result, clickResult);
    assertEquals(1, attachments.size());
    // the handler consumed the response, so no extra window was opened
    assertEquals(1, client.getWebWindows().size());

    final WebResponse attachmentResponse = attachments.get(0);
    // close the content stream even when the comparison fails
    try (InputStream attachmentStream = attachmentResponse.getContentAsStream()) {
        HttpWebConnectionTest.assertEquals(new ByteArrayInputStream(content2.getBytes()), attachmentStream);
    }
    assertEquals(MimeType.TEXT_HTML, attachmentResponse.getContentType());
    assertEquals(200, attachmentResponse.getStatusCode());
    assertEquals(URL_SECOND, attachmentResponse.getWebRequest().getUrl());
}
 
Example 10
Source File: HtmlUnitFetcher.java    From sparkler with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches the resource's URL through the HtmlUnit driver and wraps the result.
 * <p>At most {@code CONTENT_LIMIT} bytes of the body are read. When the
 * response reports a content length larger than what was read, the result is
 * marked with the {@code TRUNCATED} header. Any failure is logged and turned
 * into an empty result with the resource status set to ERROR; this method
 * does not rethrow it.</p>
 *
 * @param resource the resource to fetch; its status is updated
 * @return the fetched data, never {@code null}
 * @throws Exception declared by the overridden method
 */
@Override
public FetchedData fetch(Resource resource) throws Exception {
    LOG.info("HtmlUnit FETCHER {}", resource.getUrl());
    FetchedData fetchedData;
    try {
        String userAgent = getUserAgent();
        if (StringUtils.isNotBlank(userAgent)) {
            // replace rather than add, so the header is not duplicated
            driver.removeRequestHeader(USER_AGENT);
            driver.addRequestHeader(USER_AGENT, userAgent);
        }
        Page page = driver.getPage(resource.getUrl());
        WebResponse response = page.getWebResponse();

        byte[] content;
        try (InputStream stream = response.getContentAsStream();
                BoundedInputStream boundedStream = new BoundedInputStream(stream, CONTENT_LIMIT);
                ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            IOUtils.copy(boundedStream, out);
            content = out.toByteArray();
        }
        fetchedData = new FetchedData(content, response.getContentType(), response.getStatusCode());

        boolean truncated = false;
        long contentLength = response.getContentLength();
        if (contentLength > 0 && contentLength < Integer.MAX_VALUE) {
            // Compare against the bytes actually read: checking the value just
            // stored via setContentLength would compare the length with itself.
            truncated = contentLength > content.length;
            fetchedData.setContentLength((int) contentLength);
            if (truncated) {
                LOG.info("Content Truncated: {}, TotalSize={}", resource.getUrl(), contentLength);
            }
        }
        resource.setStatus(ResourceStatus.FETCHED.toString());

        List<NameValuePair> respHeaders = response.getResponseHeaders();
        Map<String, List<String>> headers = new HashMap<>();
        fetchedData.setHeaders(headers);
        if (respHeaders != null && !respHeaders.isEmpty()) {
            // a header name may occur several times; collect all of its values
            respHeaders.forEach(item ->
                    headers.computeIfAbsent(item.getName(), name -> new ArrayList<>()).add(item.getValue()));
        }
        if (truncated) { //add truncated header
            headers.put(TRUNCATED, Collections.singletonList(Boolean.TRUE.toString()));
        }
    } catch (Exception e) {
        LOG.warn(e.getMessage(), e);
        fetchedData = new FetchedData(new byte[0], "unknown/unknown", 0); // fixme: use proper status code
        resource.setStatus(ResourceStatus.ERROR.toString());
    }
    fetchedData.setResource(resource);
    return fetchedData;
}